Browse Source

Merging trunk to branch HDFS-2802

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1460410 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas 12 years ago
Parent
Commit
698e3f8ae5
100 changed files with 3153 additions and 1440 deletions
  1. 8 18
      BUILDING.txt
  2. 17 1
      hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java
  3. 14 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  4. 3 1
      hadoop-common-project/hadoop-common/pom.xml
  5. 28 59
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java
  6. 25 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
  7. 46 11
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
  8. 44 14
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
  9. 5 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java
  10. 8 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java
  11. 8 8
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
  12. 12 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NodeBase.java
  13. 6 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java
  14. 37 19
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java
  15. 19 11
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java
  16. 22 24
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
  17. 15 15
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDFVariations.java
  18. 5 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/StaticMapping.java
  19. 4 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSwitchMapping.java
  20. 55 17
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java
  21. 2 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java
  22. 130 17
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
  23. 24 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  24. 6 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  25. 2 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  26. 30 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
  27. 112 81
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
  28. 1 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  29. 13 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  30. 88 6
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java
  31. 69 53
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java
  32. 12 7
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java
  33. 8 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java
  34. 9 8
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java
  35. 72 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java
  36. 24 0
      hadoop-mapreduce-project/CHANGES.txt
  37. 4 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java
  38. 3 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java
  39. 5 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
  40. 7 8
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java
  41. 397 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java
  42. 15 21
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
  43. 2 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java
  44. 4 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRAppMaster.java
  45. 14 7
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java
  46. 60 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java
  47. 3 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java
  48. 12 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEvents.java
  49. 39 12
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java
  50. 9 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java
  51. 4 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRCluster.java
  52. 7 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java
  53. 26 5
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
  54. 2 2
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
  55. 3 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
  56. 30 7
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
  57. 4 5
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
  58. 5 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
  59. 37 4
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java
  60. 71 20
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java
  61. 28 0
      hadoop-yarn-project/CHANGES.txt
  62. 86 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java
  63. 305 29
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java
  64. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllApplicationsResponsePBImpl.java
  65. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodesResponsePBImpl.java
  66. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetQueueUserAclsInfoResponsePBImpl.java
  67. 0 138
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMResponse.java
  68. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptId.java
  69. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationId.java
  70. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java
  71. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeId.java
  72. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Priority.java
  73. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
  74. 0 373
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMResponsePBImpl.java
  75. 0 10
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
  76. 7 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
  77. 191 228
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
  78. 7 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
  79. 6 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
  80. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java
  81. 354 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java
  82. 2 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientImpl.java
  83. 4 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
  84. 6 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClient.java
  85. 184 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java
  86. 2 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java
  87. 10 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  88. 8 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java
  89. 17 13
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java
  90. 18 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java
  91. 14 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
  92. 7 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRecordFactory.java
  93. 16 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java
  94. 4 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java
  95. 4 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java
  96. 4 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
  97. 69 58
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
  98. 4 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java
  99. 12 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java
  100. 18 13
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java

+ 8 - 18
BUILDING.txt

@@ -107,7 +107,7 @@ When you import the project to eclipse, install hadoop-maven-plugins at first.
  $ cd hadoop-maven-plugins
  $ mvn install
 
-Then, generate ecplise project files.
+Then, generate eclipse project files.
 
  $ mvn eclipse:eclipse -DskipTests
 
@@ -147,10 +147,10 @@ Requirements:
 * Windows System
 * JDK 1.6
 * Maven 3.0
-* Findbugs 1.3.9 (if running findbugs)
+* Windows SDK or Visual Studio 2010 Professional
 * ProtocolBuffer 2.4.1+ (for MapReduce and HDFS)
+* Findbugs 1.3.9 (if running findbugs)
 * Unix command-line tools from GnuWin32 or Cygwin: sh, mkdir, rm, cp, tar, gzip
-* Windows SDK or Visual Studio 2010 Professional
 * Internet connection for first build (to fetch all Maven and Hadoop dependencies)
 
 If using Visual Studio, it must be Visual Studio 2010 Professional (not 2012).
@@ -185,23 +185,13 @@ set Platform=Win32 (when building on a 32-bit system)
 Several tests require that the user must have the Create Symbolic Links
 privilege.
 
-All Maven goals are the same as described above, with the addition of profile
--Pnative-win to trigger building Windows native components.  The native
-components are required (not optional) on Windows.  For example:
-
- * Run tests                 : mvn -Pnative-win test
+All Maven goals are the same as described above with the exception that
+native code is built by enabling the 'native-win' Maven profile. -Pnative-win 
+is enabled by default when building on Windows since the native components 
+are required (not optional) on Windows.
 
 ----------------------------------------------------------------------------------
 Building distributions:
 
-Create binary distribution with native code and with documentation:
-
-  $ mvn package -Pdist,native-win,docs -DskipTests -Dtar
-
-Create source distribution:
-
-  $ mvn package -Pnative-win,src -DskipTests
-
-Create source and binary distributions with native code and documentation:
+ * Build distribution with native code    : mvn package [-Pdist][-Pdocs][-Psrc][-Dtar]
 
-  $ mvn package -Pdist,native-win,docs,src -DskipTests -Dtar

+ 17 - 1
hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java

@@ -383,9 +383,25 @@ public class KerberosName {
   * @param ruleString the rules string.
   */
  public static void setRules(String ruleString) {
-    rules = parseRules(ruleString);
+    rules = (ruleString != null) ? parseRules(ruleString) : null;
  }
 
+  /**
+   * Get the rules.
+   * @return String of configured rules, or null if not yet configured
+   */
+  public static String getRules() {
+    String ruleString = null;
+    if (rules != null) {
+      StringBuilder sb = new StringBuilder();
+      for (Rule rule : rules) {
+        sb.append(rule.toString()).append("\n");
+      }
+      ruleString = sb.toString().trim();
+    }
+    return ruleString;
+  }
+  
  /**
   * Indicates if the name rules have been set.
   * 

+ 14 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -15,6 +15,8 @@ Trunk (Unreleased)
    HADOOP-9151 Include RPC error info in RpcResponseHeader instead of sending
    it separately (sanjay Radia)
 
+    HADOOP-9380 Add totalLength to rpc response  (sanjay Radia)
+
   NEW FEATURES
     
     HADOOP-8561. Introduce HADOOP_PROXY_USER for secure impersonation in child
@@ -353,6 +355,9 @@ Trunk (Unreleased)
    HADOOP-9405. TestGridmixSummary#testExecutionSummarizer is broken. (Andrew
    Wang via atm)
 
+    HADOOP-9431 TestSecurityUtil#testLocalHostNameForNullOrWild on systems where hostname
+    contains capital letters  (Chris Nauroth via sanjay)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -485,6 +490,12 @@ Trunk (Unreleased)
     Windows with NTFS ACLs. (Chris Nauroth via suresh)
 
     HADOOP-9388. TestFsShellCopy fails on Windows. (Ivan Mitic via suresh)
+
+    HADOOP-9387. Fix DF so that it won't execute a shell command on Windows
+    to compute the file system/mount point.  (Ivan Mitic via szetszwo)
+
+    HADOOP-9353. Activate native-win maven profile by default on Windows.
+    (Arpit Agarwal via szetszwo)
     
 Release 2.0.5-beta - UNRELEASED
 
@@ -571,6 +582,9 @@ Release 2.0.5-beta - UNRELEASED
     HADOOP-9407. commons-daemon 1.0.3 dependency has bad group id causing
     build issues. (Sangjin Lee via suresh)
 
+    HADOOP-9299.  kerberos name resolution is kicking in even when kerberos
+    is not configured (daryn)
+
 Release 2.0.4-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 3 - 1
hadoop-common-project/hadoop-common/pom.xml

@@ -573,7 +573,9 @@
     <profile>
       <id>native-win</id>
       <activation>
-        <activeByDefault>false</activeByDefault>
+        <os>
+          <family>Windows</family>
+        </os>
       </activation>
       <build>
         <plugins>

+ 28 - 59
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java

@@ -50,37 +50,6 @@ public class DF extends Shell {
   
   private ArrayList<String> output;
 
-  enum OSType {
-    OS_TYPE_UNIX("UNIX"),
-    OS_TYPE_WIN("Windows"),
-    OS_TYPE_SOLARIS("SunOS"),
-    OS_TYPE_MAC("Mac"),
-    OS_TYPE_AIX("AIX");
-
-    private String id;
-    OSType(String id) {
-      this.id = id;
-    }
-    public boolean match(String osStr) {
-      return osStr != null && osStr.indexOf(id) >= 0;
-    }
-    String getId() {
-      return id;
-    }
-  }
-
-  private static final String OS_NAME = System.getProperty("os.name");
-  private static final OSType OS_TYPE = getOSType(OS_NAME);
-
-  protected static OSType getOSType(String osName) {
-    for (OSType ost : EnumSet.allOf(OSType.class)) {
-      if (ost.match(osName)) {
-        return ost;
-      }
-    }
-    return OSType.OS_TYPE_UNIX;
-  }
-
   public DF(File path, Configuration conf) throws IOException {
     this(path, conf.getLong(CommonConfigurationKeys.FS_DF_INTERVAL_KEY, DF.DF_INTERVAL_DEFAULT));
   }
@@ -92,10 +61,6 @@ public class DF extends Shell {
     this.output = new ArrayList<String>();
   }
 
-  protected OSType getOSType() {
-    return OS_TYPE;
-  }
-  
   /// ACCESSORS
 
   /** @return the canonical path to the volume we're checking. */
@@ -105,8 +70,13 @@ public class DF extends Shell {
 
   /** @return a string indicating which filesystem volume we're checking. */
   public String getFilesystem() throws IOException {
-    run();
-    return filesystem;
+    if (Shell.WINDOWS) {
+      this.filesystem = dirFile.getCanonicalPath().substring(0, 2);
+      return this.filesystem;
+    } else {
+      run();
+      return filesystem;
+    }
   }
 
   /** @return the capacity of the measured filesystem in bytes. */
@@ -138,16 +108,23 @@ public class DF extends Shell {
       throw new FileNotFoundException("Specified path " + dirFile.getPath()
           + "does not exist");
     }
-    run();
-    // Skip parsing if df was not successful
-    if (getExitCode() != 0) {
-      StringBuffer sb = new StringBuffer("df could not be run successfully: ");
-      for (String line: output) {
-        sb.append(line);
+
+    if (Shell.WINDOWS) {
+      // Assume a drive letter for a mount point
+      this.mount = dirFile.getCanonicalPath().substring(0, 2);
+    } else {
+      run();
+      // Skip parsing if df was not successful
+      if (getExitCode() != 0) {
+        StringBuffer sb = new StringBuffer("df could not be run successfully: ");
+        for (String line: output) {
+          sb.append(line);
+        }
+        throw new IOException(sb.toString());
       }
-      throw new IOException(sb.toString());
+      parseOutput();
     }
-    parseOutput();
+
     return mount;
   }
   
@@ -163,24 +140,16 @@ public class DF extends Shell {
       mount;
   }
 
-  @Override
-  protected void run() throws IOException {
-    if (WINDOWS) {
-      try {
-        this.mount = dirFile.getCanonicalPath().substring(0,2);
-      } catch (IOException e) {
-      }
-      return;
-    }
-    super.run();
-  }
-
   @Override
   protected String[] getExecString() {
     // ignoring the error since the exit code it enough
-    return (WINDOWS)? new String[]{"cmd", "/c", "df -k " + dirPath + " 2>nul"}:
-        new String[] {"bash","-c","exec 'df' '-k' '-P' '" + dirPath 
+    if (Shell.WINDOWS){
+      throw new AssertionError(
+          "DF.getExecString() should never be called on Windows");
+    } else {
+      return new String[] {"bash","-c","exec 'df' '-k' '-P' '" + dirPath 
                       + "' 2>/dev/null"};
                       + "' 2>/dev/null"};
+    }
   }
 
   @Override

+ 25 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java

@@ -83,6 +83,7 @@ import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.google.protobuf.CodedOutputStream;
 
 /** A client for an IPC service.  IPC calls take a single {@link Writable} as a
  * parameter, and return a {@link Writable} as their value.  A service runs on
@@ -242,7 +243,7 @@ public class Client {
       callComplete();
     }
     
-    public synchronized Writable getRpcResult() {
+    public synchronized Writable getRpcResponse() {
       return rpcResponse;
     }
   }
@@ -944,11 +945,14 @@ public class Client {
       touch();
       
       try {
+        int totalLen = in.readInt();
         RpcResponseHeaderProto header = 
             RpcResponseHeaderProto.parseDelimitedFrom(in);
         if (header == null) {
           throw new IOException("Response is null.");
         }
+        int headerLen = header.getSerializedSize();
+        headerLen += CodedOutputStream.computeRawVarint32Size(headerLen);
 
         int callId = header.getCallId();
         if (LOG.isDebugEnabled())
@@ -961,11 +965,28 @@ public class Client {
           value.readFields(in);                 // read value
           call.setRpcResponse(value);
           calls.remove(callId);
+          
+          // verify that length was correct
+          // only for ProtobufEngine where len can be verified easily
+          if (call.getRpcResponse() instanceof ProtobufRpcEngine.RpcWrapper) {
+            ProtobufRpcEngine.RpcWrapper resWrapper = 
+                (ProtobufRpcEngine.RpcWrapper) call.getRpcResponse();
+            if (totalLen != headerLen + resWrapper.getLength()) { 
+              throw new RpcClientException(
+                  "RPC response length mismatch on rpc success");
+            }
+          }
         } else { // Rpc Request failed
-            final String exceptionClassName = header.hasExceptionClassName() ?
+          // Verify that length was correct
+          if (totalLen != headerLen) {
+            throw new RpcClientException(
+                "RPC response length mismatch on rpc error");
+          }
+          
+          final String exceptionClassName = header.hasExceptionClassName() ?
                 header.getExceptionClassName() : 
                   "ServerDidNotSetExceptionClassName";
-            final String errorMsg = header.hasErrorMsg() ? 
+          final String errorMsg = header.hasErrorMsg() ? 
                 header.getErrorMsg() : "ServerDidNotSetErrorMsg" ;
           RemoteException re = 
               new RemoteException(exceptionClassName, errorMsg);
@@ -1251,7 +1272,7 @@ public class Client {
                   call.error);
         }
       } else {
-        return call.getRpcResult();
+        return call.getRpcResponse();
       }
     }
   }

+ 46 - 11
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java

@@ -48,7 +48,9 @@ import org.apache.hadoop.util.ProtoUtil;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.AbstractMessageLite;
 import com.google.protobuf.BlockingService;
+import com.google.protobuf.CodedOutputStream;
 import com.google.protobuf.Descriptors.MethodDescriptor;
 import com.google.protobuf.Message;
 import com.google.protobuf.ServiceException;
@@ -226,7 +228,7 @@ public class ProtobufRpcEngine implements RpcEngine {
       Message returnMessage;
       try {
         returnMessage = prototype.newBuilderForType()
-            .mergeFrom(val.responseMessage).build();
+            .mergeFrom(val.theResponseRead).build();
 
         if (LOG.isTraceEnabled()) {
           LOG.trace(Thread.currentThread().getId() + ": Response <- " +
@@ -267,6 +269,9 @@ public class ProtobufRpcEngine implements RpcEngine {
     }
   }
 
+  interface RpcWrapper extends Writable {
+    int getLength();
+  }
   /**
    * Wrapper for Protocol Buffer Requests
    * 
@@ -274,7 +279,7 @@ public class ProtobufRpcEngine implements RpcEngine {
    * Protobuf. Several methods on {@link org.apache.hadoop.ipc.Server and RPC} 
    * use type Writable as a wrapper to work across multiple RpcEngine kinds.
    */
-  private static class RpcRequestWrapper implements Writable {
+  private static class RpcRequestWrapper implements RpcWrapper {
     RequestHeaderProto requestHeader;
     Message theRequest; // for clientSide, the request is here
     byte[] theRequestRead; // for server side, the request is here
@@ -312,6 +317,22 @@ public class ProtobufRpcEngine implements RpcEngine {
       return requestHeader.getDeclaringClassProtocolName() + "." +
           requestHeader.getMethodName();
     }
+
+    @Override
+    public int getLength() {
+      int headerLen = requestHeader.getSerializedSize();
+      int reqLen;
+      if (theRequest != null) {
+        reqLen = theRequest.getSerializedSize();
+      } else if (theRequestRead != null ) {
+        reqLen = theRequestRead.length;
+      } else {
+        throw new IllegalArgumentException(
+            "getLenght on uninilialized RpcWrapper");      
+      }
+      return CodedOutputStream.computeRawVarint32Size(headerLen) +  headerLen
+          + CodedOutputStream.computeRawVarint32Size(reqLen) + reqLen;
+    }
   }
 
   /**
@@ -321,29 +342,43 @@ public class ProtobufRpcEngine implements RpcEngine {
    * Protobuf. Several methods on {@link org.apache.hadoop.ipc.Server and RPC} 
    * use type Writable as a wrapper to work across multiple RpcEngine kinds.
    */
-  private static class RpcResponseWrapper implements Writable {
-    byte[] responseMessage;
+  private static class RpcResponseWrapper implements RpcWrapper {
+    Message theResponse; // for senderSide, the response is here
+    byte[] theResponseRead; // for receiver side, the response is here
 
     @SuppressWarnings("unused")
     public RpcResponseWrapper() {
     }
 
     public RpcResponseWrapper(Message message) {
-      this.responseMessage = message.toByteArray();
+      this.theResponse = message;
     }
 
     @Override
     public void write(DataOutput out) throws IOException {
-      out.writeInt(responseMessage.length);
-      out.write(responseMessage);     
+      OutputStream os = DataOutputOutputStream.constructOutputStream(out);
+      theResponse.writeDelimitedTo(os);   
     }
 
     @Override
     public void readFields(DataInput in) throws IOException {
-      int length = in.readInt();
-      byte[] bytes = new byte[length];
-      in.readFully(bytes);
-      responseMessage = bytes;
+      int length = ProtoUtil.readRawVarint32(in);
+      theResponseRead = new byte[length];
+      in.readFully(theResponseRead);
+    }
+    
+    @Override
+    public int getLength() {
+      int resLen;
+      if (theResponse != null) {
+        resLen = theResponse.getSerializedSize();
+      } else if (theResponseRead != null ) {
+        resLen = theResponseRead.length;
+      } else {
+        throw new IllegalArgumentException(
+            "getLenght on uninilialized RpcWrapper");      
+      }
+      return CodedOutputStream.computeRawVarint32Size(resLen) + resLen;
     }
   }
 
 

+ 44 - 14
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java

@@ -73,6 +73,7 @@ import org.apache.hadoop.conf.Configuration.IntegerRanges;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
@@ -107,6 +108,7 @@ import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.CodedOutputStream;
 
 /** An abstract IPC service.  IPC calls take a single {@link Writable} as a
  * parameter, and return a {@link Writable} as their value.  A service runs on
@@ -202,7 +204,8 @@ public abstract class Server {
   // 6 : Made RPC Request header explicit
   // 7 : Changed Ipc Connection Header to use Protocol buffers
   // 8 : SASL server always sends a final response
-  public static final byte CURRENT_VERSION = 8;
+  // 9 : Changes to protocol for HADOOP-8990
+  public static final byte CURRENT_VERSION = 9;
 
   /**
    * Initial and max size of response buffer
@@ -1512,10 +1515,15 @@ public abstract class Server {
       " cannot communicate with client version " + clientVersion;
       " cannot communicate with client version " + clientVersion;
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       
       
-      if (clientVersion >= 3) {
+      if (clientVersion >= 9) {
+        // Versions >>9  understand the normal response
         Call fakeCall =  new Call(-1, null, this);
-        // Versions 3 and greater can interpret this exception
-        // response in the same manner
+        setupResponse(buffer, fakeCall, RpcStatusProto.FATAL,
+            null, VersionMismatch.class.getName(), errMsg);
+        responder.doRespond(fakeCall);
+      } else if (clientVersion >= 3) {
+        Call fakeCall =  new Call(-1, null, this);
+        // Versions 3 to 8 use older response
         setupResponseOldVersionFatal(buffer, fakeCall,
             null, VersionMismatch.class.getName(), errMsg);
 
@@ -1997,17 +2005,34 @@ public abstract class Server {
   throws IOException {
     responseBuf.reset();
     DataOutputStream out = new DataOutputStream(responseBuf);
-    RpcResponseHeaderProto.Builder response =  
+    RpcResponseHeaderProto.Builder headerBuilder =  
         RpcResponseHeaderProto.newBuilder();
-    response.setCallId(call.callId);
-    response.setStatus(status);
-    response.setServerIpcVersionNum(Server.CURRENT_VERSION);
-
+    headerBuilder.setCallId(call.callId);
+    headerBuilder.setStatus(status);
+    headerBuilder.setServerIpcVersionNum(Server.CURRENT_VERSION);
 
     if (status == RpcStatusProto.SUCCESS) {
+      RpcResponseHeaderProto header = headerBuilder.build();
+      final int headerLen = header.getSerializedSize();
+      int fullLength  = CodedOutputStream.computeRawVarint32Size(headerLen) +
+          headerLen;
       try {
-        response.build().writeDelimitedTo(out);
-        rv.write(out);
+        if (rv instanceof ProtobufRpcEngine.RpcWrapper) {
+          ProtobufRpcEngine.RpcWrapper resWrapper = 
+              (ProtobufRpcEngine.RpcWrapper) rv;
+          fullLength += resWrapper.getLength();
+          out.writeInt(fullLength);
+          header.writeDelimitedTo(out);
+          rv.write(out);
+        } else { // Have to serialize to buffer to get len
+          final DataOutputBuffer buf = new DataOutputBuffer();
+          rv.write(buf);
+          byte[] data = buf.getData();
+          fullLength += buf.getLength();
+          out.writeInt(fullLength);
+          header.writeDelimitedTo(out);
+          out.write(data, 0, buf.getLength());
+        }
       } catch (Throwable t) {
         LOG.warn("Error serializing call response for call " + call, t);
         // Call back to same function - this is OK since the
@@ -2019,9 +2044,14 @@ public abstract class Server {
         return;
       }
     } else { // Rpc Failure
-      response.setExceptionClassName(errorClass);
-      response.setErrorMsg(error);
-      response.build().writeDelimitedTo(out);
+      headerBuilder.setExceptionClassName(errorClass);
+      headerBuilder.setErrorMsg(error);
+      RpcResponseHeaderProto header = headerBuilder.build();
+      int headerLen = header.getSerializedSize();
+      final int fullLength  = 
+          CodedOutputStream.computeRawVarint32Size(headerLen) + headerLen;
+      out.writeInt(fullLength);
+      header.writeDelimitedTo(out);
     }
     if (call.connection.useWrap) {
       wrapWithSasl(responseBuf, call);

+ 5 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java

@@ -149,4 +149,9 @@ public class CachedDNSToSwitchMapping extends AbstractDNSToSwitchMapping {
   public boolean isSingleSwitch() {
     return isMappingSingleSwitch(rawMapping);
   }
+  
+  @Override
+  public void reloadCachedMappings() {
+    cache.clear();
+  }
 }

+ 8 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java

@@ -51,4 +51,12 @@ public interface DNSToSwitchMapping {
    * If <i>names</i> is empty, the returned list is also empty
    */
   public List<String> resolve(List<String> names);
+
+  /**
+   * Reload all of the cached mappings.
+   *
+   * If there is a cache, this method will clear it, so that future accesses
+   * will get a chance to see the new data.
+   */
+  public void reloadCachedMappings();
 }

+ 8 - 8
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -392,8 +392,16 @@ public class NetworkTopology {
       throw new IllegalArgumentException(
         "Not allow to add an inner node: "+NodeBase.getPath(node));
     }
+    int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
     netlock.writeLock().lock();
     try {
+      if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
+        LOG.error("Error: can't add leaf node at depth " +
+            newDepth + " to topology:\n" + oldTopoStr);
+        throw new InvalidTopologyException("Invalid network topology. " +
+            "You cannot have a rack and a non-rack node at the same " +
+            "level of the network topology.");
+      }
       Node rack = getNodeForNetworkLocation(node);
       if (rack != null && !(rack instanceof InnerNode)) {
         throw new IllegalArgumentException("Unexpected data node " 
@@ -408,14 +416,6 @@ public class NetworkTopology {
         if (!(node instanceof InnerNode)) {
           if (depthOfAllLeaves == -1) {
             depthOfAllLeaves = node.getLevel();
-          } else {
-            if (depthOfAllLeaves != node.getLevel()) {
-              LOG.error("Error: can't add leaf node at depth " +
-                  node.getLevel() + " to topology:\n" + oldTopoStr);
-              throw new InvalidTopologyException("Invalid network topology. " +
-                  "You cannot have a rack and a non-rack node at the same " +
-                  "level of the network topology.");
-            }
           }
         }
       }

+ 12 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NodeBase.java

@@ -167,4 +167,16 @@ public class NodeBase implements Node {
   public void setLevel(int level) {
     this.level = level;
   }
+  
+  public static int locationToDepth(String location) {
+    String normalizedLocation = normalize(location);
+    int length = normalizedLocation.length();
+    int depth = 0;
+    for (int i = 0; i < length; i++) {
+      if (normalizedLocation.charAt(i) == PATH_SEPARATOR) {
+        depth++;
+      }
+    }
+    return depth;
+  }
 }

+ 6 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java

@@ -263,5 +263,11 @@ public final class ScriptBasedMapping extends CachedDNSToSwitchMapping {
     public String toString() {
       return scriptName != null ? ("script " + scriptName) : NO_SCRIPT;
     }
+
+    @Override
+    public void reloadCachedMappings() {
+      // Nothing to do here, since RawScriptBasedMapping has no cache, and
+      // does not inherit from CachedDNSToSwitchMapping
+    }
   }
 }

+ 37 - 19
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java

@@ -76,20 +76,24 @@ public class TableMapping extends CachedDNSToSwitchMapping {
     getRawMapping().setConf(conf);
   }
   
+  @Override
+  public void reloadCachedMappings() {
+    super.reloadCachedMappings();
+    getRawMapping().reloadCachedMappings();
+  }
+  
   private static final class RawTableMapping extends Configured
       implements DNSToSwitchMapping {
     
-    private final Map<String, String> map = new HashMap<String, String>();
-    private boolean initialized = false;
+    private Map<String, String> map;
   
-    private synchronized void load() {
-      map.clear();
+    private Map<String, String> load() {
+      Map<String, String> loadMap = new HashMap<String, String>();
   
       String filename = getConf().get(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, null);
       if (StringUtils.isBlank(filename)) {
-        LOG.warn(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY + " not configured. "
-            + NetworkTopology.DEFAULT_RACK + " will be returned.");
-        return;
+        LOG.warn(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY + " not configured. ");
+        return null;
       }
   
       BufferedReader reader = null;
@@ -101,7 +105,7 @@ public class TableMapping extends CachedDNSToSwitchMapping {
           if (line.length() != 0 && line.charAt(0) != '#') {
             String[] columns = line.split("\\s+");
             if (columns.length == 2) {
-              map.put(columns[0], columns[1]);
+              loadMap.put(columns[0], columns[1]);
             } else {
               LOG.warn("Line does not have two columns. Ignoring. " + line);
             }
@@ -109,29 +113,31 @@ public class TableMapping extends CachedDNSToSwitchMapping {
           line = reader.readLine();
         }
       } catch (Exception e) {
-        LOG.warn(filename + " cannot be read. " + NetworkTopology.DEFAULT_RACK
-            + " will be returned.", e);
-        map.clear();
+        LOG.warn(filename + " cannot be read.", e);
+        return null;
       } finally {
         if (reader != null) {
           try {
             reader.close();
           } catch (IOException e) {
-            LOG.warn(filename + " cannot be read. "
-                + NetworkTopology.DEFAULT_RACK + " will be returned.", e);
-            map.clear();
+            LOG.warn(filename + " cannot be read.", e);
+            return null;
           }
         }
       }
+      return loadMap;
     }
   
     @Override
     public synchronized List<String> resolve(List<String> names) {
-      if (!initialized) {
-        initialized = true;
-        load();
+      if (map == null) {
+        map = load();
+        if (map == null) {
+          LOG.warn("Failed to read topology table. " +
+            NetworkTopology.DEFAULT_RACK + " will be used for all nodes.");
+          map = new HashMap<String, String>();
+        }
       }
-  
       List<String> results = new ArrayList<String>(names.size());
       for (String name : names) {
         String result = map.get(name);
@@ -143,6 +149,18 @@ public class TableMapping extends CachedDNSToSwitchMapping {
       }
       return results;
     }
-    
+
+    @Override
+    public void reloadCachedMappings() {
+      Map<String, String> newMap = load();
+      if (newMap == null) {
+        LOG.error("Failed to reload the topology table.  The cached " +
+            "mappings will not be cleared.");
+      } else {
+        synchronized(this) {
+          map = newMap;
+        }
+      }
+    }
   }
 }

+ 19 - 11
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java

@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.security;
 
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
+
 import java.io.IOException;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -25,7 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 /**
  * This class implements parsing and handling of Kerberos principal names. In 
  * particular, it splits them apart and translates them down into local
@@ -36,15 +37,6 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 @InterfaceStability.Evolving
 public class HadoopKerberosName extends KerberosName {
 
-  static {
-    try {
-      KerberosUtil.getDefaultRealm();
-    } catch (Exception ke) {
-      if(UserGroupInformation.isSecurityEnabled())
-        throw new IllegalArgumentException("Can't get Kerberos configuration",ke);
-    }
-  }
-
   /**
    * Create a name from the full Kerberos principal name.
    * @param name
@@ -63,7 +55,23 @@ public class HadoopKerberosName extends KerberosName {
    * @throws IOException
    */
   public static void setConfiguration(Configuration conf) throws IOException {
-    String ruleString = conf.get(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL, "DEFAULT");
+    final String defaultRule;
+    switch (SecurityUtil.getAuthenticationMethod(conf)) {
+      case KERBEROS:
+      case KERBEROS_SSL:
+        try {
+          KerberosUtil.getDefaultRealm();
+        } catch (Exception ke) {
+          throw new IllegalArgumentException("Can't get Kerberos realm", ke);
+        }
+        defaultRule = "DEFAULT";
+        break;
+      default:
+        // just extract the simple user name
+        defaultRule = "RULE:[1:$1] RULE:[2:$1]";
+        break; 
+    }
+    String ruleString = conf.get(HADOOP_SECURITY_AUTH_TO_LOCAL, defaultRule);
     setRules(ruleString);
   }
 

+ 22 - 24
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -53,14 +53,12 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
-import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
@@ -192,8 +190,6 @@ public class UserGroupInformation {
 
   /** Metrics to track UGI activity */
   static UgiMetrics metrics = UgiMetrics.create();
-  /** Are the static variables that depend on configuration initialized? */
-  private static boolean isInitialized = false;
   /** The auth method to use */
   private static AuthenticationMethod authenticationMethod;
   /** Server-side groups fetching service */
@@ -213,8 +209,8 @@ public class UserGroupInformation {
    * Must be called before useKerberos or groups is used.
    */
   private static synchronized void ensureInitialized() {
-    if (!isInitialized) {
-        initialize(new Configuration(), KerberosName.hasRulesBeenSet());
+    if (conf == null) {
+      initialize(new Configuration(), false);
     }
   }
 
@@ -222,25 +218,17 @@ public class UserGroupInformation {
    * Initialize UGI and related classes.
    * @param conf the configuration to use
    */
-  private static synchronized void initialize(Configuration conf, boolean skipRulesSetting) {
-    initUGI(conf);
-    // give the configuration on how to translate Kerberos names
-    try {
-      if (!skipRulesSetting) {
+  private static synchronized void initialize(Configuration conf,
+                                              boolean overrideNameRules) {
+    authenticationMethod = SecurityUtil.getAuthenticationMethod(conf);
+    if (overrideNameRules || !HadoopKerberosName.hasRulesBeenSet()) {
+      try {
         HadoopKerberosName.setConfiguration(conf);
+      } catch (IOException ioe) {
+        throw new RuntimeException(
+            "Problem with Kerberos auth_to_local name configuration", ioe);
       }
       }
-    } catch (IOException ioe) {
-      throw new RuntimeException("Problem with Kerberos auth_to_local name " +
-          "configuration", ioe);
     }
     }
-  }
-  
-  /**
-   * Set the configuration values for UGI.
-   * @param conf the configuration to use
-   */
-  private static synchronized void initUGI(Configuration conf) {
-    authenticationMethod = SecurityUtil.getAuthenticationMethod(conf);
     try {
         kerberosMinSecondsBeforeRelogin = 1000L * conf.getLong(
                 HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN,
@@ -255,7 +243,6 @@ public class UserGroupInformation {
     if (!(groups instanceof TestingGroups)) {
       groups = Groups.getUserToGroupsMappingService(conf);
     }
-    isInitialized = true;
     UserGroupInformation.conf = conf;
   }
 
@@ -268,7 +255,18 @@ public class UserGroupInformation {
   @InterfaceAudience.Public
   @InterfaceStability.Evolving
   public static void setConfiguration(Configuration conf) {
-    initialize(conf, false);
+    initialize(conf, true);
+  }
+  
+  @InterfaceAudience.Private
+  @VisibleForTesting
+  static void reset() {
+    authenticationMethod = null;
+    conf = null;
+    groups = null;
+    kerberosMinSecondsBeforeRelogin = 0;
+    setLoginUser(null);
+    HadoopKerberosName.setRules(null);
   }
   
   /**

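Taken together, the UserGroupInformation changes above simplify the initialization contract: implicit initialization (triggered by the first UGI call) only fills in defaults and honors auth_to_local rules that were already set, while an explicit setConfiguration() now always re-applies the rules from the supplied Configuration. A minimal sketch of that precedence using only APIs visible in the patch (the rule strings are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hadoop.security.authentication.util.KerberosName;

    public class UgiInitSketch {
      public static void main(String[] args) throws Exception {
        // Rules set directly survive the implicit init triggered below...
        KerberosName.setRules("DEFAULT");
        UserGroupInformation.createRemoteUser("someone");   // calls ensureInitialized()
        System.out.println(KerberosName.getRules());        // still "DEFAULT"

        // ...but an explicit setConfiguration() re-applies hadoop.security.auth_to_local.
        Configuration conf = new Configuration();
        conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL,
            "RULE:[1:$1@$0](.*@EXAMPLE.COM)s/@.*//DEFAULT");
        UserGroupInformation.setConfiguration(conf);
        System.out.println(KerberosName.getRules());        // the rule from the conf
      }
    }
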
+ 15 - 15
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDFVariations.java

@@ -36,15 +36,10 @@ import static org.junit.Assert.*;
 public class TestDFVariations {
 
   public static class XXDF extends DF {
-    private final String osName;
-    public XXDF(String osName) throws IOException {
+    public XXDF() throws IOException {
       super(new File(System.getProperty("test.build.data","/tmp")), 0L);
-      this.osName = osName;
-    }
-    @Override
-    public DF.OSType getOSType() {
-      return DF.getOSType(osName);
     }
+
     @Override
     protected String[] getExecString() {
       return new String[] { "echo", "IGNORE\n", 
@@ -53,15 +48,20 @@ public class TestDFVariations {
   }
 
   @Test(timeout=5000)
-  public void testOSParsing() throws Exception {
-    for (DF.OSType ost : EnumSet.allOf(DF.OSType.class)) {
-      XXDF df = new XXDF(ost.getId());
-      assertEquals(ost.getId() + " mount",
-        Shell.WINDOWS ? df.getDirPath().substring(0, 2) : "/foo/bar",
-        df.getMount());
-    }
+  public void testMountAndFileSystem() throws Exception {
+    XXDF df = new XXDF();
+    String expectedMount =
+        Shell.WINDOWS ? df.getDirPath().substring(0, 2) : "/foo/bar";
+    String expectedFileSystem =
+        Shell.WINDOWS ? df.getDirPath().substring(0, 2) : "/dev/sda3";
+
+    assertEquals("Invalid mount point",
+        expectedMount, df.getMount());
+
+    assertEquals("Invalid filesystem",
+        expectedFileSystem, df.getFilesystem());
   }
-  
+
   @Test(timeout=5000)
   public void testDFInvalidPath() throws Exception {
     // Generate a path that doesn't exist

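With the OS-specific code gone, DF simply parses the platform df output for whichever directory it is given; a brief usage sketch (the directory and sample outputs are illustrative, not taken from the patch):

    import java.io.File;
    import java.io.IOException;
    import org.apache.hadoop.fs.DF;

    public class DfSketch {
      public static void main(String[] args) throws IOException {
        // Query "df" for the partition backing /tmp; the 0L interval disables caching.
        DF df = new DF(new File("/tmp"), 0L);
        System.out.println(df.getFilesystem());  // e.g. /dev/sda3
        System.out.println(df.getMount());       // e.g. /
      }
    }
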
+ 5 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/StaticMapping.java

@@ -147,4 +147,9 @@ public class StaticMapping extends AbstractDNSToSwitchMapping  {
       nameToRackMap.clear();
     }
   }
+  
+  public void reloadCachedMappings() {
+    // reloadCachedMappings does nothing for StaticMapping; there is
+    // nowhere to reload from since all data is in memory.
+  }
 }

+ 4 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSwitchMapping.java

@@ -116,5 +116,9 @@ public class TestSwitchMapping extends Assert {
     public List<String> resolve(List<String> names) {
       return names;
     }
+
+    @Override
+    public void reloadCachedMappings() {
+    }
   }
 }

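The reloadCachedMappings() method being stubbed out above is the new hook on DNSToSwitchMapping that lets the NameNode discard stale rack answers (it is invoked further down when registration hits an InvalidTopologyException). A hypothetical implementation, not part of the patch, showing the intended contract: resolve() may answer from a cache, and reloadCachedMappings() must clear that cache so the next resolve() recomputes locations.

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import org.apache.hadoop.net.DNSToSwitchMapping;
    import org.apache.hadoop.net.NetworkTopology;

    public class ExampleCachingMapping implements DNSToSwitchMapping {
      private final Map<String, String> cache = new ConcurrentHashMap<String, String>();

      @Override
      public List<String> resolve(List<String> names) {
        List<String> racks = new ArrayList<String>(names.size());
        for (String name : names) {
          String rack = cache.get(name);
          if (rack == null) {
            rack = lookupRack(name);   // the expensive lookup (script, table, DNS, ...)
            cache.put(name, rack);
          }
          racks.add(rack);
        }
        return racks;
      }

      @Override
      public void reloadCachedMappings() {
        cache.clear();                 // forget possibly-stale topology answers
      }

      private String lookupRack(String name) {
        return NetworkTopology.DEFAULT_RACK;  // placeholder for a real lookup
      }
    }
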
+ 55 - 17
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java

@@ -34,23 +34,17 @@ import org.junit.Before;
 import org.junit.Test;
 
 public class TestTableMapping {
-  
-  private File mappingFile;
-  
-  @Before
-  public void setUp() throws IOException {
-    mappingFile = File.createTempFile(getClass().getSimpleName(), ".txt");
-    Files.write("a.b.c /rack1\n" +
-                "1.2.3.4\t/rack2\n", mappingFile, Charsets.UTF_8);
-    mappingFile.deleteOnExit();
-  }
-
   @Test
   public void testResolve() throws IOException {
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testResolve", ".txt");
+    Files.write("a.b.c /rack1\n" +
+                "1.2.3.4\t/rack2\n", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
     TableMapping mapping = new TableMapping();
 
     Configuration conf = new Configuration();
-    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mappingFile.getCanonicalPath());
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
     mapping.setConf(conf);
 
     List<String> names = new ArrayList<String>();
@@ -65,10 +59,15 @@ public class TestTableMapping {
 
 
   @Test
   public void testTableCaching() throws IOException {
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testTableCaching", ".txt");
+    Files.write("a.b.c /rack1\n" +
+                "1.2.3.4\t/rack2\n", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
     TableMapping mapping = new TableMapping();
 
     Configuration conf = new Configuration();
-    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mappingFile.getCanonicalPath());
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
     mapping.setConf(conf);
 
     List<String> names = new ArrayList<String>();
@@ -123,13 +122,53 @@ public class TestTableMapping {
   }
 
   @Test
+  public void testClearingCachedMappings() throws IOException {
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testClearingCachedMappings", ".txt");
+    Files.write("a.b.c /rack1\n" +
+                "1.2.3.4\t/rack2\n", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
+
+    TableMapping mapping = new TableMapping();
+
+    Configuration conf = new Configuration();
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
+    mapping.setConf(conf);
+
+    List<String> names = new ArrayList<String>();
+    names.add("a.b.c");
+    names.add("1.2.3.4");
+
+    List<String> result = mapping.resolve(names);
+    assertEquals(names.size(), result.size());
+    assertEquals("/rack1", result.get(0));
+    assertEquals("/rack2", result.get(1));
+
+    Files.write("", mapFile, Charsets.UTF_8);
+
+    mapping.reloadCachedMappings();
+
+    names = new ArrayList<String>();
+    names.add("a.b.c");
+    names.add("1.2.3.4");
+
+    result = mapping.resolve(names);
+    assertEquals(names.size(), result.size());
+    assertEquals(NetworkTopology.DEFAULT_RACK, result.get(0));
+    assertEquals(NetworkTopology.DEFAULT_RACK, result.get(1));
+  }
+
+
+  @Test(timeout=60000)
   public void testBadFile() throws IOException {
-    Files.write("bad contents", mappingFile, Charsets.UTF_8);
-    
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testBadFile", ".txt");
+    Files.write("bad contents", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
     TableMapping mapping = new TableMapping();
 
     Configuration conf = new Configuration();
-    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mappingFile.getCanonicalPath());
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
     mapping.setConf(conf);
 
     List<String> names = new ArrayList<String>();
@@ -141,5 +180,4 @@ public class TestTableMapping {
     assertEquals(result.get(0), NetworkTopology.DEFAULT_RACK);
     assertEquals(result.get(1), NetworkTopology.DEFAULT_RACK);
   }
-
 }

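For reference, the TableMapping exercised by these tests is selected on the NameNode by pointing the switch-mapping implementation at it and naming the table file (two whitespace-separated columns: host or IP, then rack path). A hedged wiring sketch; the file path is made up, and this is simply the programmatic equivalent of setting the two keys in core-site.xml:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
    import org.apache.hadoop.net.DNSToSwitchMapping;
    import org.apache.hadoop.net.TableMapping;

    public class TableMappingWiringSketch {
      public static Configuration configure() {
        Configuration conf = new Configuration();
        // Use the table-driven mapper instead of the default script-based one.
        conf.setClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
            TableMapping.class, DNSToSwitchMapping.class);
        conf.set(CommonConfigurationKeysPublic.NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY,
            "/etc/hadoop/topology.table");
        return conf;
      }
    }
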
+ 2 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java

@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
+import java.util.Locale;
 
 import javax.security.auth.kerberos.KerberosPrincipal;
 
@@ -112,7 +113,7 @@ public class TestSecurityUtil {
 
   @Test
   public void testLocalHostNameForNullOrWild() throws Exception {
-    String local = SecurityUtil.getLocalHostName();
+    String local = SecurityUtil.getLocalHostName().toLowerCase(Locale.US);
     assertEquals("hdfs/" + local + "@REALM",
                  SecurityUtil.getServerPrincipal("hdfs/_HOST@REALM", (String)null));
     assertEquals("hdfs/" + local + "@REALM",

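The test adjustment above reflects that the hostname substituted for _HOST is now lower-cased, so expected values have to be normalised the same way. A short usage sketch (the hostname in the comment is only an example):

    import org.apache.hadoop.security.SecurityUtil;

    public class ServerPrincipalSketch {
      public static void main(String[] args) throws Exception {
        // _HOST is replaced with the local hostname when the host argument is
        // null or a wildcard address; the result uses the lower-cased name.
        String principal =
            SecurityUtil.getServerPrincipal("hdfs/_HOST@REALM", (String) null);
        System.out.println(principal);  // e.g. hdfs/node-1.example.com@REALM
      }
    }
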
+ 130 - 17
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java

@@ -38,10 +38,12 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
+import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import static org.apache.hadoop.test.MetricsAsserts.*;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
 import org.apache.hadoop.util.Shell;
 
 public class TestUserGroupInformation {
@@ -73,17 +75,18 @@ public class TestUserGroupInformation {
   public static void setup() {
     javax.security.auth.login.Configuration.setConfiguration(
         new DummyLoginConfiguration());
+    // doesn't matter what it is, but getGroups needs it set...
+    System.setProperty("hadoop.home.dir", "/tmp");
+    // fake the realm is kerberos is enabled
+    System.setProperty("java.security.krb5.kdc", "");
+    System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
   }
   
   @Before
   public void setupUgi() {
     conf = new Configuration();
-    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL,
-        "RULE:[2:$1@$0](.*@HADOOP.APACHE.ORG)s/@.*//" +
-        "RULE:[1:$1@$0](.*@HADOOP.APACHE.ORG)s/@.*//"
-        + "DEFAULT");
+    UserGroupInformation.reset();
     UserGroupInformation.setConfiguration(conf);
-    UserGroupInformation.setLoginUser(null);
   }
   
   @After
@@ -230,28 +233,138 @@ public class TestUserGroupInformation {
   /** test constructor */
   @Test (timeout = 30000)
   public void testConstructor() throws Exception {
-    UserGroupInformation ugi = 
-      UserGroupInformation.createUserForTesting("user2/cron@HADOOP.APACHE.ORG", 
-                                                GROUP_NAMES);
-    // make sure the short and full user names are correct
-    assertEquals("user2/cron@HADOOP.APACHE.ORG", ugi.getUserName());
-    assertEquals("user2", ugi.getShortUserName());
-    ugi = UserGroupInformation.createUserForTesting(USER_NAME, GROUP_NAMES);
-    assertEquals("user1", ugi.getShortUserName());
+    // security off, so default should just return simple name
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user2@DEFAULT.REALM", "user2");
+    testConstructorSuccess("user3/cron@DEFAULT.REALM", "user3");    
+    testConstructorSuccess("user4@OTHER.REALM", "user4");
+    testConstructorSuccess("user5/cron@OTHER.REALM", "user5");
+    // failure test
+    testConstructorFailures(null);
+    testConstructorFailures("");
+  }
+  
+  /** test constructor */
+  @Test (timeout = 30000)
+  public void testConstructorWithRules() throws Exception {
+    // security off, but use rules if explicitly set
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL,
+        "RULE:[1:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/");
+    UserGroupInformation.setConfiguration(conf);
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user4@OTHER.REALM", "other-user4");
+    // failure test
+    testConstructorFailures("user2@DEFAULT.REALM");
+    testConstructorFailures("user3/cron@DEFAULT.REALM");
+    testConstructorFailures("user5/cron@OTHER.REALM");
+    testConstructorFailures(null);
+    testConstructorFailures("");
+  }
+  
+  /** test constructor */
+  @Test (timeout = 30000)
+  public void testConstructorWithKerberos() throws Exception {
+    // security on, default is remove default realm
+    SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, conf);
+    UserGroupInformation.setConfiguration(conf);
+
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user2@DEFAULT.REALM", "user2");
+    testConstructorSuccess("user3/cron@DEFAULT.REALM", "user3");    
+    // failure test
+    testConstructorFailures("user4@OTHER.REALM");
+    testConstructorFailures("user5/cron@OTHER.REALM");
+    testConstructorFailures(null);
+    testConstructorFailures("");
+  }
+
+  /** test constructor */
+  @Test (timeout = 30000)
+  public void testConstructorWithKerberosRules() throws Exception {
+    // security on, explicit rules
+    SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, conf);
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL,
+        "RULE:[2:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/" +
+        "RULE:[1:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/" +
+        "DEFAULT");
+    UserGroupInformation.setConfiguration(conf);
     
     
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user2@DEFAULT.REALM", "user2");
+    testConstructorSuccess("user3/cron@DEFAULT.REALM", "user3");    
+    testConstructorSuccess("user4@OTHER.REALM", "other-user4");
+    testConstructorSuccess("user5/cron@OTHER.REALM", "other-user5");
     // failure test
     testConstructorFailures(null);
     testConstructorFailures("");
   }
 
+  private void testConstructorSuccess(String principal, String shortName) {
+    UserGroupInformation ugi = 
+        UserGroupInformation.createUserForTesting(principal, GROUP_NAMES);
+    // make sure the short and full user names are correct
+    assertEquals(principal, ugi.getUserName());
+    assertEquals(shortName, ugi.getShortUserName());
+  }
+  
   private void testConstructorFailures(String userName) {
-    boolean gotException = false;
     try {
       UserGroupInformation.createRemoteUser(userName);
-    } catch (Exception e) {
-      gotException = true;
+      fail("user:"+userName+" wasn't invalid");
+    } catch (IllegalArgumentException e) {
+      String expect = (userName == null || userName.isEmpty())
+          ? "Null user" : "Illegal principal name "+userName;
+      assertEquals(expect, e.getMessage());
     }
-    assertTrue(gotException);
+  }
+
+  @Test (timeout = 30000)
+  public void testSetConfigWithRules() {
+    String[] rules = { "RULE:[1:TEST1]", "RULE:[1:TEST2]", "RULE:[1:TEST3]" };
+
+    // explicitly set a rule
+    UserGroupInformation.reset();
+    assertFalse(KerberosName.hasRulesBeenSet());
+    KerberosName.setRules(rules[0]);
+    assertTrue(KerberosName.hasRulesBeenSet());
+    assertEquals(rules[0], KerberosName.getRules());
+
+    // implicit init should honor rules already being set
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertEquals(rules[0], KerberosName.getRules());
+
+    // set conf, should override
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL, rules[1]);
+    UserGroupInformation.setConfiguration(conf);
+    assertEquals(rules[1], KerberosName.getRules());
+
+    // set conf, should again override
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL, rules[2]);
+    UserGroupInformation.setConfiguration(conf);
+    assertEquals(rules[2], KerberosName.getRules());
+    
+    // implicit init should honor rules already being set
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertEquals(rules[2], KerberosName.getRules());
+  }
+
+  @Test (timeout = 30000)
+  public void testEnsureInitWithRules() throws IOException {
+    String rules = "RULE:[1:RULE1]";
+
+    // trigger implicit init, rules should init
+    UserGroupInformation.reset();
+    assertFalse(KerberosName.hasRulesBeenSet());
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertTrue(KerberosName.hasRulesBeenSet());
+    
+    // set a rule, trigger implicit init, rule should not change 
+    UserGroupInformation.reset();
+    KerberosName.setRules(rules);
+    assertTrue(KerberosName.hasRulesBeenSet());
+    assertEquals(rules, KerberosName.getRules());
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertEquals(rules, KerberosName.getRules());
   }
   }
 
 
   @Test (timeout = 30000)

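The auth_to_local strings these tests exercise follow the pattern RULE:[n:format](match)s/regex/replacement/, where n is the number of principal components the rule applies to; rules are evaluated in order and DEFAULT strips the local realm. A hedged illustration of how one of the rules above maps a principal (values are examples only):

    import org.apache.hadoop.security.authentication.util.KerberosName;

    public class AuthToLocalSketch {
      public static void main(String[] args) throws Exception {
        // Single-component principals from OTHER.REALM become "other-<user>".
        KerberosName.setRules(
            "RULE:[1:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/" + "DEFAULT");
        System.out.println(new KerberosName("user4@OTHER.REALM").getShortName());
        // prints: other-user4
      }
    }
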
+ 24 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -363,6 +363,12 @@ Release 2.0.5-beta - UNRELEASED
     HDFS-4569. Small image transfer related cleanups.
     (Andrew Wang via suresh)
 
+    HDFS-4521. Invalid network toploogies should not be cached. (Colin Patrick
+    McCabe via atm)
+
+    HDFS-4246. The exclude node list should be more forgiving, for each output
+    stream. (harsh via atm)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -424,6 +430,24 @@ Release 2.0.5-beta - UNRELEASED
     HDFS-4596. Shutting down namenode during checkpointing can lead to md5sum
     error. (Andrew Wang via atm)
 
+    HDFS-4614. FSNamesystem#getContentSummary should use getPermissionChecker
+    helper method. (atm)
+
+    HDFS-4620. Documentation for dfs.namenode.rpc-address specifies wrong
+    format. (Sandy Ryza via atm)
+
+    HDFS-4607. In TestGetConf.testGetSpecificKey(), use a platform-specific
+    line separator; otherwise, it fails on Windows.  (Ivan Mitic via szetszwo)
+
+    HDFS-4609. TestAuditLogs should release log handles between tests. 
+    (Ivan Mitic via szetszwo)
+
+    HDFS-4615. Fix TestDFSShell failures on Windows.  (Arpit Agarwal
+    via szetszwo)
+
+    HDFS-4584. Skip TestNNWithQJM.testNewNamenodeTakesOverWriter() on Windows.
+    (Arpit Agarwal via szetszwo)
+
 Release 2.0.4-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -41,6 +41,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPAC
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT;
@@ -215,6 +217,7 @@ public class DFSClient implements java.io.Closeable {
     final int socketTimeout;
     final int socketCacheCapacity;
     final long socketCacheExpiry;
+    final long excludedNodesCacheExpiry;
     /** Wait time window (in msec) if BlockMissingException is caught */
     final int timeWindow;
     final int nCachedConnRetry;
@@ -265,6 +268,9 @@ public class DFSClient implements java.io.Closeable {
           DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT);
       socketCacheExpiry = conf.getLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
           DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT);
+      excludedNodesCacheExpiry = conf.getLong(
+          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
+          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT);
       prefetchSize = conf.getLong(DFS_CLIENT_READ_PREFETCH_SIZE_KEY,
           10 * defaultBlockSize);
       timeWindow = conf

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -76,6 +76,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   
   
   public static final String  DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY = "dfs.client.socketcache.expiryMsec";
   public static final long    DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT = 2 * 60 * 1000;
+  public static final String  DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL = "dfs.client.write.exclude.nodes.cache.expiry.interval.millis";
+  public static final long    DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT = 10 * 60 * 1000; // 10 minutes, in ms
   public static final String  DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address";
   public static final String  DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT = "localhost:50100";
   public static final String  DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY = "dfs.namenode.backup.http-address";

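A client that keeps a file open for a long time — the write-ahead-log case this key is aimed at — can shorten the forgiveness window programmatically. A hedged sketch using the new constant (the 30-second value is only an example):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class ExcludeCacheTuningSketch {
      public static FileSystem open() throws Exception {
        Configuration conf = new HdfsConfiguration();
        // Re-admit previously excluded DataNodes after 30 seconds instead of
        // the 10-minute default.
        conf.setLong(DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
            30 * 1000L);
        return FileSystem.get(conf);
      }
    }
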
+ 30 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java

@@ -35,6 +35,7 @@ import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -83,6 +84,11 @@ import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
 
 
 /****************************************************************
@@ -290,7 +296,25 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
     private DataInputStream blockReplyStream;
     private ResponseProcessor response = null;
     private volatile DatanodeInfo[] nodes = null; // list of targets for current block
-    private ArrayList<DatanodeInfo> excludedNodes = new ArrayList<DatanodeInfo>();
+    private LoadingCache<DatanodeInfo, DatanodeInfo> excludedNodes =
+        CacheBuilder.newBuilder()
+        .expireAfterWrite(
+            dfsClient.getConf().excludedNodesCacheExpiry,
+            TimeUnit.MILLISECONDS)
+        .removalListener(new RemovalListener<DatanodeInfo, DatanodeInfo>() {
+          @Override
+          public void onRemoval(
+              RemovalNotification<DatanodeInfo, DatanodeInfo> notification) {
+            DFSClient.LOG.info("Removing node " +
+                notification.getKey() + " from the excluded nodes list");
+          }
+        })
+        .build(new CacheLoader<DatanodeInfo, DatanodeInfo>() {
+          @Override
+          public DatanodeInfo load(DatanodeInfo key) throws Exception {
+            return key;
+          }
+        });
     volatile boolean hasError = false;
     volatile int errorIndex = -1;
     private BlockConstructionStage stage;  // block construction stage
@@ -1000,8 +1024,10 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
         success = false;
 
         long startTime = Time.now();
-        DatanodeInfo[] excluded = excludedNodes.toArray(
-            new DatanodeInfo[excludedNodes.size()]);
+        DatanodeInfo[] excluded =
+            excludedNodes.getAllPresent(excludedNodes.asMap().keySet())
+            .keySet()
+            .toArray(new DatanodeInfo[0]);
         block = oldBlock;
         lb = locateFollowingBlock(startTime,
             excluded.length > 0 ? excluded : null);
@@ -1020,7 +1046,7 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
           dfsClient.namenode.abandonBlock(block, src, dfsClient.clientName);
           block = null;
           DFSClient.LOG.info("Excluding datanode " + nodes[errorIndex]);
-          excludedNodes.add(nodes[errorIndex]);
+          excludedNodes.put(nodes[errorIndex], nodes[errorIndex]);
         }
       } while (!success && --count >= 0);
 

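The excluded-nodes change above gets its time-based eviction from Guava's LoadingCache. A stripped-down, self-contained sketch of the same pattern — a self-keyed cache with expireAfterWrite and a removal listener — independent of the HDFS types:

    import java.util.concurrent.TimeUnit;
    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.CacheLoader;
    import com.google.common.cache.LoadingCache;
    import com.google.common.cache.RemovalListener;
    import com.google.common.cache.RemovalNotification;

    public class ExpiringSetSketch {
      public static void main(String[] args) throws Exception {
        // Entries silently expire 1000 ms after they were last written.
        LoadingCache<String, String> excluded = CacheBuilder.newBuilder()
            .expireAfterWrite(1000, TimeUnit.MILLISECONDS)
            .removalListener(new RemovalListener<String, String>() {
              @Override
              public void onRemoval(RemovalNotification<String, String> n) {
                System.out.println("Forgiving " + n.getKey());
              }
            })
            .build(new CacheLoader<String, String>() {
              @Override
              public String load(String key) {
                return key;  // used as a set: each key maps to itself
              }
            });

        excluded.put("dn1", "dn1");
        System.out.println(excluded.asMap().keySet());  // [dn1]
        Thread.sleep(1500);
        excluded.cleanUp();                             // trigger eviction processing
        System.out.println(excluded.asMap().keySet());  // []
      }
    }
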
+ 112 - 81
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -66,6 +66,7 @@ import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.net.CachedDNSToSwitchMapping;
 import org.apache.hadoop.net.DNSToSwitchMapping;
 import org.apache.hadoop.net.NetworkTopology;
+import org.apache.hadoop.net.NetworkTopology.InvalidTopologyException;
 import org.apache.hadoop.net.Node;
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.net.ScriptBasedMapping;
@@ -431,8 +432,8 @@ public class DatanodeManager {
       host2DatanodeMap.remove(datanodeMap.put(node.getStorageID(), node));
     }
 
+    networktopology.add(node); // may throw InvalidTopologyException
     host2DatanodeMap.add(node);
-    networktopology.add(node);
     checkIfClusterIsNowMultiRack(node);
 
     if (LOG.isDebugEnabled()) {
@@ -647,92 +648,122 @@ public class DatanodeManager {
       nodeReg.setIpAddr(ip);
       nodeReg.setPeerHostName(hostname);
     }
-
-    nodeReg.setExportedKeys(blockManager.getBlockKeys());
-
-    // Checks if the node is not on the hosts list.  If it is not, then
-    // it will be disallowed from registering. 
-    if (!inHostsList(nodeReg)) {
-      throw new DisallowedDatanodeException(nodeReg);
-    }
-      
-    NameNode.stateChangeLog.info("BLOCK* registerDatanode: from "
-        + nodeReg + " storage " + nodeReg.getStorageID());
-
-    DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID());
-    DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr(
-        nodeReg.getIpAddr(), nodeReg.getXferPort());
-      
-    if (nodeN != null && nodeN != nodeS) {
-      NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN);
-      // nodeN previously served a different data storage, 
-      // which is not served by anybody anymore.
-      removeDatanode(nodeN);
-      // physically remove node from datanodeMap
-      wipeDatanode(nodeN);
-      nodeN = null;
-    }
-
-    if (nodeS != null) {
-      if (nodeN == nodeS) {
-        // The same datanode has been just restarted to serve the same data 
-        // storage. We do not need to remove old data blocks, the delta will
-        // be calculated on the next block report from the datanode
-        if(NameNode.stateChangeLog.isDebugEnabled()) {
-          NameNode.stateChangeLog.debug("BLOCK* registerDatanode: "
-              + "node restarted.");
+    
+    try {
+      nodeReg.setExportedKeys(blockManager.getBlockKeys());
+  
+      // Checks if the node is not on the hosts list.  If it is not, then
+      // it will be disallowed from registering. 
+      if (!inHostsList(nodeReg)) {
+        throw new DisallowedDatanodeException(nodeReg);
+      }
+        
+      NameNode.stateChangeLog.info("BLOCK* registerDatanode: from "
+          + nodeReg + " storage " + nodeReg.getStorageID());
+  
+      DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID());
+      DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr(
+          nodeReg.getIpAddr(), nodeReg.getXferPort());
+        
+      if (nodeN != null && nodeN != nodeS) {
+        NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN);
+        // nodeN previously served a different data storage, 
+        // which is not served by anybody anymore.
+        removeDatanode(nodeN);
+        // physically remove node from datanodeMap
+        wipeDatanode(nodeN);
+        nodeN = null;
+      }
+  
+      if (nodeS != null) {
+        if (nodeN == nodeS) {
+          // The same datanode has been just restarted to serve the same data 
+          // storage. We do not need to remove old data blocks, the delta will
+          // be calculated on the next block report from the datanode
+          if(NameNode.stateChangeLog.isDebugEnabled()) {
+            NameNode.stateChangeLog.debug("BLOCK* registerDatanode: "
+                + "node restarted.");
+          }
+        } else {
+          // nodeS is found
+          /* The registering datanode is a replacement node for the existing 
+            data storage, which from now on will be served by a new node.
+            If this message repeats, both nodes might have same storageID 
+            by (insanely rare) random chance. User needs to restart one of the
+            nodes with its data cleared (or user can just remove the StorageID
+            value in "VERSION" file under the data directory of the datanode,
+            but this is might not work if VERSION file format has changed 
+         */        
+          NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS
+              + " is replaced by " + nodeReg + " with the same storageID "
+              + nodeReg.getStorageID());
+        }
+        
+        boolean success = false;
+        try {
+          // update cluster map
+          getNetworkTopology().remove(nodeS);
+          nodeS.updateRegInfo(nodeReg);
+          nodeS.setDisallowed(false); // Node is in the include list
+          
+          // resolve network location
+          resolveNetworkLocation(nodeS);
+          getNetworkTopology().add(nodeS);
+            
+          // also treat the registration message as a heartbeat
+          heartbeatManager.register(nodeS);
+          checkDecommissioning(nodeS);
+          success = true;
+        } finally {
+          if (!success) {
+            removeDatanode(nodeS);
+            wipeDatanode(nodeS);
+          }
+        }
+        return;
+      } 
+  
+      // this is a new datanode serving a new data storage
+      if ("".equals(nodeReg.getStorageID())) {
+        // this data storage has never been registered
+        // it is either empty or was created by pre-storageID version of DFS
+        nodeReg.setStorageID(newStorageID());
+        if (NameNode.stateChangeLog.isDebugEnabled()) {
+          NameNode.stateChangeLog.debug(
+              "BLOCK* NameSystem.registerDatanode: "
+              + "new storageID " + nodeReg.getStorageID() + " assigned.");
         }
-      } else {
-        // nodeS is found
-        /* The registering datanode is a replacement node for the existing 
-          data storage, which from now on will be served by a new node.
-          If this message repeats, both nodes might have same storageID 
-          by (insanely rare) random chance. User needs to restart one of the
-          nodes with its data cleared (or user can just remove the StorageID
-          value in "VERSION" file under the data directory of the datanode,
-          but this is might not work if VERSION file format has changed 
-       */        
-        NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS
-            + " is replaced by " + nodeReg + " with the same storageID "
-            + nodeReg.getStorageID());
       }
-      // update cluster map
-      getNetworkTopology().remove(nodeS);
-      nodeS.updateRegInfo(nodeReg);
-      nodeS.setDisallowed(false); // Node is in the include list
       
       
-      // resolve network location
-      resolveNetworkLocation(nodeS);
-      getNetworkTopology().add(nodeS);
+      DatanodeDescriptor nodeDescr 
+        = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK);
+      boolean success = false;
+      try {
+        resolveNetworkLocation(nodeDescr);
+        networktopology.add(nodeDescr);
+  
+        // register new datanode
+        addDatanode(nodeDescr);
+        checkDecommissioning(nodeDescr);
         
         
-      // also treat the registration message as a heartbeat
-      heartbeatManager.register(nodeS);
-      checkDecommissioning(nodeS);
-      return;
-    } 
-
-    // this is a new datanode serving a new data storage
-    if ("".equals(nodeReg.getStorageID())) {
-      // this data storage has never been registered
-      // it is either empty or was created by pre-storageID version of DFS
-      nodeReg.setStorageID(newStorageID());
-      if (NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug(
-            "BLOCK* NameSystem.registerDatanode: "
-            + "new storageID " + nodeReg.getStorageID() + " assigned.");
+        // also treat the registration message as a heartbeat
+        // no need to update its timestamp
+        // because its is done when the descriptor is created
+        heartbeatManager.addDatanode(nodeDescr);
+        success = true;
+      } finally {
+        if (!success) {
+          removeDatanode(nodeDescr);
+          wipeDatanode(nodeDescr);
+        }
       }
+    } catch (InvalidTopologyException e) {
+      // If the network location is invalid, clear the cached mappings
+      // so that we have a chance to re-add this DataNode with the
+      // correct network location later.
+      dnsToSwitchMapping.reloadCachedMappings();
+      throw e;
     }
-    // register new datanode
-    DatanodeDescriptor nodeDescr 
-      = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK);
-    resolveNetworkLocation(nodeDescr);
-    addDatanode(nodeDescr);
-    checkDecommissioning(nodeDescr);
-    
-    // also treat the registration message as a heartbeat
-    // no need to update its timestamp
-    // because its is done when the descriptor is created
-    heartbeatManager.addDatanode(nodeDescr);
   }
 
   /**

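Besides clearing cached mappings on InvalidTopologyException, the registerDatanode() rewrite above wraps each registration path in a success-flag try/finally so a half-registered node never lingers in the NameNode's maps. A generic sketch of that rollback idiom (the method names are placeholders, not the real DatanodeManager API):

    public class RollbackSketch {
      /** Register a node, undoing partial state if any step fails. */
      void register(String node) {
        boolean success = false;
        try {
          addToTopology(node);   // may throw, e.g. for an invalid rack location
          addHeartbeat(node);
          success = true;
        } finally {
          if (!success) {
            remove(node);        // roll back whatever the try block managed to add
          }
        }
      }

      void addToTopology(String node) { /* ... */ }
      void addHeartbeat(String node)  { /* ... */ }
      void remove(String node)        { /* ... */ }
    }
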
+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -3072,8 +3072,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 
 
   ContentSummary getContentSummary(String src) throws AccessControlException,
       FileNotFoundException, UnresolvedLinkException, StandbyException {
-    FSPermissionChecker pc = new FSPermissionChecker(fsOwnerShortUserName,
-        supergroup);
+    FSPermissionChecker pc = getPermissionChecker();
     checkOperation(OperationCategory.READ);
     readLock();
     try {

+ 13 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -47,7 +47,7 @@
     RPC address that handles all clients requests. In the case of HA/Federation where multiple namenodes exist,
     the name service id is added to the name e.g. dfs.namenode.rpc-address.ns1
     dfs.namenode.rpc-address.EXAMPLENAMESERVICE
-    The value of this property will take the form of hdfs://nn-host1:rpc-port.
+    The value of this property will take the form of nn-host1:rpc-port.
   </description>
 </property>
 
@@ -59,7 +59,7 @@
     connecting to this address if it is configured. In the case of HA/Federation where multiple namenodes exist,
     the name service id is added to the name e.g. dfs.namenode.servicerpc-address.ns1
     dfs.namenode.rpc-address.EXAMPLENAMESERVICE
-    The value of this property will take the form of hdfs://nn-host1:rpc-port.
+    The value of this property will take the form of nn-host1:rpc-port.
     If the value of this property is unset the value of dfs.namenode.rpc-address will be used as the default.
   </description>
 </property>
@@ -594,6 +594,17 @@
   <description>Packet size for clients to write</description>
 </property>
 
+<property>
+  <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name>
+  <value>600000</value>
+  <description>The maximum period to keep a DN in the excluded nodes list
+  at a client. After this period, in milliseconds, the previously excluded node(s) will
+  be removed automatically from the cache and will be considered good for block allocations
+  again. Useful to lower or raise in situations where you keep a file open for very long
+  periods (such as a Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster maintenance
+  restarts. Defaults to 10 minutes.</description>
+</property>
+
 <property>
   <name>dfs.namenode.checkpoint.dir</name>
   <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value>

+ 88 - 6
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java

@@ -21,20 +21,27 @@ import static org.junit.Assert.fail;
 
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.List;
+
+import junit.framework.Assert;
 
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
+import org.apache.hadoop.util.ThreadUtil;
+
 import org.junit.Test;
 
 
 /**
- * These tests make sure that DFSClient retries fetching data from DFS
- * properly in case of errors.
+ * These tests make sure that DFSClient excludes writing data to
+ * a DN properly in case of errors.
  */
 public class TestDFSClientExcludedNodes {
 
-  @Test
+  @Test(timeout=10000)
   public void testExcludedNodes() throws IOException {
     Configuration conf = new HdfsConfiguration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
@@ -43,14 +50,89 @@ public class TestDFSClientExcludedNodes {
 
     // kill a datanode
     cluster.stopDataNode(AppendTestUtil.nextInt(3));
-    OutputStream out = fs.create(filePath, true, 4096);
+    OutputStream out = fs.create(
+        filePath,
+        true,
+        4096,
+        (short) 3,
+        fs.getDefaultBlockSize(filePath)
+    );
     out.write(20);
 
     try {
       out.close();
     } catch (Exception e) {
-      fail("DataNode failure should not result in a block abort: \n" + e.getMessage());
+      fail("Single DN failure should not result in a block abort: \n" +
+          e.getMessage());
+    }
+  }
+
+  @Test(timeout=10000)
+  public void testExcludedNodesForgiveness() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    // Forgive nodes in under 1s for this test case.
+    conf.setLong(
+        DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
+        1000);
+    // We'll be using a 512 bytes block size just for tests
+    // so making sure the checksum bytes too match it.
+    conf.setInt("io.bytes.per.checksum", 512);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    List<DataNodeProperties> props = cluster.dataNodes;
+    FileSystem fs = cluster.getFileSystem();
+    Path filePath = new Path("/testForgivingExcludedNodes");
+
+    // 256 bytes data chunk for writes
+    byte[] bytes = new byte[256];
+    for (int index=0; index<bytes.length; index++) {
+      bytes[index] = '0';
+    }
+
+    // File with a 512 bytes block size
+    FSDataOutputStream out = fs.create(filePath, true, 4096, (short) 3, 512);
+
+    // Write a block to all 3 DNs (2x256bytes).
+    out.write(bytes);
+    out.write(bytes);
+    out.hflush();
+
+    // Remove two DNs, to put them into the exclude list.
+    DataNodeProperties two = cluster.stopDataNode(2);
+    DataNodeProperties one = cluster.stopDataNode(1);
+
+    // Write another block.
+    // At this point, we have two nodes already in excluded list.
+    out.write(bytes);
+    out.write(bytes);
+    out.hflush();
+
+    // Bring back the older DNs, since they are gonna be forgiven only
+    // afterwards of this previous block write.
+    Assert.assertEquals(true, cluster.restartDataNode(one, true));
+    Assert.assertEquals(true, cluster.restartDataNode(two, true));
+    cluster.waitActive();
+
+    // Sleep for 2s, to let the excluded nodes be expired
+    // from the excludes list (i.e. forgiven after the configured wait period).
+    // [Sleeping just in case the restart of the DNs completed < 2s cause
+    // otherwise, we'll end up quickly excluding those again.]
+    ThreadUtil.sleepAtLeastIgnoreInterrupts(2000);
+
+    // Terminate the last good DN, to assert that there's no
+    // single-DN-available scenario, caused by not forgiving the other
+    // two by now.
+    cluster.stopDataNode(0);
+
+    try {
+      // Attempt writing another block, which should still pass
+      // cause the previous two should have been forgiven by now,
+      // while the last good DN added to excludes this time.
+      out.write(bytes);
+      out.hflush();
+      out.close();
+    } catch (Exception e) {
+      fail("Excluded DataNodes should be forgiven after a while and " +
+           "not cause file writing exception of: '" + e.getMessage() + "'");
     }
   }
-  
 }

+ 69 - 53
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java

@@ -35,6 +35,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
 import java.util.Scanner;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.zip.DeflaterOutputStream;
 import java.util.zip.GZIPOutputStream;
 
@@ -68,7 +69,8 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERV
  */
 public class TestDFSShell {
   private static final Log LOG = LogFactory.getLog(TestDFSShell.class);
-  
+  private static AtomicInteger counter = new AtomicInteger();
+
   static final String TEST_ROOT_DIR =
     new Path(System.getProperty("test.build.data","/tmp"))
     .toString().replace(' ', '+');
@@ -512,7 +514,7 @@ public class TestDFSShell {
       createLocalFile(furi);
       argv = new String[3];
       argv[0] = "-put";
-      argv[1] = furi.toString();
+      argv[1] = furi.toURI().toString();
       argv[2] = dstFs.getUri().toString() + "/furi";
       ret = ToolRunner.run(shell, argv);
       assertEquals(" put is working ", 0, ret);
@@ -867,52 +869,59 @@ public class TestDFSShell {
     shell.setConf(conf);
     
     try {
-     //first make dir
-     Path dir = new Path(chmodDir);
-     fs.delete(dir, true);
-     fs.mkdirs(dir);
+      //first make dir
+      Path dir = new Path(chmodDir);
+      fs.delete(dir, true);
+      fs.mkdirs(dir);
 
 
-     confirmPermissionChange(/* Setting */ "u+rwx,g=rw,o-rwx",
+      confirmPermissionChange(/* Setting */ "u+rwx,g=rw,o-rwx",
                              /* Should give */ "rwxrw----", fs, shell, dir);
-     
-     //create an empty file
-     Path file = new Path(chmodDir, "file");
-     TestDFSShell.writeFile(fs, file);
-
-     //test octal mode
-     confirmPermissionChange( "644", "rw-r--r--", fs, shell, file);
-
-     //test recursive
-     runCmd(shell, "-chmod", "-R", "a+rwX", chmodDir);
-     assertEquals("rwxrwxrwx",
-                  fs.getFileStatus(dir).getPermission().toString()); 
-     assertEquals("rw-rw-rw-",
-                  fs.getFileStatus(file).getPermission().toString());
-
-     // test sticky bit on directories
-     Path dir2 = new Path(dir, "stickybit" );
-     fs.mkdirs(dir2 );
-     LOG.info("Testing sticky bit on: " + dir2);
-     LOG.info("Sticky bit directory initial mode: " + 
-                   fs.getFileStatus(dir2).getPermission());
-     
-     confirmPermissionChange("u=rwx,g=rx,o=rx", "rwxr-xr-x", fs, shell, dir2);
-     
-     confirmPermissionChange("+t", "rwxr-xr-t", fs, shell, dir2);
-
-     confirmPermissionChange("-t", "rwxr-xr-x", fs, shell, dir2);
-
-     confirmPermissionChange("=t", "--------T", fs, shell, dir2);
-
-     confirmPermissionChange("0000", "---------", fs, shell, dir2);
-
-     confirmPermissionChange("1666", "rw-rw-rwT", fs, shell, dir2);
-
-     confirmPermissionChange("777", "rwxrwxrwt", fs, shell, dir2);
-     
-     fs.delete(dir2, true);
-     fs.delete(dir, true);
-     
+
+      //create an empty file
+      Path file = new Path(chmodDir, "file");
+      TestDFSShell.writeFile(fs, file);
+
+      //test octal mode
+      confirmPermissionChange("644", "rw-r--r--", fs, shell, file);
+
+      //test recursive
+      runCmd(shell, "-chmod", "-R", "a+rwX", chmodDir);
+      assertEquals("rwxrwxrwx",
+          fs.getFileStatus(dir).getPermission().toString());
+      assertEquals("rw-rw-rw-",
+          fs.getFileStatus(file).getPermission().toString());
+
+      // Skip "sticky bit" tests on Windows.
+      //
+      if (!Path.WINDOWS) {
+        // test sticky bit on directories
+        Path dir2 = new Path(dir, "stickybit");
+        fs.mkdirs(dir2);
+        LOG.info("Testing sticky bit on: " + dir2);
+        LOG.info("Sticky bit directory initial mode: " +
+            fs.getFileStatus(dir2).getPermission());
+
+        confirmPermissionChange("u=rwx,g=rx,o=rx", "rwxr-xr-x", fs, shell, dir2);
+
+        confirmPermissionChange("+t", "rwxr-xr-t", fs, shell, dir2);
+
+        confirmPermissionChange("-t", "rwxr-xr-x", fs, shell, dir2);
+
+        confirmPermissionChange("=t", "--------T", fs, shell, dir2);
+
+        confirmPermissionChange("0000", "---------", fs, shell, dir2);
+
+        confirmPermissionChange("1666", "rw-rw-rwT", fs, shell, dir2);
+
+        confirmPermissionChange("777", "rwxrwxrwt", fs, shell, dir2);
+
+        fs.delete(dir2, true);
+      } else {
+        LOG.info("Skipped sticky bit tests on Windows");
+      }
+
+      fs.delete(dir, true);
+
     } finally {
     } finally {
       try {
         fs.close();
@@ -1595,27 +1604,29 @@ public class TestDFSShell {
   // force Copy Option is -f
   @Test (timeout = 30000)
   public void testCopyCommandsWithForceOption() throws Exception {
+    final int SUCCESS = 0;
+    final int ERROR = 1;
+
     Configuration conf = new Configuration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
         .format(true).build();
     FsShell shell = null;
     FileSystem fs = null;
     final File localFile = new File(TEST_ROOT_DIR, "testFileForPut");
-    final String testdir = TEST_ROOT_DIR + "/ForceTestDir";
+    final String localfilepath = new Path(localFile.getAbsolutePath()).toUri().toString();
+    final String testdir = "/tmp/TestDFSShell-testCopyCommandsWithForceOption-"
+        + counter.getAndIncrement();
     final Path hdfsTestDir = new Path(testdir);
     final Path hdfsTestDir = new Path(testdir);
     try {
     try {
       fs = cluster.getFileSystem();
       fs = cluster.getFileSystem();
       fs.mkdirs(hdfsTestDir);
       fs.mkdirs(hdfsTestDir);
       localFile.createNewFile();
       localFile.createNewFile();
-      writeFile(fs, new Path(TEST_ROOT_DIR, "testFileForPut"));
+      writeFile(fs, new Path(testdir, "testFileForPut"));
       shell = new FsShell();
       shell = new FsShell();
 
 
       // Tests for put
       // Tests for put
       String[] argv = new String[] { "-put", "-f", localfilepath, testdir };
       String[] argv = new String[] { "-put", "-f", localfilepath, testdir };
       int res = ToolRunner.run(shell, argv);
       int res = ToolRunner.run(shell, argv);
-      int SUCCESS = 0;
-      int ERROR = 1;
       assertEquals("put -f is not working", SUCCESS, res);
       assertEquals("put -f is not working", SUCCESS, res);
 
 
       argv = new String[] { "-put", localfilepath, testdir };
       argv = new String[] { "-put", localfilepath, testdir };
@@ -1687,8 +1698,13 @@ public class TestDFSShell {
     try {
     try {
       // Create and delete a file
       // Create and delete a file
       fs = cluster.getFileSystem();
-      writeFile(fs, new Path(TEST_ROOT_DIR, "foo"));
-      final String testFile = TEST_ROOT_DIR + "/foo";
+
+      // Use a separate tmp dir for each invocation.
+      final String testdir = "/tmp/TestDFSShell-deleteFileUsingTrash-" +
+          counter.getAndIncrement();
+
+      writeFile(fs, new Path(testdir, "foo"));
+      final String testFile = testdir + "/foo";
       final String trashFile = shell.getCurrentTrashDir() + "/" + testFile;
       String[] argv = new String[] { "-rm", testFile };
       int res = ToolRunner.run(shell, argv);

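A note on the sticky-bit expectations in the TestDFSShell hunk above: the assertions compare FsPermission's ls-style rendering, where the sticky bit replaces the final execute column ('t' when "others" also has execute, 'T' when it does not). The following standalone sketch (illustrative only, not Hadoop code) reproduces that mapping for the octal modes used in the test:

    // Illustrative only: mimics how a mode such as 01666 renders as "rw-rw-rwT".
    public class ModeStringSketch {
      static String render(int mode) {
        char[] rwx = "rwxrwxrwx".toCharArray();
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 9; i++) {
          int bit = 1 << (8 - i);
          sb.append((mode & bit) != 0 ? rwx[i] : '-');
        }
        if ((mode & 01000) != 0) {                  // sticky bit set
          sb.setCharAt(8, (mode & 0001) != 0 ? 't' : 'T');
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        System.out.println(render(01000));  // --------T  (chmod =t)
        System.out.println(render(01666));  // rw-rw-rwT  (chmod 1666)
        System.out.println(render(01777));  // rwxrwxrwt
      }
    }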
+ 12 - 7
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.qjournal;
 
 
 import static org.junit.Assert.*;
+import static org.junit.Assume.*;
 
 
 import java.io.File;
 import java.io.IOException;
@@ -43,7 +44,7 @@ import org.junit.Test;
 
 
 public class TestNNWithQJM {
   Configuration conf = new HdfsConfiguration();
-  private MiniJournalCluster mjc;
+  private MiniJournalCluster mjc = null;
   private Path TEST_PATH = new Path("/test-dir");
   private Path TEST_PATH_2 = new Path("/test-dir");
 
 
@@ -61,10 +62,11 @@ public class TestNNWithQJM {
   public void stopJNs() throws Exception {
     if (mjc != null) {
       mjc.shutdown();
+      mjc = null;
     }
   }
   
   
-  @Test
+  @Test (timeout = 30000)
   public void testLogAndRestart() throws IOException {
     conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image");
@@ -93,9 +95,12 @@ public class TestNNWithQJM {
       cluster.shutdown();
     }
   }
-  
-  @Test
+
+  @Test (timeout = 30000)
   public void testNewNamenodeTakesOverWriter() throws Exception {
+    // Skip the test on Windows. See HDFS-4584.
+    assumeTrue(!Path.WINDOWS);
+
     File nn1Dir = new File(
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image-nn1");
     File nn2Dir = new File(
@@ -154,7 +159,7 @@ public class TestNNWithQJM {
     }
   }
 
 
-  @Test
+  @Test (timeout = 30000)
   public void testMismatchedNNIsRejected() throws Exception {
     conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image");
@@ -188,8 +193,8 @@ public class TestNNWithQJM {
           "Unable to start log segment 1: too few journals", ioe);
     }
   }
-  
-  @Test
+
+  @Test (timeout = 30000)
   public void testWebPageHasQjmInfo() throws Exception {
     conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image");

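The TestNNWithQJM changes above follow one pattern throughout: give every test a timeout and skip Windows-incompatible cases with an org.junit.Assume guard rather than letting them fail. A minimal sketch of that pattern (the WINDOWS constant below is only a stand-in for Path.WINDOWS):

    import static org.junit.Assume.assumeTrue;
    import org.junit.Test;

    public class SkipOnWindowsSketch {
      // Stand-in for org.apache.hadoop.fs.Path.WINDOWS.
      private static final boolean WINDOWS =
          System.getProperty("os.name").startsWith("Windows");

      @Test(timeout = 30000)
      public void testPosixOnlyBehavior() {
        // When the assumption fails, JUnit reports the test as skipped, not failed.
        assumeTrue(!WINDOWS);
        // POSIX-specific assertions would follow here.
      }
    }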
+ 8 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java

@@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.log4j.Level;
+import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
@@ -233,9 +234,15 @@ public class TestAuditLogs {
 
 
   /** Sets up log4j logger for auditlogs */
   private void setupAuditLogs() throws IOException {
+    // Shutdown the LogManager to release all logger open file handles.
+    // Unfortunately, Apache commons logging library does not provide
+    // means to release underlying loggers. For additional info look up
+    // commons library FAQ.
+    LogManager.shutdown();
+
     File file = new File(auditLogFile);
     if (file.exists()) {
-      file.delete();
+      assertTrue(file.delete());
     }
     Logger logger = ((Log4JLogger) FSNamesystem.auditLog).getLogger();
     logger.setLevel(Level.INFO);

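The LogManager.shutdown() call introduced above releases the audit log file handle before the old file is deleted, since commons-logging exposes no way to close the underlying log4j appenders. Outside the test, the same idea looks roughly like this (the file name and logger name are placeholders):

    import java.io.File;
    import java.io.IOException;
    import org.apache.log4j.Level;
    import org.apache.log4j.LogManager;
    import org.apache.log4j.Logger;
    import org.apache.log4j.PatternLayout;
    import org.apache.log4j.RollingFileAppender;

    public class AuditLogResetSketch {
      static void resetLogFile(String logFile) throws IOException {
        // Close all appenders so the open file handle is released (matters on Windows).
        LogManager.shutdown();
        File f = new File(logFile);
        if (f.exists() && !f.delete()) {
          throw new IOException("could not delete " + logFile);
        }
        // Re-attach a fresh appender so later events land in the new file.
        Logger logger = Logger.getLogger("auditLogger");
        logger.setLevel(Level.INFO);
        logger.addAppender(new RollingFileAppender(new PatternLayout("%m%n"), logFile));
      }
    }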
+ 9 - 8
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java

@@ -224,7 +224,7 @@ public class TestGetConf {
   /**
    * Test empty configuration
    */
-  @Test
+  @Test(timeout=10000)
   public void testEmptyConf() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration(false);
     // Verify getting addresses fails
@@ -247,7 +247,7 @@ public class TestGetConf {
   /**
    * Test invalid argument to the tool
    */
-  @Test
+  @Test(timeout=10000)
   public void testInvalidArgument() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();
     String[] args = {"-invalidArgument"};
@@ -259,7 +259,7 @@ public class TestGetConf {
    * Tests to make sure the returned addresses are correct in case of default
    * configuration with no federation
    */
-  @Test
+  @Test(timeout=10000)
   public void testNonFederation() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration(false);
   
   
@@ -294,7 +294,7 @@ public class TestGetConf {
    * Tests to make sure the returned addresses are correct in case of federation
    * of setup.
    */
-  @Test
+  @Test(timeout=10000)
   public void testFederation() throws Exception {
     final int nsCount = 10;
     HdfsConfiguration conf = new HdfsConfiguration(false);
@@ -333,15 +333,16 @@ public class TestGetConf {
     verifyAddresses(conf, TestType.NNRPCADDRESSES, true, nnAddresses);
   }
   
   
-  @Test
+  @Test(timeout=10000)
   public void testGetSpecificKey() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();
     conf.set("mykey", " myval ");
     String[] args = {"-confKey", "mykey"};
-    assertTrue(runTool(conf, args, true).equals("myval\n"));
+    String toolResult = runTool(conf, args, true);
+    assertEquals(String.format("myval%n"), toolResult);
   }
   
   
-  @Test
+  @Test(timeout=10000)
   public void testExtraArgsThrowsError() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();
     conf.set("mykey", "myval");
@@ -354,7 +355,7 @@ public class TestGetConf {
    * Tests commands other than {@link Command#NAMENODE}, {@link Command#BACKUP},
    * {@link Command#SECONDARY} and {@link Command#NNRPCADDRESSES}
    */
-  @Test
+  @Test(timeout=10000)
   public void testTool() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration(false);
     for (Command cmd : Command.values()) {

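The testGetSpecificKey change above replaces a hard-coded "myval\n" comparison with String.format("myval%n"), because the tool prints the platform line separator. A short illustration of why the old assertion broke on Windows:

    public class LineSeparatorSketch {
      public static void main(String[] args) {
        String unixOnly = "myval\n";                  // always LF
        String platform = String.format("myval%n");   // LF on Linux, CRLF on Windows
        // Equal on Linux, different on Windows, hence the assertEquals rewrite.
        System.out.println(unixOnly.equals(platform));
        System.out.println(platform.equals("myval" + System.lineSeparator()));  // always true
      }
    }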
+ 72 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java

@@ -26,12 +26,23 @@ import static org.junit.Assert.fail;
 import java.util.HashMap;
 import java.util.Map;
 
 
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.junit.Before;
 import org.junit.Test;
 
 
 public class TestNetworkTopology {
+  private static final Log LOG = LogFactory.getLog(TestNetworkTopology.class);
   private final static NetworkTopology cluster = new NetworkTopology();
   private DatanodeDescriptor dataNodes[];
   
   
@@ -213,4 +224,65 @@ public class TestNetworkTopology {
       }
     }
   }
+
+  @Test(timeout=180000)
+  public void testInvalidNetworkTopologiesNotCachedInHdfs() throws Exception {
+    // start a cluster
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = null;
+    try {
+      // bad rack topology
+      String racks[] = { "/a/b", "/c" };
+      String hosts[] = { "foo1.example.com", "foo2.example.com" };
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).
+          racks(racks).hosts(hosts).build();
+      cluster.waitActive();
+      
+      NamenodeProtocols nn = cluster.getNameNodeRpc();
+      Assert.assertNotNull(nn);
+      
+      // Wait for one DataNode to register.
+      // The other DataNode will not be able to register because of the rack mismatch.
+      DatanodeInfo[] info;
+      while (true) {
+        info = nn.getDatanodeReport(DatanodeReportType.LIVE);
+        Assert.assertFalse(info.length == 2);
+        if (info.length == 1) {
+          break;
+        }
+        Thread.sleep(1000);
+      }
+      // Set the network topology of the other node to match the network
+      // topology of the node that came up.
+      int validIdx = info[0].getHostName().equals(hosts[0]) ? 0 : 1;
+      int invalidIdx = validIdx == 1 ? 0 : 1;
+      StaticMapping.addNodeToRack(hosts[invalidIdx], racks[validIdx]);
+      LOG.info("datanode " + validIdx + " came up with network location " + 
+        info[0].getNetworkLocation());
+
+      // Restart the DN with the invalid topology and wait for it to register.
+      cluster.restartDataNode(invalidIdx);
+      Thread.sleep(5000);
+      while (true) {
+        info = nn.getDatanodeReport(DatanodeReportType.LIVE);
+        if (info.length == 2) {
+          break;
+        }
+        if (info.length == 0) {
+          LOG.info("got no valid DNs");
+        } else if (info.length == 1) {
+          LOG.info("got one valid DN: " + info[0].getHostName() +
+              " (at " + info[0].getNetworkLocation() + ")");
+        }
+        Thread.sleep(1000);
+      }
+      Assert.assertEquals(info[0].getNetworkLocation(),
+                          info[1].getNetworkLocation());
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
 }

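The new TestNetworkTopology test above waits for datanode registration by polling getDatanodeReport() once a second instead of sleeping a fixed time. The same wait-for-condition shape, reduced to a generic helper (names are illustrative, not part of the test):

    import java.util.concurrent.Callable;
    import java.util.concurrent.TimeoutException;

    public class WaitForSketch {
      static void waitFor(Callable<Boolean> condition, long timeoutMs) throws Exception {
        long deadline = System.currentTimeMillis() + timeoutMs;
        // Re-check the condition once a second until it holds or the deadline passes.
        while (!condition.call()) {
          if (System.currentTimeMillis() > deadline) {
            throw new TimeoutException("condition not met within " + timeoutMs + " ms");
          }
          Thread.sleep(1000);
        }
      }
    }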
+ 24 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -75,6 +75,9 @@ Trunk (Unreleased)
     MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive.
     (Brandon Li via suresh)
 
 
+    MAPREDUCE-5014. Extend Distcp to accept a custom CopyListing. 
+    (Srikanth Sundarrajan via amareshwari)
+
   BUG FIXES
 
 
     MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
@@ -158,6 +161,9 @@ Trunk (Unreleased)
 
 
     MAPREDUCE-5012. Typo in javadoc for IdentityMapper class. (Adam Monsen
     via suresh)
+    
+    MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path
+    separators. (Chris Nauroth via sseth)
 
 
   BREAKDOWN OF HADOOP-8562 SUBTASKS
 
 
@@ -194,6 +200,9 @@ Release 2.0.5-beta - UNRELEASED
    MAPREDUCE-4892. Modify CombineFileInputFormat to not skew input splits'
     allocation on small clusters. (Bikas Saha via vinodkv)
 
 
+    MAPREDUCE-4990. Construct debug strings conditionally in 
+    ShuffleHandler.Shuffle#sendMapOutput(). (kkambatl via tucu)
+
   OPTIMIZATIONS
 
 
   BUG FIXES
@@ -235,6 +244,15 @@ Release 2.0.5-beta - UNRELEASED
     MAPREDUCE-4716. TestHsWebServicesJobsQuery.testJobsQueryStateInvalid 
     fails with jdk7. (tgraves via tucu)
 
 
+    MAPREDUCE-5075. DistCp leaks input file handles since ThrottledInputStream
+    does not close the wrapped InputStream.  (Chris Nauroth via szetszwo)
+
+    MAPREDUCE-3872. Fix an event handling race in ContainerLauncherImpl.
+    (Robert Kanter via sseth)
+
+    MAPREDUCE-5083. MiniMRCluster should use a random component when creating an
+    actual cluster (Siddharth Seth via hitesh)
+
 Release 2.0.4-alpha - UNRELEASED
 
 
   INCOMPATIBLE CHANGES
@@ -780,6 +798,9 @@ Release 0.23.7 - UNRELEASED
     MAPREDUCE-5027. Shuffle does not limit number of outstanding connections
     (Robert Parker via jeagles)
 
 
+    MAPREDUCE-4972. Coverage fixing for org.apache.hadoop.mapreduce.jobhistory
+    (Aleksey Gorshkov via bobby)
+
   OPTIMIZATIONS
 
 
     MAPREDUCE-4946. Fix a performance problem for large jobs by reducing the
@@ -818,6 +839,9 @@ Release 0.23.7 - UNRELEASED
     MAPREDUCE-5042. Reducer unable to fetch for a map task that was recovered
     (Jason Lowe via bobby)
 
 
+    MAPREDUCE-5053. java.lang.InternalError from decompression codec cause
+    reducer to fail (Robert Parker via jeagles)
+
 Release 0.23.6 - UNRELEASED
 
 
   INCOMPATIBLE CHANGES

+ 4 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java

@@ -230,9 +230,6 @@ public class ContainerLauncherImpl extends AbstractService implements
     }
   }
 
 
-  // To track numNodes.
-  Set<String> allNodes = new HashSet<String>();
-
   public ContainerLauncherImpl(AppContext context) {
     super(ContainerLauncherImpl.class.getName());
     this.context = context;
@@ -271,6 +268,8 @@ public class ContainerLauncherImpl extends AbstractService implements
       @Override
       public void run() {
         ContainerLauncherEvent event = null;
+        Set<String> allNodes = new HashSet<String>();
+
         while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
           try {
             event = eventQueue.take();
@@ -280,6 +279,8 @@ public class ContainerLauncherImpl extends AbstractService implements
             }
             return;
           }
+          allNodes.add(event.getContainerMgrAddress());
+
           int poolSize = launcherPool.getCorePoolSize();
 
 
           // See if we need up the pool size only if haven't reached the
@@ -419,7 +420,6 @@ public class ContainerLauncherImpl extends AbstractService implements
   public void handle(ContainerLauncherEvent event) {
     try {
       eventQueue.put(event);
-      this.allNodes.add(event.getContainerMgrAddress());
     } catch (InterruptedException e) {
       throw new YarnException(e);
     }

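The ContainerLauncherImpl change above fixes the race by confining allNodes to the single event-processing thread: handle() only enqueues, and the consumer thread owns the HashSet. A reduced sketch of that thread-confinement pattern (types and names are illustrative, not the MapReduce classes):

    import java.util.HashSet;
    import java.util.Set;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    public class ThreadConfinedTrackerSketch {
      private final BlockingQueue<String> eventQueue = new LinkedBlockingQueue<String>();

      // Called from many threads; never touches the set directly.
      public void handle(String containerMgrAddress) throws InterruptedException {
        eventQueue.put(containerMgrAddress);
      }

      public Thread startEventLoop() {
        Thread t = new Thread(new Runnable() {
          public void run() {
            // Owned by this thread only, so an unsynchronized HashSet is safe.
            Set<String> allNodes = new HashSet<String>();
            try {
              while (!Thread.currentThread().isInterrupted()) {
                allNodes.add(eventQueue.take());
              }
            } catch (InterruptedException e) {
              // exit quietly on shutdown
            }
          }
        });
        t.start();
        return t;
      }
    }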
+ 3 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java

@@ -38,7 +38,6 @@ import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -100,10 +99,9 @@ public class LocalContainerAllocator extends RMCommunicator
         this.applicationAttemptId, this.lastResponseID, super
             .getApplicationProgress(), new ArrayList<ResourceRequest>(),
         new ArrayList<ContainerId>());
-    AMResponse response;
+    AllocateResponse allocateResponse;
     try {
-      AllocateResponse allocateResponse = scheduler.allocate(allocateRequest);
-      response = allocateResponse.getAMResponse();
+      allocateResponse = scheduler.allocate(allocateRequest);
       // Reset retry count if no exception occurred.
       retrystartTime = System.currentTimeMillis();
     } catch (Exception e) {
@@ -120,7 +118,7 @@ public class LocalContainerAllocator extends RMCommunicator
       // continue to attempt to contact the RM.
       throw e;
     }
-    if (response.getReboot()) {
+    if (allocateResponse.getReboot()) {
       LOG.info("Event from RM: shutting down Application Master");
       // This can happen if the RM has been restarted. If it is in that state,
       // this application must clean itself up.

+ 5 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java

@@ -59,7 +59,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.hadoop.yarn.YarnException;
-import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
@@ -544,8 +544,9 @@ public class RMContainerAllocator extends RMContainerRequestor
   
   
   @SuppressWarnings("unchecked")
   private List<Container> getResources() throws Exception {
-    int headRoom = getAvailableResources() != null ? getAvailableResources().getMemory() : 0;//first time it would be null
-    AMResponse response;
+    int headRoom = getAvailableResources() != null
+        ? getAvailableResources().getMemory() : 0;//first time it would be null
+    AllocateResponse response;
     /*
      * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
      * milliseconds before aborting. During this interval, AM will still try
@@ -634,7 +635,7 @@ public class RMContainerAllocator extends RMContainerRequestor
   }
   
   
   @SuppressWarnings("unchecked")
-  private void handleUpdatedNodes(AMResponse response) {
+  private void handleUpdatedNodes(AllocateResponse response) {
     // send event to the job about on updated nodes
     List<NodeReport> updatedNodes = response.getUpdatedNodes();
     if (!updatedNodes.isEmpty()) {

+ 7 - 8
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java

@@ -38,7 +38,6 @@ import org.apache.hadoop.mapreduce.v2.app.client.ClientService;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
@@ -146,30 +145,30 @@ public abstract class RMContainerRequestor extends RMCommunicator {
     LOG.info("blacklistDisablePercent is " + blacklistDisablePercent);
   }
 
 
-  protected AMResponse makeRemoteRequest() throws YarnRemoteException {
+  protected AllocateResponse makeRemoteRequest() throws YarnRemoteException {
     AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest(
         applicationAttemptId, lastResponseID, super.getApplicationProgress(),
         new ArrayList<ResourceRequest>(ask), new ArrayList<ContainerId>(
             release));
     AllocateResponse allocateResponse = scheduler.allocate(allocateRequest);
-    AMResponse response = allocateResponse.getAMResponse();
-    lastResponseID = response.getResponseId();
-    availableResources = response.getAvailableResources();
+    lastResponseID = allocateResponse.getResponseId();
+    availableResources = allocateResponse.getAvailableResources();
     lastClusterNmCount = clusterNmCount;
     clusterNmCount = allocateResponse.getNumClusterNodes();
 
 
     if (ask.size() > 0 || release.size() > 0) {
       LOG.info("getResources() for " + applicationId + ":" + " ask="
           + ask.size() + " release= " + release.size() + " newContainers="
-          + response.getAllocatedContainers().size() + " finishedContainers="
-          + response.getCompletedContainersStatuses().size()
+          + allocateResponse.getAllocatedContainers().size()
+          + " finishedContainers="
+          + allocateResponse.getCompletedContainersStatuses().size()
           + " resourcelimit=" + availableResources + " knownNMs="
           + " resourcelimit=" + availableResources + " knownNMs="
           + clusterNmCount);
     }
 
 
     ask.clear();
     release.clear();
-    return response;
+    return allocateResponse;
   }
 
 
   // May be incorrect if there's multiple NodeManagers running on a single host.
   // May be incorrect if there's multiple NodeManagers running on a single host.

+ 397 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java

@@ -0,0 +1,397 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.jobhistory;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static junit.framework.Assert.*;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobPriority;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.junit.Test;
+
+public class TestEvents {
+
+  /**
+   * test the getters of TaskAttemptFinishedEvent and TaskAttemptFinished
+   * 
+   * @throws Exception
+   */
+  @Test(timeout = 10000)
+  public void testTaskAttemptFinishedEvent() throws Exception {
+
+    JobID jid = new JobID("001", 1);
+    TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
+    TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);
+    Counters counters = new Counters();
+    TaskAttemptFinishedEvent test = new TaskAttemptFinishedEvent(taskAttemptId,
+        TaskType.REDUCE, "TEST", 123L, "RAKNAME", "HOSTNAME", "STATUS",
+        counters);
+    assertEquals(test.getAttemptId().toString(), taskAttemptId.toString());
+
+    assertEquals(test.getCounters(), counters);
+    assertEquals(test.getFinishTime(), 123L);
+    assertEquals(test.getHostname(), "HOSTNAME");
+    assertEquals(test.getRackName(), "RAKNAME");
+    assertEquals(test.getState(), "STATUS");
+    assertEquals(test.getTaskId(), tid);
+    assertEquals(test.getTaskStatus(), "TEST");
+    assertEquals(test.getTaskType(), TaskType.REDUCE);
+
+  }
+
+  /**
+   * simple test JobPriorityChangeEvent and JobPriorityChange
+   * 
+   * @throws Exception
+   */
+
+  @Test(timeout = 10000)
+  public void testJobPriorityChange() throws Exception {
+    org.apache.hadoop.mapreduce.JobID jid = new JobID("001", 1);
+    JobPriorityChangeEvent test = new JobPriorityChangeEvent(jid,
+        JobPriority.LOW);
+    assertEquals(test.getJobId().toString(), jid.toString());
+    assertEquals(test.getPriority(), JobPriority.LOW);
+
+  }
+
+  /**
+   * simple test TaskUpdatedEvent and TaskUpdated
+   * 
+   * @throws Exception
+   */
+  @Test(timeout = 10000)
+  public void testTaskUpdated() throws Exception {
+    JobID jid = new JobID("001", 1);
+    TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
+    TaskUpdatedEvent test = new TaskUpdatedEvent(tid, 1234L);
+    assertEquals(test.getTaskId().toString(), tid.toString());
+    assertEquals(test.getFinishTime(), 1234L);
+
+  }
+
+  /*
+   * test EventReader. EventReader should read the list of events and return
+   * an instance of HistoryEvent. Different HistoryEvents should have
+   * different datum types.
+   */
+  @Test(timeout = 10000)
+  public void testEvents() throws Exception {
+
+    EventReader reader = new EventReader(new DataInputStream(
+        new ByteArrayInputStream(getEvents())));
+    HistoryEvent e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.JOB_PRIORITY_CHANGED));
+    assertEquals("ID", ((JobPriorityChange) e.getDatum()).jobid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.JOB_STATUS_CHANGED));
+    assertEquals("ID", ((JobStatusChanged) e.getDatum()).jobid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.TASK_UPDATED));
+    assertEquals("ID", ((TaskUpdated) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.JOB_KILLED));
+    assertEquals("ID",
+        ((JobUnsuccessfulCompletion) e.getDatum()).jobid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_STARTED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptStarted) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_FINISHED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptFinished) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_STARTED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptStarted) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_FINISHED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptFinished) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    reader.close();
+  }
+
+  /*
+   * makes array of bytes with History events
+   */
+  private byte[] getEvents() throws Exception {
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+    FSDataOutputStream fsOutput = new FSDataOutputStream(output,
+        new FileSystem.Statistics("scheme"));
+    EventWriter writer = new EventWriter(fsOutput);
+    writer.write(getJobPriorityChangedEvent());
+    writer.write(getJobStatusChangedEvent());
+    writer.write(getTaskUpdatedEvent());
+    writer.write(getReduceAttemptKilledEvent());
+    writer.write(getJobKilledEvent());
+    writer.write(getSetupAttemptStartedEvent());
+    writer.write(getTaskAttemptFinishedEvent());
+    writer.write(getSetupAttemptFieledEvent());
+    writer.write(getSetupAttemptKilledEvent());
+    writer.write(getCleanupAttemptStartedEvent());
+    writer.write(getCleanupAttemptFinishedEvent());
+    writer.write(getCleanupAttemptFiledEvent());
+    writer.write(getCleanupAttemptKilledEvent());
+
+    writer.flush();
+    writer.close();
+
+    return output.toByteArray();
+  }
+
+  private FakeEvent getCleanupAttemptKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_KILLED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getCleanupAttemptFiledEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_FAILED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private TaskAttemptUnsuccessfulCompletion getTaskAttemptUnsuccessfulCompletion() {
+    TaskAttemptUnsuccessfulCompletion datum = new TaskAttemptUnsuccessfulCompletion();
+    datum.attemptId = "attempt_1_2_r3_4_5";
+    datum.clockSplits = Arrays.asList(1, 2, 3);
+    datum.cpuUsages = Arrays.asList(100, 200, 300);
+    datum.error = "Error";
+    datum.finishTime = 2;
+    datum.hostname = "hostname";
+    datum.rackname = "rackname";
+    datum.physMemKbytes = Arrays.asList(1000, 2000, 3000);
+    datum.taskid = "task_1_2_r03_4";
+    datum.port = 1000;
+    datum.taskType = "REDUCE";
+    datum.status = "STATUS";
+    datum.counters = getCounters();
+    datum.vMemKbytes = Arrays.asList(1000, 2000, 3000);
+    return datum;
+  }
+
+  private JhCounters getCounters() {
+    JhCounters counters = new JhCounters();
+    counters.groups = new ArrayList<JhCounterGroup>(0);
+    counters.name = "name";
+    return counters;
+  }
+
+  private FakeEvent getCleanupAttemptFinishedEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_FINISHED);
+    TaskAttemptFinished datum = new TaskAttemptFinished();
+    datum.attemptId = "attempt_1_2_r3_4_5";
+
+    datum.counters = getCounters();
+    datum.finishTime = 2;
+    datum.hostname = "hostname";
+    datum.rackname = "rackName";
+    datum.state = "state";
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskStatus = "taskStatus";
+    datum.taskType = "REDUCE";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getCleanupAttemptStartedEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_STARTED);
+    TaskAttemptStarted datum = new TaskAttemptStarted();
+
+    datum.attemptId = "attempt_1_2_r3_4_5";
+    datum.avataar = "avatar";
+    datum.containerId = "containerId";
+    datum.httpPort = 10000;
+    datum.locality = "locality";
+    datum.shufflePort = 10001;
+    datum.startTime = 1;
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskType = "taskType";
+    datum.trackerName = "trackerName";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getSetupAttemptKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_KILLED);
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getSetupAttemptFieledEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_FAILED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getTaskAttemptFinishedEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_FINISHED);
+    TaskAttemptFinished datum = new TaskAttemptFinished();
+
+    datum.attemptId = "attempt_1_2_r3_4_5";
+    datum.counters = getCounters();
+    datum.finishTime = 2;
+    datum.hostname = "hostname";
+    datum.rackname = "rackname";
+    datum.state = "state";
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskStatus = "taskStatus";
+    datum.taskType = "REDUCE";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getSetupAttemptStartedEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_STARTED);
+    TaskAttemptStarted datum = new TaskAttemptStarted();
+    datum.attemptId = "ID";
+    datum.avataar = "avataar";
+    datum.containerId = "containerId";
+    datum.httpPort = 10000;
+    datum.locality = "locality";
+    datum.shufflePort = 10001;
+    datum.startTime = 1;
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskType = "taskType";
+    datum.trackerName = "trackerName";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getJobKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.JOB_KILLED);
+    JobUnsuccessfulCompletion datum = new JobUnsuccessfulCompletion();
+    datum.finishedMaps = 1;
+    datum.finishedReduces = 2;
+    datum.finishTime = 3;
+    datum.jobid = "ID";
+    datum.jobStatus = "STATUS";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getReduceAttemptKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.REDUCE_ATTEMPT_KILLED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getJobPriorityChangedEvent() {
+    FakeEvent result = new FakeEvent(EventType.JOB_PRIORITY_CHANGED);
+    JobPriorityChange datum = new JobPriorityChange();
+    datum.jobid = "ID";
+    datum.priority = "priority";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getJobStatusChangedEvent() {
+    FakeEvent result = new FakeEvent(EventType.JOB_STATUS_CHANGED);
+    JobStatusChanged datum = new JobStatusChanged();
+    datum.jobid = "ID";
+    datum.jobStatus = "newStatus";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getTaskUpdatedEvent() {
+    FakeEvent result = new FakeEvent(EventType.TASK_UPDATED);
+    TaskUpdated datum = new TaskUpdated();
+    datum.finishTime = 2;
+    datum.taskid = "ID";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private class FakeEvent implements HistoryEvent {
+    private EventType eventType;
+    private Object datum;
+
+    public FakeEvent(EventType eventType) {
+      this.eventType = eventType;
+    }
+
+    @Override
+    public EventType getEventType() {
+      return eventType;
+    }
+
+    @Override
+    public Object getDatum() {
+
+      return datum;
+    }
+
+    @Override
+    public void setDatum(Object datum) {
+      this.datum = datum;
+    }
+
+  }
+
+}

+ 15 - 21
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TaskType;
@@ -53,7 +54,7 @@ public class TestJobHistoryEventHandler {
   private static final Log LOG = LogFactory
       .getLog(TestJobHistoryEventHandler.class);
 
 
-  @Test
+  @Test (timeout=50000)
   public void testFirstFlushOnCompletionEvent() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -96,7 +97,7 @@ public class TestJobHistoryEventHandler {
     }
   }
 
 
-  @Test
+  @Test (timeout=50000)
   public void testMaxUnflushedCompletionEvents() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -131,17 +132,17 @@ public class TestJobHistoryEventHandler {
 
 
       handleNextNEvents(jheh, 1);
       verify(mockWriter).flush();
-      
+
       handleNextNEvents(jheh, 50);
       verify(mockWriter, times(6)).flush();
-      
+
     } finally {
       jheh.stop();
       verify(mockWriter).close();
     }
   }
-  
-  @Test
+
+  @Test (timeout=50000)
   public void testUnflushedTimer() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -181,8 +182,8 @@ public class TestJobHistoryEventHandler {
       verify(mockWriter).close();
     }
   }
-  
-  @Test
+
+  @Test (timeout=50000)
   public void testBatchedFlushJobEndMultiplier() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -265,7 +266,7 @@ public class TestJobHistoryEventHandler {
     when(mockContext.getApplicationID()).thenReturn(appId);
     return mockContext;
   }
-  
+
 
 
   private class TestParams {
     String workDir = setupTestWorkDir();
@@ -279,12 +280,8 @@ public class TestJobHistoryEventHandler {
   }
 
 
   private JobHistoryEvent getEventToEnqueue(JobId jobId) {
-    JobHistoryEvent toReturn = Mockito.mock(JobHistoryEvent.class);
-    HistoryEvent he = Mockito.mock(HistoryEvent.class);
-    Mockito.when(he.getEventType()).thenReturn(EventType.JOB_STATUS_CHANGED);
-    Mockito.when(toReturn.getHistoryEvent()).thenReturn(he);
-    Mockito.when(toReturn.getJobID()).thenReturn(jobId);
-    return toReturn;
+    HistoryEvent toReturn = new JobStatusChangedEvent(new JobID(Integer.toString(jobId.getId()), jobId.getId()), "change status");
+    return new JobHistoryEvent(jobId, toReturn);
   }
 
 
   @Test
@@ -344,8 +341,6 @@ public class TestJobHistoryEventHandler {
 class JHEvenHandlerForTest extends JobHistoryEventHandler {
 
 
   private EventWriter eventWriter;
-  volatile int handleEventCompleteCalls = 0;
-  volatile int handleEventStartedCalls = 0;
 
 
   public JHEvenHandlerForTest(AppContext context, int startCount) {
     super(context, startCount);
@@ -354,7 +349,7 @@ class JHEvenHandlerForTest extends JobHistoryEventHandler {
   @Override
   public void start() {
   }
-  
+
   @Override
   protected EventWriter createEventWriter(Path historyFilePath)
       throws IOException {
@@ -365,7 +360,7 @@ class JHEvenHandlerForTest extends JobHistoryEventHandler {
   @Override
   protected void closeEventWriter(JobId jobId) {
   }
-  
+
   public EventWriter getEventWriter() {
     return this.eventWriter;
   }
@@ -375,13 +370,12 @@ class JHEvenHandlerForTest extends JobHistoryEventHandler {
  * Class to help with testSigTermedFunctionality
  */
 class JHEventHandlerForSigtermTest extends JobHistoryEventHandler {
-  private MetaInfo metaInfo;
   public JHEventHandlerForSigtermTest(AppContext context, int startCount) {
     super(context, startCount);
   }
 
 
   public void addToFileMap(JobId jobId) {
-    metaInfo = Mockito.mock(MetaInfo.class);
+    MetaInfo metaInfo = Mockito.mock(MetaInfo.class);
     Mockito.when(metaInfo.isWriterActive()).thenReturn(true);
     fileMap.put(jobId, metaInfo);
   }

+ 2 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java

@@ -40,7 +40,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest
 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -248,10 +247,8 @@ public class MRAppBenchmark {
                   }
                 }
 
 
-                AMResponse amResponse = Records.newRecord(AMResponse.class);
-                amResponse.setAllocatedContainers(containers);
-                amResponse.setResponseId(request.getResponseId() + 1);
-                response.setAMResponse(amResponse);
+                response.setAllocatedContainers(containers);
+                response.setResponseId(request.getResponseId() + 1);
                 response.setNumClusterNodes(350);
                 return response;
               }

+ 4 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRAppMaster.java

@@ -84,8 +84,10 @@ public class TestMRAppMaster {
     YarnConfiguration conf = new YarnConfiguration();
     conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
     MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
-    assertEquals(stagingDir + Path.SEPARATOR + userName + Path.SEPARATOR
-        + ".staging", appMaster.stagingDirPath.toString());
+    Path userPath = new Path(stagingDir, userName);
+    Path userStagingPath = new Path(userPath, ".staging");
+    assertEquals(userStagingPath.toString(),
+      appMaster.stagingDirPath.toString());
   }
   
   
   @Test

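The TestMRAppMaster fix above builds the expected staging directory with nested Path objects instead of concatenating strings with Path.SEPARATOR, so the comparison survives Windows path handling. A small sketch of the difference (values are made up):

    import org.apache.hadoop.fs.Path;

    public class PathJoinSketch {
      public static void main(String[] args) {
        String stagingDir = "/tmp/staging";   // illustrative values
        String userName = "testuser";

        // String concatenation bakes one separator style into the expected value.
        String concatenated = stagingDir + Path.SEPARATOR + userName
            + Path.SEPARATOR + ".staging";

        // Path composition normalizes separators the same way the code under test does.
        Path composed = new Path(new Path(stagingDir, userName), ".staging");

        System.out.println(concatenated);
        System.out.println(composed);
      }
    }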
+ 14 - 7
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java

@@ -357,13 +357,20 @@ class Fetcher<K,V> extends Thread {
         return EMPTY_ATTEMPT_ID_ARRAY;
       } 
       
       
-      // Go!
-      LOG.info("fetcher#" + id + " about to shuffle output of map " + 
-               mapOutput.getMapId() + " decomp: " +
-               decompressedLength + " len: " + compressedLength + " to " +
-               mapOutput.getDescription());
-      mapOutput.shuffle(host, input, compressedLength, decompressedLength,
-                        metrics, reporter);
+      // The codecs for lzo, lz4, snappy, bz2, etc. throw java.lang.InternalError
+      // on decompression failures. Catching and re-throwing as IOException
+      // to allow fetch failure logic to be processed
+      try {
+        // Go!
+        LOG.info("fetcher#" + id + " about to shuffle output of map "
+            + mapOutput.getMapId() + " decomp: " + decompressedLength
+            + " len: " + compressedLength + " to " + mapOutput.getDescription());
+        mapOutput.shuffle(host, input, compressedLength, decompressedLength,
+            metrics, reporter);
+      } catch (java.lang.InternalError e) {
+        LOG.warn("Failed to shuffle for fetcher#"+id, e);
+        throw new IOException(e);
+      }
       
       
       // Inform the shuffle scheduler
       long endTime = System.currentTimeMillis();

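The Fetcher change above wraps java.lang.InternalError, which native decompression codecs can throw, in an IOException so the existing fetch-failure handling runs instead of the thread dying. The guard in isolation looks like this (shuffle() is a stand-in for the real call):

    import java.io.IOException;

    public class WrapInternalErrorSketch {
      interface Shuffler {
        void shuffle() throws IOException;
      }

      static void shuffleSafely(Shuffler target) throws IOException {
        try {
          target.shuffle();
        } catch (java.lang.InternalError e) {
          // Convert the Error into the checked exception the retry path understands.
          throw new IOException(e);
        }
      }
    }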
+ 60 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java

@@ -25,6 +25,7 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.HttpURLConnection;
 import java.net.SocketTimeoutException;
 import java.net.URL;
@@ -233,4 +234,62 @@ public class TestFetcher {
     verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
   }
   
   
-}
+  @SuppressWarnings("unchecked")
+  @Test(timeout=10000) 
+  public void testCopyFromHostCompressFailure() throws Exception {
+    LOG.info("testCopyFromHostCompressFailure");
+    JobConf job = new JobConf();
+    TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1");
+    ShuffleScheduler<Text, Text> ss = mock(ShuffleScheduler.class);
+    MergeManagerImpl<Text, Text> mm = mock(MergeManagerImpl.class);
+    InMemoryMapOutput<Text, Text> immo = mock(InMemoryMapOutput.class);
+    Reporter r = mock(Reporter.class);
+    ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
+    ExceptionReporter except = mock(ExceptionReporter.class);
+    SecretKey key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
+    HttpURLConnection connection = mock(HttpURLConnection.class);
+    
+    Counters.Counter allErrs = mock(Counters.Counter.class);
+    when(r.getCounter(anyString(), anyString()))
+      .thenReturn(allErrs);
+    
+    Fetcher<Text,Text> underTest = new FakeFetcher<Text,Text>(job, id, ss, mm,
+        r, metrics, except, key, connection);
+    
+
+    MapHost host = new MapHost("localhost", "http://localhost:8080/");
+    
+    ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
+    TaskAttemptID map1ID = TaskAttemptID.forName("attempt_0_1_m_1_1");
+    maps.add(map1ID);
+    TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
+    maps.add(map2ID);
+    when(ss.getMapsForHost(host)).thenReturn(maps);
+    
+    String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
+    String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
+    
+    when(connection.getResponseCode()).thenReturn(200);
+    when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH))
+      .thenReturn(replyHash);
+    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
+    ByteArrayOutputStream bout = new ByteArrayOutputStream();
+    header.write(new DataOutputStream(bout));
+    ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
+    when(connection.getInputStream()).thenReturn(in);
+    when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
+      .thenReturn(immo);
+    
+    doThrow(new java.lang.InternalError())
+    .when(immo)
+      .shuffle(any(MapHost.class), any(InputStream.class), anyLong(), 
+               anyLong(), any(ShuffleClientMetrics.class), any(Reporter.class));
+
+    underTest.copyFromHost(host);
+       
+    verify(connection)
+      .addRequestProperty(SecureShuffleUtils.HTTP_HEADER_URL_HASH, 
+          encHash);
+    verify(ss, times(1)).copyFailed(map1ID, host, true, false);
+  }
+}

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java

@@ -79,7 +79,7 @@ public class TestJobHistoryEntities {
   }
 
   /* Verify some expected values based on the history file */
-  @Test
+  @Test (timeout=10000)
   public void testCompletedJob() throws Exception {
     HistoryFileInfo info = mock(HistoryFileInfo.class);
     when(info.getConfFile()).thenReturn(fullConfPath);
@@ -104,7 +104,7 @@ public class TestJobHistoryEntities {
     assertEquals(JobState.SUCCEEDED, jobReport.getJobState());
   }
   
-  @Test
+  @Test (timeout=10000)
   public void testCompletedTask() throws Exception {
     HistoryFileInfo info = mock(HistoryFileInfo.class);
     when(info.getConfFile()).thenReturn(fullConfPath);
@@ -133,7 +133,7 @@ public class TestJobHistoryEntities {
     assertEquals(rt1Id, rt1Report.getTaskId());
   }
   
-  @Test
+  @Test (timeout=10000)
   public void testCompletedTaskAttempt() throws Exception {
     HistoryFileInfo info = mock(HistoryFileInfo.class);
     when(info.getConfFile()).thenReturn(fullConfPath);

+ 12 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEvents.java

@@ -25,7 +25,6 @@ import junit.framework.Assert;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler;
@@ -67,8 +66,17 @@ public class TestJobHistoryEvents {
      * completed maps 
     */
     HistoryContext context = new JobHistory();
+    // test start and stop states
     ((JobHistory)context).init(conf);
-    Job parsedJob = context.getJob(jobId);
+    ((JobHistory)context).start();
+    Assert.assertTrue( context.getStartTime()>0);
+    Assert.assertEquals(((JobHistory)context).getServiceState(),Service.STATE.STARTED);
+    
+    
+    ((JobHistory)context).stop();
+    Assert.assertEquals(((JobHistory)context).getServiceState(),Service.STATE.STOPPED);
+      Job parsedJob = context.getJob(jobId);
+    
     Assert.assertEquals("CompletedMaps not correct", 2,
     Assert.assertEquals("CompletedMaps not correct", 2,
         parsedJob.getCompletedMaps());
         parsedJob.getCompletedMaps());
     Assert.assertEquals(System.getProperty("user.name"), parsedJob.getUserName());
     Assert.assertEquals(System.getProperty("user.name"), parsedJob.getUserName());
@@ -177,9 +185,8 @@ public class TestJobHistoryEvents {
     @Override
     protected EventHandler<JobHistoryEvent> createJobHistoryHandler(
         AppContext context) {
-      JobHistoryEventHandler eventHandler = new JobHistoryEventHandler(
-          context, getStartCount());
-      return eventHandler;
+      return new JobHistoryEventHandler(
+              context, getStartCount());
     }
   }
 

+ 39 - 12
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java

@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.mapreduce.v2.hs;
 
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -40,6 +42,7 @@ import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.jobhistory.EventReader;
 import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent;
+import org.apache.hadoop.mapreduce.jobhistory.HistoryViewer;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
@@ -60,7 +63,6 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
 import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo;
 import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory;
 import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils;
-import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
 import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
 import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo;
 import org.apache.hadoop.net.DNSToSwitchMapping;
@@ -78,21 +80,27 @@ public class TestJobHistoryParsing {
 
   private static final String RACK_NAME = "/MyRackName";
 
+  private  ByteArrayOutputStream outContent = new ByteArrayOutputStream();
+
   public static class MyResolver implements DNSToSwitchMapping {
     @Override
     public List<String> resolve(List<String> names) {
       return Arrays.asList(new String[]{RACK_NAME});
     }
+
+    @Override
+    public void reloadCachedMappings() {
+    }
   }
 
-  @Test
+  @Test (timeout=50000)
   public void testJobInfo() throws Exception {
     JobInfo info = new JobInfo();
     Assert.assertEquals("NORMAL", info.getPriority());
     info.printAll();
   }
 
-  @Test
+  @Test (timeout=50000)
   public void testHistoryParsing() throws Exception {
     LOG.info("STARTING testHistoryParsing()");
     try {
@@ -102,7 +110,7 @@ public class TestJobHistoryParsing {
     }
   }
   
-  @Test
+  @Test (timeout=50000)
   public void testHistoryParsingWithParseErrors() throws Exception {
     LOG.info("STARTING testHistoryParsingWithParseErrors()");
     try {
@@ -317,18 +325,37 @@ public class TestJobHistoryParsing {
         }
       }
     }
+    
+    // test output for HistoryViewer
+    PrintStream stdps=System.out;
+    try {
+      System.setOut(new PrintStream(outContent));
+      HistoryViewer viewer = new HistoryViewer(fc.makeQualified(
+          fileInfo.getHistoryFile()).toString(), conf, true);
+      viewer.print();
+      
+      for (TaskInfo taskInfo : allTasks.values()) { 
+        
+        String test=  (taskInfo.getTaskStatus()==null?"":taskInfo.getTaskStatus())+" "+taskInfo.getTaskType()+" task list for "+taskInfo.getTaskId().getJobID();
+        Assert.assertTrue(outContent.toString().indexOf(test)>0);
+        Assert.assertTrue(outContent.toString().indexOf(taskInfo.getTaskId().toString())>0);
+      }
+    } finally {
+      System.setOut(stdps);
+
+    }
   }
-  
+
   // Computes finished maps similar to RecoveryService...
-  private long computeFinishedMaps(JobInfo jobInfo, 
-      int numMaps, int numSuccessfulMaps) {
+  private long computeFinishedMaps(JobInfo jobInfo, int numMaps,
+      int numSuccessfulMaps) {
     if (numMaps == numSuccessfulMaps) {
       return jobInfo.getFinishedMaps();
     }
-    
+
     long numFinishedMaps = 0;
-    Map<org.apache.hadoop.mapreduce.TaskID, TaskInfo> taskInfos = 
-        jobInfo.getAllTasks();
+    Map<org.apache.hadoop.mapreduce.TaskID, TaskInfo> taskInfos = jobInfo
+        .getAllTasks();
     for (TaskInfo taskInfo : taskInfos.values()) {
       if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) {
         ++numFinishedMaps;
@@ -337,7 +364,7 @@
     return numFinishedMaps;
   }
   
-  @Test
+  @Test (timeout=50000)
   public void testHistoryParsingForFailedAttempts() throws Exception {
     LOG.info("STARTING testHistoryParsingForFailedAttempts");
     try {
@@ -464,7 +491,7 @@
     }
   }
 
-  @Test
+  @Test (timeout=50000)
   public void testScanningOldDirs() throws Exception {
     LOG.info("STARTING testScanningOldDirs");
     try {

+ 9 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java

@@ -38,6 +38,11 @@ public class MiniMRClientClusterFactory {
 
   public static MiniMRClientCluster create(Class<?> caller, int noOfNMs,
       Configuration conf) throws IOException {
+    return create(caller, caller.getSimpleName(), noOfNMs, conf);
+  }
+
+  public static MiniMRClientCluster create(Class<?> caller, String identifier,
+      int noOfNMs, Configuration conf) throws IOException {
 
     if (conf == null) {
       conf = new Configuration();
@@ -45,7 +50,7 @@
 
     FileSystem fs = FileSystem.get(conf);
 
-    Path testRootDir = new Path("target", caller.getSimpleName() + "-tmpDir")
+    Path testRootDir = new Path("target", identifier + "-tmpDir")
         .makeQualified(fs);
     Path appJar = new Path(testRootDir, "MRAppJar.jar");
 
@@ -65,10 +70,10 @@ public class MiniMRClientClusterFactory {
     fs.setPermission(remoteCallerJar, new FsPermission("744"));
     job.addFileToClassPath(remoteCallerJar);
 
-    MiniMRYarnCluster miniMRYarnCluster = new MiniMRYarnCluster(caller
-        .getSimpleName(), noOfNMs);
+    MiniMRYarnCluster miniMRYarnCluster = new MiniMRYarnCluster(identifier,
+        noOfNMs);
     job.getConfiguration().set("minimrclientcluster.caller.name",
-        caller.getSimpleName());
+        identifier);
     job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number",
         noOfNMs);
     miniMRYarnCluster.init(job.getConfiguration());

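Editor's sketch (not part of the patch above): the identifier-taking create() overload exists so that concurrent test runs no longer collide on the same "target/<caller>-tmpDir" working directory. A minimal hedged example of how a test might call it; the test class name, job submission, and lifecycle calls are assumptions:

    import java.util.Random;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapred.MiniMRClientCluster;
    import org.apache.hadoop.mapred.MiniMRClientClusterFactory;

    public class ExampleMiniClusterTest {
      public void runWithUniqueWorkDir() throws Exception {
        Configuration conf = new Configuration();
        // A per-run identifier keeps parallel executions in separate
        // target/<identifier>-tmpDir directories, mirroring what MiniMRCluster
        // now does with a random suffix.
        String identifier = ExampleMiniClusterTest.class.getSimpleName() + "_"
            + new Random().nextInt(Integer.MAX_VALUE);
        MiniMRClientCluster cluster = MiniMRClientClusterFactory.create(
            ExampleMiniClusterTest.class, identifier, 1, conf);
        try {
          // submit MapReduce jobs against cluster.getConfig() here
        } finally {
          cluster.stop();
        }
      }
    }
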
+ 4 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRCluster.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.mapred;
 
 import java.io.IOException;
+import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -177,8 +178,10 @@ public class MiniMRCluster {
       int numTrackerToExclude, Clock clock) throws IOException {
     if (conf == null) conf = new JobConf();
     FileSystem.setDefaultUri(conf, namenode);
+    String identifier = this.getClass().getSimpleName() + "_"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE));
     mrClientCluster = MiniMRClientClusterFactory.create(this.getClass(),
-        numTaskTrackers, conf);
+        identifier, numTaskTrackers, conf);
   }
 
   public UserGroupInformation getUgi() {

+ 7 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java

@@ -549,15 +549,19 @@ public class ShuffleHandler extends AbstractService
           ContainerLocalizer.USERCACHE + "/" + user + "/"
               + ContainerLocalizer.APPCACHE + "/"
               + ConverterUtils.toString(appID) + "/output" + "/" + mapId;
-      LOG.debug("DEBUG0 " + base);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("DEBUG0 " + base);
+      }
       // Index file
       Path indexFileName = lDirAlloc.getLocalPathToRead(
           base + "/file.out.index", conf);
       // Map-output file
       Path mapOutputFileName = lDirAlloc.getLocalPathToRead(
           base + "/file.out", conf);
-      LOG.debug("DEBUG1 " + base + " : " + mapOutputFileName + " : " +
-          indexFileName);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("DEBUG1 " + base + " : " + mapOutputFileName + " : "
+            + indexFileName);
+      }
       final IndexRecord info = 
         indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
       final ShuffleHeader header =

+ 26 - 5
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java

@@ -30,6 +30,7 @@ import org.apache.hadoop.tools.util.DistCpUtils;
 import org.apache.hadoop.security.Credentials;
 
 import java.io.IOException;
+import java.lang.reflect.Constructor;
 
 /**
  * The CopyListing abstraction is responsible for how the list of
@@ -193,14 +194,34 @@ public abstract class CopyListing extends Configured {
    * @param credentials Credentials object on which the FS delegation tokens are cached
    * @param options The input Options, to help choose the appropriate CopyListing Implementation.
    * @return An instance of the appropriate CopyListing implementation.
+   * @throws java.io.IOException - Exception if any
    */
   public static CopyListing getCopyListing(Configuration configuration,
                                            Credentials credentials,
-                                           DistCpOptions options) {
-    if (options.getSourceFileListing() == null) {
-      return new GlobbedCopyListing(configuration, credentials);
-    } else {
-      return new FileBasedCopyListing(configuration, credentials);
+                                           DistCpOptions options)
+      throws IOException {
+
+    String copyListingClassName = configuration.get(DistCpConstants.
+        CONF_LABEL_COPY_LISTING_CLASS, "");
+    Class<? extends CopyListing> copyListingClass;
+    try {
+      if (! copyListingClassName.isEmpty()) {
+        copyListingClass = configuration.getClass(DistCpConstants.
+            CONF_LABEL_COPY_LISTING_CLASS, GlobbedCopyListing.class,
+            CopyListing.class);
+      } else {
+        if (options.getSourceFileListing() == null) {
+            copyListingClass = GlobbedCopyListing.class;
+        } else {
+            copyListingClass = FileBasedCopyListing.class;
+        }
+      }
+      copyListingClassName = copyListingClass.getName();
+      Constructor<? extends CopyListing> constructor = copyListingClass.
+          getDeclaredConstructor(Configuration.class, Credentials.class);
+      return constructor.newInstance(configuration, credentials);
+    } catch (Exception e) {
+      throw new IOException("Unable to instantiate " + copyListingClassName, e);
     }
   }
 

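Editor's sketch (not in the patch): how a caller might exercise the distcp.copy.listing.class override added above. MyCopyListing, the options object, and listingFilePath are assumptions; the only requirement implied by the reflection code is a (Configuration, Credentials) constructor, and any instantiation failure surfaces as an IOException:

    Configuration conf = new Configuration();
    conf.setClass(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS,
        MyCopyListing.class, CopyListing.class);
    // With the key unset, getCopyListing still falls back to
    // GlobbedCopyListing or FileBasedCopyListing as before.
    CopyListing listing = CopyListing.getCopyListing(conf, new Credentials(),
        options);
    listing.buildListing(listingFilePath, options);
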
+ 2 - 2
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java

@@ -319,7 +319,7 @@ public class DistCp extends Configured implements Tool {
    * @return Returns the path where the copy listing is created
    * @throws IOException - If any
    */
-  private Path createInputFileListing(Job job) throws IOException {
+  protected Path createInputFileListing(Job job) throws IOException {
     Path fileListingPath = getFileListingPath();
     CopyListing copyListing = CopyListing.getCopyListing(job.getConfiguration(),
         job.getCredentials(), inputOptions);
@@ -334,7 +334,7 @@ public class DistCp extends Configured implements Tool {
    * @return - Path where the copy listing file has to be saved
    * @throws IOException - Exception if any
    */
-  private Path getFileListingPath() throws IOException {
+  protected Path getFileListingPath() throws IOException {
     String fileListPathStr = metaFolder + "/fileList.seq";
     Path path = new Path(fileListPathStr);
     return new Path(path.toUri().normalize().toString());

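Editor's sketch (not in the patch): widening these two methods to protected is what lets a DistCp subclass reroute where the copy listing is staged. The subclass below is purely illustrative and the staging path is an assumption:

    public class CustomDistCp extends DistCp {
      public CustomDistCp(Configuration configuration, DistCpOptions options)
          throws Exception {
        super(configuration, options);
      }

      @Override
      protected Path getFileListingPath() throws IOException {
        // Stage fileList.seq under a custom meta folder instead of the
        // default metaFolder location.
        return new Path("/tmp/custom-distcp-meta/fileList.seq");
      }
    }
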
+ 3 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

@@ -82,6 +82,9 @@ public class DistCpConstants {
   /* Meta folder where the job's intermediate data is kept */
   public static final String CONF_LABEL_META_FOLDER = "distcp.meta.folder";
 
+  /* DistCp CopyListing class override param */
+  public static final String CONF_LABEL_COPY_LISTING_CLASS = "distcp.copy.listing.class";
+
   /**
    * Conf label for SSL Trust-store location.
    */

+ 30 - 7
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

@@ -127,17 +127,20 @@ public class SimpleCopyListing extends CopyListing {
             if (LOG.isDebugEnabled()) {
               LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
             }
-            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot,
+                localFile, options);
 
             if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
               if (LOG.isDebugEnabled()) {
                 LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
               }
-              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
+                  localFile, options);
             }
           }
         } else {
-          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
+          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot,
+              localFile, options);
         }
       }
     } finally {
@@ -169,6 +172,17 @@ public class SimpleCopyListing extends CopyListing {
     }
   }
 
+  /**
+   * Provide an option to skip copy of a path, Allows for exclusion
+   * of files such as {@link org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#SUCCEEDED_FILE_NAME}
+   * @param path - Path being considered for copy while building the file listing
+   * @param options - Input options passed during DistCp invocation
+   * @return - True if the path should be considered for copy, false otherwise
+   */
+  protected boolean shouldCopy(Path path, DistCpOptions options) {
+    return true;
+  }
+
   /** {@inheritDoc} */
   @Override
   protected long getBytesToCopy() {
@@ -210,7 +224,9 @@ public class SimpleCopyListing extends CopyListing {
 
   private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
                                          FileStatus sourceStatus,
-                                         Path sourcePathRoot, boolean localFile)
+                                         Path sourcePathRoot,
+                                         boolean localFile,
+                                         DistCpOptions options)
                                          throws IOException {
     FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
     Stack<FileStatus> pathStack = new Stack<FileStatus>();
@@ -221,7 +237,8 @@ public class SimpleCopyListing extends CopyListing {
         if (LOG.isDebugEnabled())
           LOG.debug("Recording source-path: "
                     + sourceStatus.getPath() + " for copy.");
-        writeToFileListing(fileListWriter, child, sourcePathRoot, localFile);
+        writeToFileListing(fileListWriter, child, sourcePathRoot,
+             localFile, options);
         if (isDirectoryAndNotEmpty(sourceFS, child)) {
           if (LOG.isDebugEnabled())
             LOG.debug("Traversing non-empty source dir: "
@@ -233,8 +250,10 @@ public class SimpleCopyListing extends CopyListing {
   }
 
   private void writeToFileListing(SequenceFile.Writer fileListWriter,
-                                  FileStatus fileStatus, Path sourcePathRoot,
-                                  boolean localFile) throws IOException {
+                                  FileStatus fileStatus,
+                                  Path sourcePathRoot,
+                                  boolean localFile,
+                                  DistCpOptions options) throws IOException {
     if (fileStatus.getPath().equals(sourcePathRoot) && fileStatus.isDirectory())
       return; // Skip the root-paths.
 
@@ -248,6 +267,10 @@ public class SimpleCopyListing extends CopyListing {
       status = getFileStatus(fileStatus);
     }
 
+    if (!shouldCopy(fileStatus.getPath(), options)) {
+      return;
+    }
+
     fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
         fileStatus.getPath())), status);
     fileListWriter.sync();

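Editor's sketch (not in the patch): the shouldCopy() hook introduced above is meant to be overridden; this mirrors the testSkipCopy case added further down in this changeset. The subclass name is made up, and it assumes SimpleCopyListing's constructor is accessible from wherever the subclass lives:

    public class SkipSuccessMarkerListing extends SimpleCopyListing {
      public SkipSuccessMarkerListing(Configuration configuration,
                                      Credentials credentials) {
        super(configuration, credentials);
      }

      @Override
      protected boolean shouldCopy(Path path, DistCpOptions options) {
        // Drop FileOutputCommitter _SUCCESS markers; copy everything else.
        return !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME);
      }
    }
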
+ 4 - 5
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -124,7 +124,7 @@ public class RetriableFileCopyCommand extends RetriableCommand {
             tmpTargetPath, true, BUFFER_SIZE,
             getReplicationFactor(fileAttributes, sourceFileStatus, targetFS, tmpTargetPath),
             getBlockSize(fileAttributes, sourceFileStatus, targetFS, tmpTargetPath), context));
-    return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, true, context);
+    return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, context);
   }
 
   private void compareFileLengths(FileStatus sourceFileStatus, Path target,
@@ -170,8 +170,8 @@ public class RetriableFileCopyCommand extends RetriableCommand {
   }
 
   private long copyBytes(FileStatus sourceFileStatus, OutputStream outStream,
-                         int bufferSize, boolean mustCloseStream,
-                         Mapper.Context context) throws IOException {
+                         int bufferSize, Mapper.Context context)
+      throws IOException {
     Path source = sourceFileStatus.getPath();
     byte buf[] = new byte[bufferSize];
     ThrottledInputStream inStream = null;
@@ -187,8 +187,7 @@ public class RetriableFileCopyCommand extends RetriableCommand {
         bytesRead = inStream.read(buf);
       }
     } finally {
-      if (mustCloseStream)
-        IOUtils.cleanup(LOG, outStream, inStream);
+      IOUtils.cleanup(LOG, outStream, inStream);
     }
 
     return totalBytesRead;

+ 5 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java

@@ -52,6 +52,11 @@ public class ThrottledInputStream extends InputStream {
     this.maxBytesPerSec = maxBytesPerSec;
   }
 
+  @Override
+  public void close() throws IOException {
+    rawStream.close();
+  }
+
   /** @inheritDoc */
   @Override
   public int read() throws IOException {

+ 37 - 4
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
 import org.apache.hadoop.tools.util.TestDistCpUtils;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.security.Credentials;
@@ -79,7 +80,39 @@ public class TestCopyListing extends SimpleCopyListing {
     return 0;
   }
 
-  @Test
+  @Test(timeout=10000)
+  public void testSkipCopy() throws Exception {
+    SimpleCopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS) {
+      @Override
+      protected boolean shouldCopy(Path path, DistCpOptions options) {
+        return !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME);
+      }
+    };
+    FileSystem fs = FileSystem.get(getConf());
+    List<Path> srcPaths = new ArrayList<Path>();
+    srcPaths.add(new Path("/tmp/in4/1"));
+    srcPaths.add(new Path("/tmp/in4/2"));
+    Path target = new Path("/tmp/out4/1");
+    TestDistCpUtils.createFile(fs, "/tmp/in4/1/_SUCCESS");
+    TestDistCpUtils.createFile(fs, "/tmp/in4/1/file");
+    TestDistCpUtils.createFile(fs, "/tmp/in4/2");
+    fs.mkdirs(target);
+    DistCpOptions options = new DistCpOptions(srcPaths, target);
+    Path listingFile = new Path("/tmp/list4");
+    listing.buildListing(listingFile, options);
+    Assert.assertEquals(listing.getNumberOfPaths(), 2);
+    SequenceFile.Reader reader = new SequenceFile.Reader(getConf(),
+        SequenceFile.Reader.file(listingFile));
+    FileStatus fileStatus = new FileStatus();
+    Text relativePath = new Text();
+    Assert.assertTrue(reader.next(relativePath, fileStatus));
+    Assert.assertEquals(relativePath.toString(), "/1/file");
+    Assert.assertTrue(reader.next(relativePath, fileStatus));
+    Assert.assertEquals(relativePath.toString(), "/2");
+    Assert.assertFalse(reader.next(relativePath, fileStatus));
+  }
+
+  @Test(timeout=10000)
   public void testMultipleSrcToFile() {
     FileSystem fs = null;
     try {
@@ -124,7 +157,7 @@ public class TestCopyListing extends SimpleCopyListing {
     }
   }
 
-  @Test
+  @Test(timeout=10000)
   public void testDuplicates() {
     FileSystem fs = null;
     try {
@@ -150,7 +183,7 @@ public class TestCopyListing extends SimpleCopyListing {
     }
   }
 
-  @Test
+  @Test(timeout=10000)
   public void testBuildListing() {
     FileSystem fs = null;
     try {
@@ -206,7 +239,7 @@ public class TestCopyListing extends SimpleCopyListing {
     }
   }
 
-  @Test
+  @Test(timeout=10000)
   public void testBuildListingForSingleFile() {
     FileSystem fs = null;
     String testRootString = "/singleFileListing";

+ 71 - 20
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Cluster;
 import org.apache.hadoop.mapreduce.JobSubmissionFiles;
+import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.tools.util.TestDistCpUtils;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -34,6 +35,7 @@ import org.junit.Test;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 public class TestIntegration {
@@ -68,7 +70,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleFileMissingTarget() {
     caseSingleFileMissingTarget(false);
     caseSingleFileMissingTarget(true);
@@ -91,7+93,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleFileTargetFile() {
     caseSingleFileTargetFile(false);
     caseSingleFileTargetFile(true);
@@ -101,7 +103,7 @@ public class TestIntegration {
 
     try {
       addEntries(listFile, "singlefile1/file1");
-      createFiles("singlefile1/file1", target.toString());
+      createFiles("singlefile1/file1", "target");
 
       runTest(listFile, target, sync);
 
@@ -114,7 +116,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleFileTargetDir() {
     caseSingleFileTargetDir(false);
     caseSingleFileTargetDir(true);
@@ -138,7 +140,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleDirTargetMissing() {
     caseSingleDirTargetMissing(false);
     caseSingleDirTargetMissing(true);
@@ -161,7 +163,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleDirTargetPresent() {
 
     try {
@@ -180,7 +182,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateSingleDirTargetPresent() {
 
     try {
@@ -199,7 +201,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testMultiFileTargetPresent() {
     caseMultiFileTargetPresent(false);
     caseMultiFileTargetPresent(true);
@@ -223,7 +225,56 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
+  public void testCustomCopyListing() {
+
+    try {
+      addEntries(listFile, "multifile1/file3", "multifile1/file4", "multifile1/file5");
+      createFiles("multifile1/file3", "multifile1/file4", "multifile1/file5");
+      mkdirs(target.toString());
+
+      Configuration conf = getConf();
+      try {
+        conf.setClass(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS,
+            CustomCopyListing.class, CopyListing.class);
+        DistCpOptions options = new DistCpOptions(Arrays.
+            asList(new Path(root + "/" + "multifile1")), target);
+        options.setSyncFolder(true);
+        options.setDeleteMissing(false);
+        options.setOverwrite(false);
+        try {
+          new DistCp(conf, options).execute();
+        } catch (Exception e) {
+          LOG.error("Exception encountered ", e);
+          throw new IOException(e);
+        }
+      } finally {
+        conf.unset(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS);
+      }
+
+      checkResult(target, 2, "file4", "file5");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  private static class CustomCopyListing extends SimpleCopyListing {
+
+    public CustomCopyListing(Configuration configuration,
+                             Credentials credentials) {
+      super(configuration, credentials);
+    }
+
+    @Override
+    protected boolean shouldCopy(Path path, DistCpOptions options) {
+      return !path.getName().equals("file3");
+    }
+  }
+
+  @Test(timeout=100000)
   public void testMultiFileTargetMissing() {
     caseMultiFileTargetMissing(false);
     caseMultiFileTargetMissing(true);
@@ -246,7 +297,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testMultiDirTargetPresent() {
 
     try {
@@ -265,7 +316,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateMultiDirTargetPresent() {
 
     try {
@@ -284,7 +335,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testMultiDirTargetMissing() {
 
     try {
@@ -304,7 +355,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateMultiDirTargetMissing() {
 
     try {
@@ -323,7 +374,7 @@ public class TestIntegration {
     }
   }
   
-  @Test
+  @Test(timeout=100000)
   public void testDeleteMissingInDestination() {
     
     try {
@@ -343,7 +394,7 @@ public class TestIntegration {
     }
   }
   
-  @Test
+  @Test(timeout=100000)
   public void testOverwrite() {
     byte[] contents1 = "contents1".getBytes();
     byte[] contents2 = "contents2".getBytes();
@@ -375,7 +426,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testGlobTargetMissingSingleLevel() {
 
     try {
@@ -398,7 +449,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateGlobTargetMissingSingleLevel() {
 
     try {
@@ -420,7 +471,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testGlobTargetMissingMultiLevel() {
 
     try {
@@ -444,7 +495,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateGlobTargetMissingMultiLevel() {
 
     try {
@@ -468,7 +519,7 @@ public class TestIntegration {
     }
   }
   
-  @Test
+  @Test(timeout=100000)
   public void testCleanup() {
     try {
       Path sourcePath = new Path("noscheme:///file");

+ 28 - 0
hadoop-yarn-project/CHANGES.txt

@@ -18,6 +18,13 @@ Trunk - Unreleased
 
   BUG FIXES
 
+    YARN-488. TestContainerManagerSecurity fails on Windows. (Chris Nauroth
+    via hitesh)
+
+    YARN-490. TestDistributedShell fails on Windows. (Chris Nauroth via hitesh)
+
+    YARN-491. TestContainerLogsPage fails on Windows. (Chris Nauroth via hitesh)
+
   BREAKDOWN OF HADOOP-8562 SUBTASKS
 
     YARN-158. Yarn creating package-info.java must not depend on sh.
@@ -52,6 +59,9 @@ Release 2.0.5-beta - UNRELEASED
 
   INCOMPATIBLE CHANGES
 
+    YARN-396. Rationalize AllocateResponse in RM Scheduler API. (Zhijie Shen
+    via hitesh)
+
   NEW FEATURES
 
   IMPROVEMENTS
@@ -72,6 +82,15 @@ Release 2.0.5-beta - UNRELEASED
     YARN-237. Refreshing the RM page forgets how many rows I had in my
     Datatables (jian he via bobby)
 
+    YARN-481. Add AM Host and RPC Port to ApplicationCLI Status Output 
+    (Chris Riccomini via bikas)
+
+    YARN-297. Improve hashCode implementations for PB records. (Xuan Gong via
+    hitesh)
+
+    YARN-417. Create AMRMClient wrapper that provides asynchronous callbacks.
+    (Sandy Ryza via bikas)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -100,6 +119,12 @@ Release 2.0.5-beta - UNRELEASED
     YARN-196. Nodemanager should be more robust in handling connection failure
     to ResourceManager when a cluster is started (Xuan Gong via hitesh)
 
+    YARN-485. TestProcfsProcessTree#testProcessTree() doesn't wait long enough 
+    for the process to die. (kkambatl via tucu)
+ 
+    YARN-470. Support a way to disable resource monitoring on the NodeManager.
+    (Siddharth Seth via hitesh)
+
 Release 2.0.4-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -408,6 +433,9 @@ Release 0.23.7 - UNRELEASED
     YARN-468. coverage fix for org.apache.hadoop.yarn.server.webproxy.amfilter
     (Aleksey Gorshkov via bobby)
 
+    YARN-200. yarn log does not output all needed information, and is in a
+    binary format (Ravi Prakash via jlowe)
+
   OPTIMIZATIONS
 
     YARN-357. App submission should not be synchronized (daryn)

+ 86 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java

@@ -18,19 +18,23 @@
 
 package org.apache.hadoop.yarn.api.protocolrecords;
 
+import java.util.List;
+
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.AMRMProtocol;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.api.records.Resource;
 
 /**
  * <p>The response sent by the <code>ResourceManager</code> the  
 * <code>ApplicationMaster</code> during resource negotiation.</p>
 *
- * <p>The response, via {@link AMResponse}, includes:
+ * <p>The response, includes:
  *   <ul>
  *     <li>Response ID to track duplicate responses.</li>
  *     <li>
@@ -42,6 +46,8 @@ import org.apache.hadoop.yarn.api.records.Container;
  *       The available headroom for resources in the cluster for the
  *       application. 
  *     </li>
+ *     <li>A list of nodes whose status has been updated.</li>
+ *     <li>The number of available nodes in a cluster.</li>
  *   </ul>
  * </p>
  * 
@@ -51,18 +57,90 @@ import org.apache.hadoop.yarn.api.records.Container;
 @Stable
 public interface AllocateResponse {
   /**
-   * Get the {@link AMResponse} sent by the <code>ResourceManager</code>.
-   * @return <code>AMResponse</code> sent by the <code>ResourceManager</code>
+   * Should the <code>ApplicationMaster</code> reboot for being horribly
+   * out-of-sync with the <code>ResourceManager</code> as deigned by
+   * {@link #getResponseId()}?
+   *
+   * @return <code>true</code> if the <code>ApplicationMaster</code> should
+   *         reboot, <code>false</code> otherwise
    */
    */
   @Public
   @Stable
+  public boolean getReboot();
 
 
   @Private
   @Private
   @Unstable
   @Unstable
-  public abstract void setAMResponse(AMResponse amResponse);
-  
-  
+  public void setReboot(boolean reboot);
+
+  /**
+   * Get the <em>last response id</em>.
+   * @return <em>last response id</em>
+   */
+  @Public
+  @Stable
+  public int getResponseId();
+
+  @Private
+  @Unstable
+  public void setResponseId(int responseId);
+
+  /**
+   * Get the list of <em>newly allocated</em> <code>Container</code> by the
+   * <code>ResourceManager</code>.
+   * @return list of <em>newly allocated</em> <code>Container</code>
+   */
+  @Public
+  @Stable
+  public List<Container> getAllocatedContainers();
+
+  /**
+   * Set the list of <em>newly allocated</em> <code>Container</code> by the
+   * <code>ResourceManager</code>.
+   * @param containers list of <em>newly allocated</em> <code>Container</code>
+   */
+  @Public
+  @Stable
+  public void setAllocatedContainers(List<Container> containers);
+
+  /**
+   * Get the <em>available headroom</em> for resources in the cluster for the
+   * application.
+   * @return limit of available headroom for resources in the cluster for the
+   * application
+   */
+  @Public
+  @Stable
+  public Resource getAvailableResources();
+
+  @Private
+  @Unstable
+  public void setAvailableResources(Resource limit);
+
+  /**
+   * Get the list of <em>completed containers' statuses</em>.
+   * @return the list of <em>completed containers' statuses</em>
+   */
+  @Public
+  @Stable
+  public List<ContainerStatus> getCompletedContainersStatuses();
+
+  @Private
+  @Unstable
+  public void setCompletedContainersStatuses(List<ContainerStatus> containers);
+
+  /**
+   * Get the list of <em>updated <code>NodeReport</code>s</em>. Updates could
+   * be changes in health, availability etc of the nodes.
+   * @return The delta of updated nodes since the last response
+   */
+  @Public
+  @Unstable
+  public List<NodeReport> getUpdatedNodes();
+
+  @Private
+  @Unstable
+  public void setUpdatedNodes(final List<NodeReport> updatedNodes);
+
   /**
    * Get the number of hosts available on the cluster.
    * @return the available host count.

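Editor's sketch (not in the patch): with the AMResponse indirection removed, an ApplicationMaster reads allocation results directly off AllocateResponse. The allocate() call, the request object, and the surrounding heartbeat loop are assumptions:

    AllocateResponse response = amRmProtocol.allocate(allocateRequest);
    if (response.getReboot()) {
      // The RM considers this AM out of sync; resync or restart it.
    }
    Resource headroom = response.getAvailableResources();
    for (Container granted : response.getAllocatedContainers()) {
      // launch tasks on each newly granted container
    }
    for (ContainerStatus finished : response.getCompletedContainersStatuses()) {
      // account for completed or failed containers
    }
    for (NodeReport changed : response.getUpdatedNodes()) {
      // react to node health / availability changes
    }
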
+ 305 - 29
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java

@@ -19,11 +19,24 @@
 package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
 
 
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.ProtoBase;
-import org.apache.hadoop.yarn.api.records.impl.pb.AMResponsePBImpl;
-import org.apache.hadoop.yarn.proto.YarnProtos.AMResponseProto;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerStatusPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.NodeReportPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateResponseProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateResponseProtoOrBuilder;
 
@@ -35,7 +48,12 @@ public class AllocateResponsePBImpl extends ProtoBase<AllocateResponseProto>
   AllocateResponseProto.Builder builder = null;
   boolean viaProto = false;
   
-  private AMResponse amResponse;
+  Resource limit;
+
+  private List<Container> allocatedContainers = null;
+  private List<ContainerStatus> completedContainersStatuses = null;
+
+  private List<NodeReport> updatedNodes = null;
   
   
   public AllocateResponsePBImpl() {
@@ -47,20 +65,38 @@ public class AllocateResponsePBImpl extends ProtoBase<AllocateResponseProto>
     viaProto = true;
   }
   
-  public AllocateResponseProto getProto() {
+  public synchronized AllocateResponseProto getProto() {
       mergeLocalToProto();
     proto = viaProto ? proto : builder.build();
     viaProto = true;
     return proto;
   }
 
-  private void mergeLocalToBuilder() {
-    if (this.amResponse != null) {
-      builder.setAMResponse(convertToProtoFormat(this.amResponse));
+  private synchronized void mergeLocalToBuilder() {
+    if (this.allocatedContainers != null) {
+      builder.clearAllocatedContainers();
+      Iterable<ContainerProto> iterable =
+          getProtoIterable(this.allocatedContainers);
+      builder.addAllAllocatedContainers(iterable);
+    }
+    if (this.completedContainersStatuses != null) {
+      builder.clearCompletedContainerStatuses();
+      Iterable<ContainerStatusProto> iterable =
+          getContainerStatusProtoIterable(this.completedContainersStatuses);
+      builder.addAllCompletedContainerStatuses(iterable);
+    }
+    if (this.updatedNodes != null) {
+      builder.clearUpdatedNodes();
+      Iterable<NodeReportProto> iterable =
+          getNodeReportProtoIterable(this.updatedNodes);
+      builder.addAllUpdatedNodes(iterable);
+    }
+    if (this.limit != null) {
+      builder.setLimit(convertToProtoFormat(this.limit));
     }
   }
 
-  private void mergeLocalToProto() {
+  private synchronized void mergeLocalToProto() {
     if (viaProto) 
       maybeInitBuilder();
     mergeLocalToBuilder();
@@ -68,53 +104,293 @@ public class AllocateResponsePBImpl extends ProtoBase<AllocateResponseProto>
     viaProto = true;
   }
 
-  private void maybeInitBuilder() {
+  private synchronized void maybeInitBuilder() {
     if (viaProto || builder == null) {
       builder = AllocateResponseProto.newBuilder(proto);
     }
     viaProto = false;
   }
-    
   
   @Override
-  public AMResponse getAMResponse() {
+  public synchronized boolean getReboot() {
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    return (p.getReboot());
+  }
+
+  @Override
+  public synchronized void setReboot(boolean reboot) {
+    maybeInitBuilder();
+    builder.setReboot((reboot));
+  }
+
+  @Override
+  public synchronized int getResponseId() {
     AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
-    if (this.amResponse != null) {
-      return this.amResponse;
+    return (p.getResponseId());
+  }
+
+  @Override
+  public synchronized void setResponseId(int responseId) {
+    maybeInitBuilder();
+    builder.setResponseId((responseId));
+  }
+
+  @Override
+  public synchronized Resource getAvailableResources() {
+    if (this.limit != null) {
+      return this.limit;
     }
-    if (!p.hasAMResponse()) {
+
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    if (!p.hasLimit()) {
       return null;
     }
-    this.amResponse= convertFromProtoFormat(p.getAMResponse());
-    return this.amResponse;
+    this.limit = convertFromProtoFormat(p.getLimit());
+    return this.limit;
   }
 
   @Override
-  public void setAMResponse(AMResponse aMResponse) {
+  public synchronized void setAvailableResources(Resource limit) {
    maybeInitBuilder();
-    if (aMResponse == null) 
-      builder.clearAMResponse();
-    this.amResponse = aMResponse;
+    if (limit == null)
+      builder.clearLimit();
+    this.limit = limit;
  }
-  
+
  @Override
-  public int getNumClusterNodes() {
+  public synchronized List<NodeReport> getUpdatedNodes() {
+    initLocalNewNodeReportList();
+    return this.updatedNodes;
+  }
+  @Override
+  public synchronized void setUpdatedNodes(
+      final List<NodeReport> updatedNodes) {
+    if (updatedNodes == null) {
+      this.updatedNodes.clear();
+      return;
+    }
+    this.updatedNodes = new ArrayList<NodeReport>(updatedNodes.size());
+    this.updatedNodes.addAll(updatedNodes);
+  }
+
+  @Override
+  public synchronized List<Container> getAllocatedContainers() {
+    initLocalNewContainerList();
+    return this.allocatedContainers;
+  }
+
+  @Override
+  public synchronized void setAllocatedContainers(
+      final List<Container> containers) {
+    if (containers == null)
+      return;
+    // this looks like a bug because it results in append and not set
+    initLocalNewContainerList();
+    allocatedContainers.addAll(containers);
+  }
+
+  //// Finished containers
+  @Override
+  public synchronized List<ContainerStatus> getCompletedContainersStatuses() {
+    initLocalFinishedContainerList();
+    return this.completedContainersStatuses;
+  }
+
+  @Override
+  public synchronized void setCompletedContainersStatuses(
+      final List<ContainerStatus> containers) {
+    if (containers == null)
+      return;
+    initLocalFinishedContainerList();
+    completedContainersStatuses.addAll(containers);
+  }
+
+  @Override
+  public synchronized int getNumClusterNodes() {
    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
    return p.getNumClusterNodes();
  }
-  
+
  @Override
-  public void setNumClusterNodes(int numNodes) {
+  public synchronized void setNumClusterNodes(int numNodes) {
    maybeInitBuilder();
    builder.setNumClusterNodes(numNodes);
  }

+  // Once this is called. updatedNodes will never be null - until a getProto is
+  // called.
+  private synchronized void initLocalNewNodeReportList() {
+    if (this.updatedNodes != null) {
+      return;
+    }
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<NodeReportProto> list = p.getUpdatedNodesList();
+    updatedNodes = new ArrayList<NodeReport>(list.size());
+
+    for (NodeReportProto n : list) {
+      updatedNodes.add(convertFromProtoFormat(n));
+    }
+  }
+
+  // Once this is called. containerList will never be null - until a getProto
+  // is called.
+  private synchronized void initLocalNewContainerList() {
+    if (this.allocatedContainers != null) {
+      return;
+    }
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<ContainerProto> list = p.getAllocatedContainersList();
+    allocatedContainers = new ArrayList<Container>();
+
+    for (ContainerProto c : list) {
+      allocatedContainers.add(convertFromProtoFormat(c));
+    }
+  }
+
+  private synchronized Iterable<ContainerProto> getProtoIterable(
+      final List<Container> newContainersList) {
+    maybeInitBuilder();
+    return new Iterable<ContainerProto>() {
+      @Override
+      public synchronized Iterator<ContainerProto> iterator() {
+        return new Iterator<ContainerProto>() {
+
+          Iterator<Container> iter = newContainersList.iterator();
+
+          @Override
+          public synchronized boolean hasNext() {
+            return iter.hasNext();
+          }
+
+          @Override
+          public synchronized ContainerProto next() {
+            return convertToProtoFormat(iter.next());
+          }
+
+          @Override
+          public synchronized void remove() {
+            throw new UnsupportedOperationException();
+
+          }
+        };
+
+      }
+    };
+  }
+
+  private synchronized Iterable<ContainerStatusProto>
+  getContainerStatusProtoIterable(
+      final List<ContainerStatus> newContainersList) {
+    maybeInitBuilder();
+    return new Iterable<ContainerStatusProto>() {
+      @Override
+      public synchronized Iterator<ContainerStatusProto> iterator() {
+        return new Iterator<ContainerStatusProto>() {
+
+          Iterator<ContainerStatus> iter = newContainersList.iterator();
+
+          @Override
+          public synchronized boolean hasNext() {
+            return iter.hasNext();
+          }
+
+          @Override
+          public synchronized ContainerStatusProto next() {
+            return convertToProtoFormat(iter.next());
+          }
+
+          @Override
+          public synchronized void remove() {
+            throw new UnsupportedOperationException();
+
+          }
+        };
+
+      }
+    };
+  }
-  private AMResponsePBImpl convertFromProtoFormat(AMResponseProto p) {
-    return new AMResponsePBImpl(p);
+  private synchronized Iterable<NodeReportProto>
+  getNodeReportProtoIterable(
+      final List<NodeReport> newNodeReportsList) {
+    maybeInitBuilder();
+    return new Iterable<NodeReportProto>() {
+      @Override
+      public synchronized Iterator<NodeReportProto> iterator() {
+        return new Iterator<NodeReportProto>() {
+
+          Iterator<NodeReport> iter = newNodeReportsList.iterator();
+
+          @Override
+          public synchronized boolean hasNext() {
+            return iter.hasNext();
+          }
+
+          @Override
+          public synchronized NodeReportProto next() {
+            return convertToProtoFormat(iter.next());
+          }
+
+          @Override
+          public synchronized void remove() {
+            throw new UnsupportedOperationException();
+
+          }
+        };
+
+      }
+    };
  }

-  private AMResponseProto convertToProtoFormat(AMResponse t) {
-    return ((AMResponsePBImpl)t).getProto();
+  // Once this is called. containerList will never be null - until a getProto
+  // is called.
+  private synchronized void initLocalFinishedContainerList() {
+    if (this.completedContainersStatuses != null) {
+      return;
+    }
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<ContainerStatusProto> list = p.getCompletedContainerStatusesList();
+    completedContainersStatuses = new ArrayList<ContainerStatus>();
+
+    for (ContainerStatusProto c : list) {
+      completedContainersStatuses.add(convertFromProtoFormat(c));
+    }
+  }
+
+  private synchronized NodeReportPBImpl convertFromProtoFormat(
+      NodeReportProto p) {
+    return new NodeReportPBImpl(p);
  }
+
+  private synchronized NodeReportProto convertToProtoFormat(NodeReport t) {
+    return ((NodeReportPBImpl)t).getProto();
+  }
+
+  private synchronized ContainerPBImpl convertFromProtoFormat(
+      ContainerProto p) {
+    return new ContainerPBImpl(p);
+  }
+
+  private synchronized ContainerProto convertToProtoFormat(Container t) {
+    return ((ContainerPBImpl)t).getProto();
+  }
+
+  private synchronized ContainerStatusPBImpl convertFromProtoFormat(
+      ContainerStatusProto p) {
+    return new ContainerStatusPBImpl(p);
+  }
+
+  private synchronized ContainerStatusProto convertToProtoFormat(
+      ContainerStatus t) {
+    return ((ContainerStatusPBImpl)t).getProto();
+  }
+
+  private synchronized ResourcePBImpl convertFromProtoFormat(ResourceProto p) {
+    return new ResourcePBImpl(p);
+  }
+
+  private synchronized ResourceProto convertToProtoFormat(Resource r) {
+    return ((ResourcePBImpl) r).getProto();
+  }
+
}  
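The net effect of this file is that the fields of the removed AMResponse record are now served directly by AllocateResponse. A minimal sketch of how an ApplicationMaster might consume the merged record, assuming an AMRMProtocol#allocate(AllocateRequest) call; the variable names (amRmProtocol, allocateRequest) are placeholders, not part of this change:

    AllocateResponse response = amRmProtocol.allocate(allocateRequest);
    if (response.getReboot()) {
      // the RM considers this AM out of sync; resync or restart
    }
    for (Container container : response.getAllocatedContainers()) {
      // hand each newly granted container to a launcher thread
    }
    for (ContainerStatus status : response.getCompletedContainersStatuses()) {
      // update completed/failed counters from the exit status
    }
    Resource headroom = response.getAvailableResources();  // remaining headroom for this app
    int lastResponseId = response.getResponseId();         // used to detect duplicate responses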

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllApplicationsResponsePBImpl.java

@@ -93,7 +93,8 @@ GetAllApplicationsResponse {
    viaProto = false;
  }

-  //Once this is called. containerList will never be null - untill a getProto is called.
+  // Once this is called. containerList will never be null - until a getProto
+  // is called.
  private void initLocalApplicationsList() {
    if (this.applicationList != null) {
      return;

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodesResponsePBImpl.java

@@ -92,7 +92,8 @@ public class GetClusterNodesResponsePBImpl extends
    viaProto = false;
  }

-  //Once this is called. containerList will never be null - untill a getProto is called.
+  // Once this is called. containerList will never be null - until a getProto
+  // is called.
  private void initLocalNodeManagerInfosList() {
    if (this.nodeManagerInfoList != null) {
      return;

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetQueueUserAclsInfoResponsePBImpl.java

@@ -94,7 +94,8 @@ implements GetQueueUserAclsInfoResponse {
    viaProto = false;
  }

-  //Once this is called. containerList will never be null - untill a getProto is called.
+  // Once this is called. containerList will never be null - until a getProto
+  // is called.
  private void initLocalQueueUserAclsList() {
    if (this.queueUserAclsInfoList != null) {
      return;

+ 0 - 138
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMResponse.java

@@ -1,138 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.hadoop.yarn.api.records;
-
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceAudience.Public;
-import org.apache.hadoop.classification.InterfaceStability.Stable;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
-import org.apache.hadoop.yarn.api.AMRMProtocol;
-import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
-
-/**
- * <p>The response sent by the <code>ResourceManager</code> the  
- * <code>ApplicationMaster</code> during resource negotiation.</p>
- *
- * <p>The response includes:
- *   <ul>
- *     <li>Response ID to track duplicate responses.</li>
- *     <li>
- *       A reboot flag to let the <code>ApplicationMaster</code> know that its 
- *       horribly out of sync and needs to reboot.</li>
- *     <li>A list of newly allocated {@link Container}.</li>
- *     <li>A list of completed {@link Container}.</li>
- *     <li>
- *       The available headroom for resources in the cluster for the
- *       application. 
- *     </li>
- *   </ul>
- * </p>
- * 
- * @see AMRMProtocol#allocate(AllocateRequest)
- */
-@Public
-@Unstable
-public interface AMResponse {
-  /**
-   * Should the <code>ApplicationMaster</code> reboot for being horribly 
-   * out-of-sync with the <code>ResourceManager</code> as deigned by 
-   * {@link #getResponseId()}?
-   * 
-   * @return <code>true</code> if the <code>ApplicationMaster</code> should
-   *         reboot, <code>false</code> otherwise
-   */
-  @Public
-  @Stable
-  public boolean getReboot();
-  
-  @Private
-  @Unstable
-  public void setReboot(boolean reboot);
-
-  /**
-   * Get the <em>last response id</em>.
-   * @return <em>last response id</em>
-   */
-  @Public
-  @Stable
-  public int getResponseId();
-  
-  @Private
-  @Unstable
-  public void setResponseId(int responseId);
-
-  /**
-   * Get the list of <em>newly allocated</em> <code>Container</code> by the 
-   * <code>ResourceManager</code>.
-   * @return list of <em>newly allocated</em> <code>Container</code>
-   */
-  @Public
-  @Stable
-  public List<Container> getAllocatedContainers();
-
-  /**
-   * Set the list of <em>newly allocated</em> <code>Container</code> by the 
-   * <code>ResourceManager</code>.
-   * @param containers list of <em>newly allocated</em> <code>Container</code>
-   */
-  @Public
-  @Stable
-  public void setAllocatedContainers(List<Container> containers);
-
-  /**
-   * Get the <em>available headroom</em> for resources in the cluster for the 
-   * application.
-   * @return limit of available headroom for resources in the cluster for the 
-   * application
-   */
-  @Public
-  @Stable
-  public Resource getAvailableResources();
-
-  @Private
-  @Unstable
-  public void setAvailableResources(Resource limit);
-  
-  /**
-   * Get the list of <em>completed containers' statuses</em>.
-   * @return the list of <em>completed containers' statuses</em>
-   */
-  @Public
-  @Stable
-  public List<ContainerStatus> getCompletedContainersStatuses();
-
-  @Private
-  @Unstable
-  public void setCompletedContainersStatuses(List<ContainerStatus> containers);
-
-  /**
-   * Get the list of <em>updated <code>NodeReport</code>s</em>. Updates could be
-   * changes in health, availability etc of the nodes.
-   * @return The delta of updated nodes since the last response
-   */
-  @Public
-  @Unstable
-  public List<NodeReport> getUpdatedNodes();
-
-  @Private
-  @Unstable
-  public void setUpdatedNodes(final List<NodeReport> updatedNodes);
-}

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptId.java

@@ -76,8 +76,8 @@ public abstract class ApplicationAttemptId implements
  @Override
  public int hashCode() {
    // Generated by eclipse.
-    final int prime = 31;
-    int result = 1;
+    final int prime = 347671;
+    int result = 5501;
    ApplicationId appId = getApplicationId();
    result = prime * result +  appId.hashCode();
    result = prime * result + getAttemptId();

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationId.java

@@ -97,8 +97,8 @@ public abstract class ApplicationId implements Comparable<ApplicationId> {
  @Override
  public int hashCode() {
    // Generated by eclipse.
-    final int prime = 31;
-    int result = 1;
+    final int prime = 371237;
+    int result = 6521;
    long clusterTimestamp = getClusterTimestamp();
    result = prime * result
        + (int) (clusterTimestamp ^ (clusterTimestamp >>> 32));

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java

@@ -86,8 +86,8 @@ public abstract class ContainerId implements Comparable<ContainerId>{
  @Override
  public int hashCode() {
    // Generated by eclipse.
-    final int prime = 31;
-    int result = 1;
+    final int prime = 435569;
+    int result = 7507;
    result = prime * result + getId();
    result = prime * result + getApplicationAttemptId().hashCode();
    return result;

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeId.java

@@ -65,8 +65,8 @@ public abstract class NodeId implements Comparable<NodeId> {
 
 
  @Override
  public int hashCode() {
-    final int prime = 31;
-    int result = 1;
+    final int prime = 493217;
+    int result = 8501;
    result = prime * result + this.getHost().hashCode();
    result = prime * result + this.getPort();
    return result;

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Priority.java

@@ -39,8 +39,8 @@ public abstract class Priority implements Comparable<Priority> {
   
   
  @Override
  public int hashCode() {
-    final int prime = 31;
-    int result = 1;
+    final int prime = 517861;
+    int result = 9511;
    result = prime * result + getPriority();
    return result;
  }
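The hashCode changes in this and the neighbouring record classes keep the Eclipse-generated shape and only swap the shared seed 1 and multiplier 31 for larger per-class primes, presumably so that different record types hash apart more often. For Priority the method now effectively computes:

    int result = 9511;                          // was: int result = 1;
    result = 517861 * result + getPriority();   // was: result = 31 * result + getPriority();
    return result;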

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java

@@ -124,8 +124,8 @@ public abstract class ResourceRequest implements Comparable<ResourceRequest> {
 
 
  @Override
  public int hashCode() {
-    final int prime = 31;
-    int result = 1;
+    final int prime = 2153;
+    int result = 2459;
    Resource capability = getCapability();
    String hostName = getHostName();
    Priority priority = getPriority();

+ 0 - 373
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMResponsePBImpl.java

@@ -1,373 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.api.records.impl.pb;
-
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.yarn.api.records.AMResponse;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerStatus;
-import org.apache.hadoop.yarn.api.records.NodeReport;
-import org.apache.hadoop.yarn.api.records.ProtoBase;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.proto.YarnProtos.AMResponseProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.AMResponseProtoOrBuilder;
-import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
-
-
-    
-public class AMResponsePBImpl extends ProtoBase<AMResponseProto> implements AMResponse {
-  AMResponseProto proto = AMResponseProto.getDefaultInstance();
-  AMResponseProto.Builder builder = null;
-  boolean viaProto = false;
-  
-  Resource limit;
-
-  private List<Container> allocatedContainers = null;
-  private List<ContainerStatus> completedContainersStatuses = null;
-//  private boolean hasLocalContainerList = false;
-  
-  private List<NodeReport> updatedNodes = null;
-  
-  public AMResponsePBImpl() {
-    builder = AMResponseProto.newBuilder();
-  }
-
-  public AMResponsePBImpl(AMResponseProto proto) {
-    this.proto = proto;
-    viaProto = true;
-  }
-  
-  public synchronized AMResponseProto getProto() {
-      mergeLocalToProto();
-    proto = viaProto ? proto : builder.build();
-    viaProto = true;
-    return proto;
-  }
-  
-  private synchronized void mergeLocalToBuilder() {
-    if (this.allocatedContainers != null) {
-      builder.clearAllocatedContainers();
-      Iterable<ContainerProto> iterable = 
-          getProtoIterable(this.allocatedContainers);
-      builder.addAllAllocatedContainers(iterable);
-    }
-    if (this.completedContainersStatuses != null) {
-      builder.clearCompletedContainerStatuses();
-      Iterable<ContainerStatusProto> iterable = 
-          getContainerStatusProtoIterable(this.completedContainersStatuses);
-      builder.addAllCompletedContainerStatuses(iterable);
-    }
-    if (this.updatedNodes != null) {
-      builder.clearUpdatedNodes();
-      Iterable<NodeReportProto> iterable = 
-          getNodeReportProtoIterable(this.updatedNodes);
-      builder.addAllUpdatedNodes(iterable);
-    }
-    if (this.limit != null) {
-      builder.setLimit(convertToProtoFormat(this.limit));
-    }
-  }
-  
-  private synchronized void mergeLocalToProto() {
-    if (viaProto) 
-      maybeInitBuilder();
-    mergeLocalToBuilder();
-    proto = builder.build();
-    viaProto = true;
-  }
-
-  private synchronized void maybeInitBuilder() {
-    if (viaProto || builder == null) {
-      builder = AMResponseProto.newBuilder(proto);
-    }
-    viaProto = false;
-  }
-    
-  
-  @Override
-  public synchronized boolean getReboot() {
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    return (p.getReboot());
-  }
-
-  @Override
-  public synchronized void setReboot(boolean reboot) {
-    maybeInitBuilder();
-    builder.setReboot((reboot));
-  }
-  @Override
-  public synchronized int getResponseId() {
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    return (p.getResponseId());
-  }
-
-  @Override
-  public synchronized void setResponseId(int responseId) {
-    maybeInitBuilder();
-    builder.setResponseId((responseId));
-  }
-  @Override
-  public synchronized Resource getAvailableResources() {
-    if (this.limit != null) {
-      return this.limit;
-    }
-
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    if (!p.hasLimit()) {
-      return null;
-    }
-    this.limit = convertFromProtoFormat(p.getLimit());
-    return this.limit;
-  }
-
-  @Override
-  public synchronized void setAvailableResources(Resource limit) {
-    maybeInitBuilder();
-    if (limit == null)
-      builder.clearLimit();
-    this.limit = limit;
-  }
-  
-  @Override
-  public synchronized List<NodeReport> getUpdatedNodes() {
-    initLocalNewNodeReportList();
-    return this.updatedNodes;
-  }
-  
-  //Once this is called. updatedNodes will never be null - until a getProto is called.
-  private synchronized void initLocalNewNodeReportList() {
-    if (this.updatedNodes != null) {
-      return;
-    }
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    List<NodeReportProto> list = p.getUpdatedNodesList();
-    updatedNodes = new ArrayList<NodeReport>(list.size());
-
-    for (NodeReportProto n : list) {
-      updatedNodes.add(convertFromProtoFormat(n));
-    }
-  }
-
-  @Override
-  public synchronized void setUpdatedNodes(final List<NodeReport> updatedNodes) {
-    if (updatedNodes == null) {
-      this.updatedNodes.clear();
-      return;
-    }
-    this.updatedNodes = new ArrayList<NodeReport>(updatedNodes.size());
-    this.updatedNodes.addAll(updatedNodes);
-  }
-
-  @Override
-  public synchronized List<Container> getAllocatedContainers() {
-    initLocalNewContainerList();
-    return this.allocatedContainers;
-  }
-  
-  //Once this is called. containerList will never be null - until a getProto is called.
-  private synchronized void initLocalNewContainerList() {
-    if (this.allocatedContainers != null) {
-      return;
-    }
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    List<ContainerProto> list = p.getAllocatedContainersList();
-    allocatedContainers = new ArrayList<Container>();
-
-    for (ContainerProto c : list) {
-      allocatedContainers.add(convertFromProtoFormat(c));
-    }
-  }
-
-  @Override
-  public synchronized void setAllocatedContainers(final List<Container> containers) {
-    if (containers == null) 
-      return;
-    // this looks like a bug because it results in append and not set
-    initLocalNewContainerList();
-    allocatedContainers.addAll(containers);
-  }
-
-  private synchronized Iterable<ContainerProto> getProtoIterable(
-      final List<Container> newContainersList) {
-    maybeInitBuilder();
-    return new Iterable<ContainerProto>() {
-      @Override
-      public synchronized Iterator<ContainerProto> iterator() {
-        return new Iterator<ContainerProto>() {
-
-          Iterator<Container> iter = newContainersList.iterator();
-
-          @Override
-          public synchronized boolean hasNext() {
-            return iter.hasNext();
-          }
-
-          @Override
-          public synchronized ContainerProto next() {
-            return convertToProtoFormat(iter.next());
-          }
-
-          @Override
-          public synchronized void remove() {
-            throw new UnsupportedOperationException();
-
-          }
-        };
-
-      }
-    };
-  }
-
-  private synchronized Iterable<ContainerStatusProto> 
-  getContainerStatusProtoIterable(
-      final List<ContainerStatus> newContainersList) {
-    maybeInitBuilder();
-    return new Iterable<ContainerStatusProto>() {
-      @Override
-      public synchronized Iterator<ContainerStatusProto> iterator() {
-        return new Iterator<ContainerStatusProto>() {
-
-          Iterator<ContainerStatus> iter = newContainersList.iterator();
-
-          @Override
-          public synchronized boolean hasNext() {
-            return iter.hasNext();
-          }
-
-          @Override
-          public synchronized ContainerStatusProto next() {
-            return convertToProtoFormat(iter.next());
-          }
-
-          @Override
-          public synchronized void remove() {
-            throw new UnsupportedOperationException();
-
-          }
-        };
-
-      }
-    };
-  }
-  
-  private synchronized Iterable<NodeReportProto> 
-  getNodeReportProtoIterable(
-      final List<NodeReport> newNodeReportsList) {
-    maybeInitBuilder();
-    return new Iterable<NodeReportProto>() {
-      @Override
-      public synchronized Iterator<NodeReportProto> iterator() {
-        return new Iterator<NodeReportProto>() {
-
-          Iterator<NodeReport> iter = newNodeReportsList.iterator();
-
-          @Override
-          public synchronized boolean hasNext() {
-            return iter.hasNext();
-          }
-
-          @Override
-          public synchronized NodeReportProto next() {
-            return convertToProtoFormat(iter.next());
-          }
-
-          @Override
-          public synchronized void remove() {
-            throw new UnsupportedOperationException();
-
-          }
-        };
-
-      }
-    };
-  }
-
-  //// Finished containers
-  @Override
-  public synchronized List<ContainerStatus> getCompletedContainersStatuses() {
-    initLocalFinishedContainerList();
-    return this.completedContainersStatuses;
-  }
-  
-  //Once this is called. containerList will never be null - untill a getProto is called.
-  private synchronized void initLocalFinishedContainerList() {
-    if (this.completedContainersStatuses != null) {
-      return;
-    }
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    List<ContainerStatusProto> list = p.getCompletedContainerStatusesList();
-    completedContainersStatuses = new ArrayList<ContainerStatus>();
-
-    for (ContainerStatusProto c : list) {
-      completedContainersStatuses.add(convertFromProtoFormat(c));
-    }
-  }
-
-  @Override
-  public synchronized void setCompletedContainersStatuses(
-      final List<ContainerStatus> containers) {
-    if (containers == null) 
-      return;
-    initLocalFinishedContainerList();
-    completedContainersStatuses.addAll(containers);
-  }
-  
-  private synchronized NodeReportPBImpl convertFromProtoFormat(
-      NodeReportProto p) {
-    return new NodeReportPBImpl(p);
-  }
-
-  private synchronized NodeReportProto convertToProtoFormat(NodeReport t) {
-    return ((NodeReportPBImpl)t).getProto();
-  }
-  
-  private synchronized ContainerPBImpl convertFromProtoFormat(
-      ContainerProto p) {
-    return new ContainerPBImpl(p);
-  }
-
-  private synchronized ContainerProto convertToProtoFormat(Container t) {
-    return ((ContainerPBImpl)t).getProto();
-  }
-
-  private synchronized ContainerStatusPBImpl convertFromProtoFormat(
-      ContainerStatusProto p) {
-    return new ContainerStatusPBImpl(p);
-  }
-
-  private synchronized ContainerStatusProto convertToProtoFormat(ContainerStatus t) {
-    return ((ContainerStatusPBImpl)t).getProto();
-  }
-
-  private synchronized ResourcePBImpl convertFromProtoFormat(ResourceProto p) {
-    return new ResourcePBImpl(p);
-  }
-
-  private synchronized ResourceProto convertToProtoFormat(Resource r) {
-    return ((ResourcePBImpl) r).getProto();
-  }
-
-}  

+ 0 - 10
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto

@@ -207,16 +207,6 @@ message ResourceRequestProto {
  optional int32 num_containers = 4;
}

-message AMResponseProto {
-  optional bool reboot = 1;
-  optional int32 response_id = 2;
-  repeated ContainerProto allocated_containers = 3;
-  repeated ContainerStatusProto completed_container_statuses = 4;
-  optional ResourceProto limit = 5;
-  repeated NodeReportProto updated_nodes = 6;
-}
-
-
////////////////////////////////////////////////////////////////////////
////// From client_RM_Protocol /////////////////////////////////////////
////////////////////////////////////////////////////////////////////////

+ 7 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto

@@ -59,8 +59,13 @@ message AllocateRequestProto {
}

message AllocateResponseProto {
-  optional AMResponseProto AM_response = 1;
-  optional int32 num_cluster_nodes = 2;
+  optional bool reboot = 1;
+  optional int32 response_id = 2;
+  repeated ContainerProto allocated_containers = 3;
+  repeated ContainerStatusProto completed_container_statuses = 4;
+  optional ResourceProto limit = 5;
+  repeated NodeReportProto updated_nodes = 6;
+  optional int32 num_cluster_nodes = 7;
}
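Note that the former AMResponseProto fields keep their tag numbers (1 through 6) inside AllocateResponseProto, while num_cluster_nodes moves from tag 2 to tag 7. The generated builder therefore exposes the merged fields directly, which is exactly what AllocateResponsePBImpl above relies on. An illustrative snippet of using the generated builder by hand (values are arbitrary):

    AllocateResponseProto proto = AllocateResponseProto.newBuilder()
        .setResponseId(1)          // setters shown here are the ones the PBImpl calls
        .setReboot(false)
        .setNumClusterNodes(4)
        .build();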
 
 
 
 

+ 191 - 228
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java

@@ -53,7 +53,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;

-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -64,12 +63,12 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
-import org.apache.hadoop.yarn.client.AMRMClient;
 import org.apache.hadoop.yarn.client.AMRMClient.ContainerRequest;
-import org.apache.hadoop.yarn.client.AMRMClientImpl;
+import org.apache.hadoop.yarn.client.AMRMClientAsync;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
@@ -148,8 +147,8 @@ public class ApplicationMaster {
   private YarnRPC rpc;

   // Handle to communicate with the Resource Manager
-  private AMRMClient resourceManager;
-
+  private AMRMClientAsync resourceManager;
+  
   // Application Attempt Id ( combination of attemptId and fail count )
   private ApplicationAttemptId appAttemptID;

@@ -170,8 +169,6 @@ public class ApplicationMaster {
   // Priority of the request
   private int requestPriority;

-  // Simple flag to denote whether all works is done
-  private boolean appDone = false;
   // Counter for completed containers ( complete denotes successful or failed )
   private AtomicInteger numCompletedContainers = new AtomicInteger();
   // Allocated container count so that we know how many containers has the RM
@@ -202,6 +199,9 @@ public class ApplicationMaster {
   // Hardcoded path to shell script in launch container's local env
   private final String ExecShellStringPath = "ExecShellScript.sh";

+  private volatile boolean done;
+  private volatile boolean success;
+  
   // Launch threads
   private List<Thread> launchThreads = new ArrayList<Thread>();

@@ -394,6 +394,10 @@ public class ApplicationMaster {
         "container_memory", "10"));
         "container_memory", "10"));
     numTotalContainers = Integer.parseInt(cliParser.getOptionValue(
     numTotalContainers = Integer.parseInt(cliParser.getOptionValue(
         "num_containers", "1"));
         "num_containers", "1"));
+    if (numTotalContainers == 0) {
+      throw new IllegalArgumentException(
+          "Cannot run distributed shell with no containers");
+    }
     requestPriority = Integer.parseInt(cliParser
         .getOptionValue("priority", "0"));

@@ -417,225 +421,202 @@ public class ApplicationMaster {
   public boolean run() throws YarnRemoteException {
     LOG.info("Starting ApplicationMaster");

-    // Connect to ResourceManager
-    resourceManager = new AMRMClientImpl(appAttemptID);
+    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
+    
+    resourceManager = new AMRMClientAsync(appAttemptID, 1000, allocListener);
     resourceManager.init(conf);
     resourceManager.start();

-    try {
-      // Setup local RPC Server to accept status requests directly from clients
-      // TODO need to setup a protocol for client to be able to communicate to
-      // the RPC server
-      // TODO use the rpc port info to register with the RM for the client to
-      // send requests to this app master
-
-      // Register self with ResourceManager
-      RegisterApplicationMasterResponse response = resourceManager
-          .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
-              appMasterTrackingUrl);
-      // Dump out information about cluster capability as seen by the
-      // resource manager
-      int minMem = response.getMinimumResourceCapability().getMemory();
-      int maxMem = response.getMaximumResourceCapability().getMemory();
-      LOG.info("Min mem capabililty of resources in this cluster " + minMem);
-      LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
-
-      // A resource ask has to be atleast the minimum of the capability of the
-      // cluster, the value has to be a multiple of the min value and cannot
-      // exceed the max.
-      // If it is not an exact multiple of min, the RM will allocate to the
-      // nearest multiple of min
-      if (containerMemory < minMem) {
-        LOG.info("Container memory specified below min threshold of cluster."
-            + " Using min value." + ", specified=" + containerMemory + ", min="
-            + minMem);
-        containerMemory = minMem;
-      } else if (containerMemory > maxMem) {
-        LOG.info("Container memory specified above max threshold of cluster."
-            + " Using max value." + ", specified=" + containerMemory + ", max="
-            + maxMem);
-        containerMemory = maxMem;
-      }
+    // Setup local RPC Server to accept status requests directly from clients
+    // TODO need to setup a protocol for client to be able to communicate to
+    // the RPC server
+    // TODO use the rpc port info to register with the RM for the client to
+    // send requests to this app master
+
+    // Register self with ResourceManager
+    // This will start heartbeating to the RM
+    RegisterApplicationMasterResponse response = resourceManager
+        .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
+            appMasterTrackingUrl);
+    // Dump out information about cluster capability as seen by the
+    // resource manager
+    int minMem = response.getMinimumResourceCapability().getMemory();
+    int maxMem = response.getMaximumResourceCapability().getMemory();
+    LOG.info("Min mem capabililty of resources in this cluster " + minMem);
+    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
+
+    // A resource ask has to be atleast the minimum of the capability of the
+    // cluster, the value has to be a multiple of the min value and cannot
+    // exceed the max.
+    // If it is not an exact multiple of min, the RM will allocate to the
+    // nearest multiple of min
+    if (containerMemory < minMem) {
+      LOG.info("Container memory specified below min threshold of cluster."
+          + " Using min value." + ", specified=" + containerMemory + ", min="
+          + minMem);
+      containerMemory = minMem;
+    } else if (containerMemory > maxMem) {
+      LOG.info("Container memory specified above max threshold of cluster."
+          + " Using max value." + ", specified=" + containerMemory + ", max="
+          + maxMem);
+      containerMemory = maxMem;
+    }
 
-      // TODO poll RM every now and then with an empty request to let RM know
-      // that we are alive
-      // The heartbeat interval after which an AM is timed out by the RM is
-      // defined by a config setting:
-      // RM_AM_EXPIRY_INTERVAL_MS with default defined by
-      // DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
-      // The allocate calls to the RM count as heartbeats so, for now,
-      // this additional heartbeat emitter is not required.
-
-      // Setup ask for containers from RM
-      // Send request for containers to RM
-      // Until we get our fully allocated quota, we keep on polling RM for
-      // containers
-      // Keep looping until all the containers are launched and shell script
-      // executed on them ( regardless of success/failure).
-
-      int loopCounter = -1;
-
-      while (numCompletedContainers.get() < numTotalContainers && !appDone) {
-        loopCounter++;
-
-        // log current state
-        LOG.info("Current application state: loop=" + loopCounter
-            + ", appDone=" + appDone + ", total=" + numTotalContainers
-            + ", requested=" + numRequestedContainers + ", completed="
-            + numCompletedContainers + ", failed=" + numFailedContainers
-            + ", currentAllocated=" + numAllocatedContainers);
-
-        // Sleep before each loop when asking RM for containers
-        // to avoid flooding RM with spurious requests when it
-        // need not have any available containers
-        // Sleeping for 1000 ms.
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          LOG.info("Sleep interrupted " + e.getMessage());
-        }
 
-        // For the first loop, askCount will be equal to total containers needed
-        // From that point on, askCount will always be 0 as current
-        // implementation does not change its ask on container failures.
-        int askCount = numTotalContainers - numRequestedContainers.get();
-        numRequestedContainers.addAndGet(askCount);
+    // Setup ask for containers from RM
+    // Send request for containers to RM
+    // Until we get our fully allocated quota, we keep on polling RM for
+    // containers
+    // Keep looping until all the containers are launched and shell script
+    // executed on them ( regardless of success/failure).
+    ContainerRequest containerAsk = setupContainerAskForRM(numTotalContainers);
+    resourceManager.addContainerRequest(containerAsk);
+    numRequestedContainers.set(numTotalContainers);
-        if (askCount > 0) {
-          ContainerRequest containerAsk = setupContainerAskForRM(askCount);
-          resourceManager.addContainerRequest(containerAsk);
-        }
+    while (!done) {
+      try {
+        Thread.sleep(200);
+      } catch (InterruptedException ex) {}
+    }
+    finish();
+    
+    return success;
+  }
+  
+  private void finish() {
+    // Join all launched threads
+    // needed for when we time out
+    // and we need to release containers
+    for (Thread launchThread : launchThreads) {
+      try {
+        launchThread.join(10000);
+      } catch (InterruptedException e) {
+        LOG.info("Exception thrown in thread join: " + e.getMessage());
+        e.printStackTrace();
+      }
+    }
-        // Send the request to RM
-        LOG.info("Asking RM for containers" + ", askCount=" + askCount);
-        AMResponse amResp = sendContainerAskToRM();
-
-        // Retrieve list of allocated containers from the response
-        List<Container> allocatedContainers = amResp.getAllocatedContainers();
-        LOG.info("Got response from RM for container ask, allocatedCnt="
-            + allocatedContainers.size());
-        numAllocatedContainers.addAndGet(allocatedContainers.size());
-        for (Container allocatedContainer : allocatedContainers) {
-          LOG.info("Launching shell command on a new container."
-              + ", containerId=" + allocatedContainer.getId()
-              + ", containerNode=" + allocatedContainer.getNodeId().getHost()
-              + ":" + allocatedContainer.getNodeId().getPort()
-              + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress()
-              + ", containerState" + allocatedContainer.getState()
-              + ", containerResourceMemory"
-              + allocatedContainer.getResource().getMemory());
-          // + ", containerToken"
-          // +allocatedContainer.getContainerToken().getIdentifier().toString());
-
-          LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(
-              allocatedContainer);
-          Thread launchThread = new Thread(runnableLaunchContainer);
-
-          // launch and start the container on a separate thread to keep
-          // the main thread unblocked
-          // as all containers may not be allocated at one go.
-          launchThreads.add(launchThread);
-          launchThread.start();
-        }
+    // When the application completes, it should send a finish application
+    // signal to the RM
+    LOG.info("Application completed. Signalling finish to RM");
-        // Check what the current available resources in the cluster are
-        // TODO should we do anything if the available resources are not enough?
-        Resource availableResources = amResp.getAvailableResources();
-        LOG.info("Current available resources in the cluster "
-            + availableResources);
-
-        // Check the completed containers
-        List<ContainerStatus> completedContainers = amResp
-            .getCompletedContainersStatuses();
-        LOG.info("Got response from RM for container ask, completedCnt="
-            + completedContainers.size());
-        for (ContainerStatus containerStatus : completedContainers) {
-          LOG.info("Got container status for containerID="
-              + containerStatus.getContainerId() + ", state="
-              + containerStatus.getState() + ", exitStatus="
-              + containerStatus.getExitStatus() + ", diagnostics="
-              + containerStatus.getDiagnostics());
-
-          // non complete containers should not be here
-          assert (containerStatus.getState() == ContainerState.COMPLETE);
-
-          // increment counters for completed/failed containers
-          int exitStatus = containerStatus.getExitStatus();
-          if (0 != exitStatus) {
-            // container failed
-            if (-100 != exitStatus) {
-              // shell script failed
-              // counts as completed
-              numCompletedContainers.incrementAndGet();
-              numFailedContainers.incrementAndGet();
-            } else {
-              // something else bad happened
-              // app job did not complete for some reason
-              // we should re-try as the container was lost for some reason
-              numAllocatedContainers.decrementAndGet();
-              numRequestedContainers.decrementAndGet();
-              // we do not need to release the container as it would be done
-              // by the RM/CM.
-            }
-          } else {
-            // nothing to do
-            // container completed successfully
+    FinalApplicationStatus appStatus;
+    String appMessage = null;
+    success = true;
+    if (numFailedContainers.get() == 0) {
+      appStatus = FinalApplicationStatus.SUCCEEDED;
+    } else {
+      appStatus = FinalApplicationStatus.FAILED;
+      appMessage = "Diagnostics." + ", total=" + numTotalContainers
+          + ", completed=" + numCompletedContainers.get() + ", allocated="
+          + numAllocatedContainers.get() + ", failed="
+          + numFailedContainers.get();
+      success = false;
+    }
+    try {
+      resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
+    } catch (YarnRemoteException ex) {
+      LOG.error("Failed to unregister application", ex);
+    }
+    
+    done = true;
+    resourceManager.stop();
+  }
+  
+  private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {
+    @Override
+    public void onContainersCompleted(List<ContainerStatus> completedContainers) {
+      LOG.info("Got response from RM for container ask, completedCnt="
+          + completedContainers.size());
+      for (ContainerStatus containerStatus : completedContainers) {
+        LOG.info("Got container status for containerID="
+            + containerStatus.getContainerId() + ", state="
+            + containerStatus.getState() + ", exitStatus="
+            + containerStatus.getExitStatus() + ", diagnostics="
+            + containerStatus.getDiagnostics());
+
+        // non complete containers should not be here
+        assert (containerStatus.getState() == ContainerState.COMPLETE);
+
+        // increment counters for completed/failed containers
+        int exitStatus = containerStatus.getExitStatus();
+        if (0 != exitStatus) {
+          // container failed
+          if (YarnConfiguration.ABORTED_CONTAINER_EXIT_STATUS != exitStatus) {
+            // shell script failed
+            // counts as completed
             numCompletedContainers.incrementAndGet();
-            LOG.info("Container completed successfully." + ", containerId="
-                + containerStatus.getContainerId());
+            numFailedContainers.incrementAndGet();
+          } else {
+            // container was killed by framework, possibly preempted
+            // we should re-try as the container was lost for some reason
+            numAllocatedContainers.decrementAndGet();
+            numRequestedContainers.decrementAndGet();
+            // we do not need to release the container as it would be done
+            // by the RM
           }
+        } else {
+          // nothing to do
+          // container completed successfully
+          numCompletedContainers.incrementAndGet();
+          LOG.info("Container completed successfully." + ", containerId="
+              + containerStatus.getContainerId());
         }
-        if (numCompletedContainers.get() == numTotalContainers) {
-          appDone = true;
-        }
-
-        LOG.info("Current application state: loop=" + loopCounter
-            + ", appDone=" + appDone + ", total=" + numTotalContainers
-            + ", requested=" + numRequestedContainers + ", completed="
-            + numCompletedContainers + ", failed=" + numFailedContainers
-            + ", currentAllocated=" + numAllocatedContainers);
-
-        // TODO
-        // Add a timeout handling layer
-        // for misbehaving shell commands
       }
-
-      // Join all launched threads
-      // needed for when we time out
-      // and we need to release containers
-      for (Thread launchThread : launchThreads) {
-        try {
-          launchThread.join(10000);
-        } catch (InterruptedException e) {
-          LOG.info("Exception thrown in thread join: " + e.getMessage());
-          e.printStackTrace();
-        }
+      
+      // ask for more containers if any failed
+      int askCount = numTotalContainers - numRequestedContainers.get();
+      numRequestedContainers.addAndGet(askCount);
+
+      if (askCount > 0) {
+        ContainerRequest containerAsk = setupContainerAskForRM(askCount);
+        resourceManager.addContainerRequest(containerAsk);
       }
+      
+      // set progress to deliver to RM on next heartbeat
+      float progress = (float) numCompletedContainers.get()
+          / numTotalContainers;
+      resourceManager.setProgress(progress);
+      
+      if (numCompletedContainers.get() == numTotalContainers) {
+        done = true;
+      }
+    }
 
 
-      // When the application completes, it should send a finish application
-      // signal to the RM
-      LOG.info("Application completed. Signalling finish to RM");
-
-      FinalApplicationStatus appStatus;
-      String appMessage = null;
-      boolean isSuccess = true;
-      if (numFailedContainers.get() == 0) {
-        appStatus = FinalApplicationStatus.SUCCEEDED;
-      } else {
-        appStatus = FinalApplicationStatus.FAILED;
-        appMessage = "Diagnostics." + ", total=" + numTotalContainers
-            + ", completed=" + numCompletedContainers.get() + ", allocated="
-            + numAllocatedContainers.get() + ", failed="
-            + numFailedContainers.get();
-        isSuccess = false;
+    @Override
+    public void onContainersAllocated(List<Container> allocatedContainers) {
+      LOG.info("Got response from RM for container ask, allocatedCnt="
+          + allocatedContainers.size());
+      numAllocatedContainers.addAndGet(allocatedContainers.size());
+      for (Container allocatedContainer : allocatedContainers) {
+        LOG.info("Launching shell command on a new container."
+            + ", containerId=" + allocatedContainer.getId()
+            + ", containerNode=" + allocatedContainer.getNodeId().getHost()
+            + ":" + allocatedContainer.getNodeId().getPort()
+            + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress()
+            + ", containerState" + allocatedContainer.getState()
+            + ", containerResourceMemory"
+            + allocatedContainer.getResource().getMemory());
+        // + ", containerToken"
+        // +allocatedContainer.getContainerToken().getIdentifier().toString());
+
+        LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(
+            allocatedContainer);
+        Thread launchThread = new Thread(runnableLaunchContainer);
+
+        // launch and start the container on a separate thread to keep
+        // the main thread unblocked
+        // as all containers may not be allocated at one go.
+        launchThreads.add(launchThread);
+        launchThread.start();
       }
-      resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
-      return isSuccess;
-    } finally {
-      resourceManager.stop();
     }
+
+    @Override
+    public void onRebootRequest() {}
+
+    @Override
+    public void onNodesUpdated(List<NodeReport> updatedNodes) {}
   }

   /**
@@ -811,22 +792,4 @@ public class ApplicationMaster {
     LOG.info("Requested container ask: " + request.toString());
     LOG.info("Requested container ask: " + request.toString());
     return request;
     return request;
   }
   }
-
-  /**
-   * Ask RM to allocate given no. of containers to this Application Master
-   *
-   * @param requestedContainers Containers to ask for from RM
-   * @return Response from RM to AM with allocated containers
-   * @throws YarnRemoteException
-   */
-  private AMResponse sendContainerAskToRM() throws YarnRemoteException {
-    float progressIndicator = (float) numCompletedContainers.get()
-        / numTotalContainers;
-
-    LOG.info("Sending request to RM for containers" + ", progress="
-        + progressIndicator);
-
-    AllocateResponse resp = resourceManager.allocate(progressIndicator);
-    return resp.getAMResponse();
-  }
 }
 }
+ 7 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java

@@ -18,6 +18,7 @@
 
 
 package org.apache.hadoop.yarn.applications.distributedshell;
 package org.apache.hadoop.yarn.applications.distributedshell;

 import java.io.IOException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.ClientRMProtocol;
 import org.apache.hadoop.yarn.api.ClientRMProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
@@ -481,14 +483,15 @@ public class Client extends YarnClientImpl {
     // It should be provided out of the box. 
     // It should be provided out of the box. 
     // For now setting all required classpaths including
     // For now setting all required classpaths including
     // the classpath to "." for the application jar
     // the classpath to "." for the application jar
-    StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
+    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$())
+      .append(File.pathSeparatorChar).append("./*");
     for (String c : conf.getStrings(
     for (String c : conf.getStrings(
         YarnConfiguration.YARN_APPLICATION_CLASSPATH,
         YarnConfiguration.YARN_APPLICATION_CLASSPATH,
         YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
         YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
-      classPathEnv.append(':');
+      classPathEnv.append(File.pathSeparatorChar);
       classPathEnv.append(c.trim());
       classPathEnv.append(c.trim());
     }
     }
-    classPathEnv.append(":./log4j.properties");
+    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");
 
 
     // add the runtime classpath needed for tests to work
     // add the runtime classpath needed for tests to work
     if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
     if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
@@ -505,7 +508,7 @@ public class Client extends YarnClientImpl {
 
 
     // Set java executable command 
     // Set java executable command 
     LOG.info("Setting up app master command");
     LOG.info("Setting up app master command");
-    vargs.add("${JAVA_HOME}" + "/bin/java");
+    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
     // Set Xmx based on am memory size
     // Set Xmx based on am memory size
     vargs.add("-Xmx" + amMemory + "m");
     vargs.add("-Xmx" + amMemory + "m");
     // Set class name 
     // Set class name 
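
Using Environment.CLASSPATH.$() and File.pathSeparatorChar, rather than the hard-coded "${CLASSPATH}" and ':', keeps the generated classpath valid on Windows as well as Unix. A minimal sketch of the same construction, with an illustrative helper name that is not part of the patch:

    import java.io.File;

    import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;

    class ClasspathSketch {
      // Environment.CLASSPATH.$() expands to the platform-appropriate
      // reference to the CLASSPATH variable; File.pathSeparatorChar is
      // ':' on Unix and ';' on Windows.
      static String buildClassPath(String... entries) {
        StringBuilder cp = new StringBuilder(Environment.CLASSPATH.$())
            .append(File.pathSeparatorChar).append("./*");
        for (String entry : entries) {
          cp.append(File.pathSeparatorChar).append(entry.trim());
        }
        return cp.toString();
      }
    }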

+ 6 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java

@@ -29,6 +29,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.JarFinder;
 import org.apache.hadoop.util.JarFinder;
+import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.MiniYARNCluster;
 import org.apache.hadoop.yarn.server.MiniYARNCluster;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
@@ -54,8 +55,8 @@ public class TestDistributedShell {
     conf.setClass(YarnConfiguration.RM_SCHEDULER, 
     conf.setClass(YarnConfiguration.RM_SCHEDULER, 
         FifoScheduler.class, ResourceScheduler.class);
         FifoScheduler.class, ResourceScheduler.class);
     if (yarnCluster == null) {
     if (yarnCluster == null) {
-      yarnCluster = new MiniYARNCluster(TestDistributedShell.class.getName(),
-          1, 1, 1);
+      yarnCluster = new MiniYARNCluster(
+        TestDistributedShell.class.getSimpleName(), 1, 1, 1);
       yarnCluster.init(conf);
       yarnCluster.init(conf);
       yarnCluster.start();
       yarnCluster.start();
       URL url = Thread.currentThread().getContextClassLoader().getResource("yarn-site.xml");
       URL url = Thread.currentThread().getContextClassLoader().getResource("yarn-site.xml");
@@ -82,7 +83,7 @@ public class TestDistributedShell {
     }
     }
   }
   }
 
 
-  @Test
+  @Test(timeout=30000)
   public void testDSShell() throws Exception {
   public void testDSShell() throws Exception {
 
 
     String[] args = {
     String[] args = {
@@ -91,7 +92,7 @@ public class TestDistributedShell {
         "--num_containers",
         "--num_containers",
         "2",
         "2",
         "--shell_command",
         "--shell_command",
-        "ls",
+        Shell.WINDOWS ? "dir" : "ls",
         "--master_memory",
         "--master_memory",
         "512",
         "512",
         "--container_memory",
         "--container_memory",
@@ -110,7 +111,7 @@ public class TestDistributedShell {
 
 
   }
   }
 
 
-  @Test
+  @Test(timeout=30000)
   public void testDSShellWithNoArgs() throws Exception {
   public void testDSShellWithNoArgs() throws Exception {
 
 
     String[] args = {};
     String[] args = {};

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java

@@ -91,7 +91,7 @@ public class TestUnmanagedAMLauncher {
     return envClassPath;
     return envClassPath;
   }
   }
 
 
-  @Test
+  @Test(timeout=10000)
   public void testDSShell() throws Exception {
   public void testDSShell() throws Exception {
     String classpath = getTestRuntimeClasspath();
     String classpath = getTestRuntimeClasspath();
     String javaHome = System.getenv("JAVA_HOME");
     String javaHome = System.getenv("JAVA_HOME");
@@ -110,7 +110,7 @@ public class TestUnmanagedAMLauncher {
         javaHome
         javaHome
             + "/bin/java -Xmx512m "
             + "/bin/java -Xmx512m "
             + "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster "
             + "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster "
-            + "--container_memory 128 --num_containers 0 --priority 0 --shell_command ls" };
+            + "--container_memory 128 --num_containers 1 --priority 0 --shell_command ls" };
 
 
     LOG.info("Initializing Launcher");
     LOG.info("Initializing Launcher");
     UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration(
     UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration(

+ 354 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java

@@ -0,0 +1,354 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.client;
+
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Evolving;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.YarnException;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.service.AbstractService;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * <code>AMRMClientAsync</code> handles communication with the ResourceManager
+ * and provides asynchronous updates on events such as container allocations and
+ * completions.  It contains a thread that sends periodic heartbeats to the
+ * ResourceManager.
+ * 
+ * It should be used by implementing a CallbackHandler:
+ * <pre>
+ * {@code
+ * class MyCallbackHandler implements AMRMClientAsync.CallbackHandler {
+ *   public void onContainersAllocated(List<Container> containers) {
+ *     [run tasks on the containers]
+ *   }
+ *   
+ *   public void onContainersCompleted(List<ContainerStatus> statuses) {
+ *     [update progress, check whether app is done]
+ *   }
+ *   
+ *   public void onNodesUpdated(List<NodeReport> updated) {}
+ *   
+ *   public void onRebootRequest() {}
+ * }
+ * }
+ * </pre>
+ * 
+ * The client's lifecycle should be managed similarly to the following:
+ * 
+ * <pre>
+ * {@code
+ * AMRMClientAsync asyncClient = new AMRMClientAsync(appAttId, 1000, new MyCallbackHandler());
+ * asyncClient.init(conf);
+ * asyncClient.start();
+ * RegisterApplicationMasterResponse response = asyncClient
+ *    .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
+ *       appMasterTrackingUrl);
+ * asyncClient.addContainerRequest(containerRequest);
+ * [... wait for application to complete]
+ * asyncClient.unregisterApplicationMaster(status, appMsg, trackingUrl);
+ * asyncClient.stop();
+ * }
+ * </pre>
+ */
+@Unstable
+@Evolving
+public class AMRMClientAsync extends AbstractService {
+  
+  private static final Log LOG = LogFactory.getLog(AMRMClientAsync.class);
+  
+  private final AMRMClient client;
+  private final int intervalMs;
+  private final HeartbeatThread heartbeatThread;
+  private final CallbackHandlerThread handlerThread;
+  private final CallbackHandler handler;
+
+  private final BlockingQueue<AllocateResponse> responseQueue;
+  
+  private volatile boolean keepRunning;
+  private volatile float progress;
+  
+  public AMRMClientAsync(ApplicationAttemptId id, int intervalMs,
+      CallbackHandler callbackHandler) {
+    this(new AMRMClientImpl(id), intervalMs, callbackHandler);
+  }
+  
+  @Private
+  @VisibleForTesting
+  AMRMClientAsync(AMRMClient client, int intervalMs,
+      CallbackHandler callbackHandler) {
+    super(AMRMClientAsync.class.getName());
+    this.client = client;
+    this.intervalMs = intervalMs;
+    handler = callbackHandler;
+    heartbeatThread = new HeartbeatThread();
+    handlerThread = new CallbackHandlerThread();
+    responseQueue = new LinkedBlockingQueue<AllocateResponse>();
+    keepRunning = true;
+  }
+  
+  /**
+   * Sets the application's current progress. It will be transmitted to the
+   * resource manager on the next heartbeat.
+   * @param progress
+   *    the application's progress so far
+   */
+  public void setProgress(float progress) {
+    this.progress = progress;
+  }
+  
+  @Override
+  public void init(Configuration conf) {
+    super.init(conf);
+    client.init(conf);
+  }
+  
+  @Override
+  public void start() {
+    handlerThread.start();
+    client.start();
+    super.start();
+  }
+  
+  /**
+   * Tells the heartbeat and handler threads to stop and waits for them to
+   * terminate.  Calling this method from the callback handler thread would cause
+   * deadlock, and thus should be avoided.
+   */
+  @Override
+  public void stop() {
+    if (Thread.currentThread() == handlerThread) {
+      throw new YarnException("Cannot call stop from callback handler thread!");
+    }
+    keepRunning = false;
+    try {
+      heartbeatThread.join();
+    } catch (InterruptedException ex) {
+      LOG.error("Error joining with heartbeat thread", ex);
+    }
+    client.stop();
+    try {
+      handlerThread.interrupt();
+      handlerThread.join();
+    } catch (InterruptedException ex) {
+      LOG.error("Error joining with hander thread", ex);
+    }
+    super.stop();
+  }
+  
+  /**
+   * Registers this application master with the resource manager. On successful
+   * registration, starts the heartbeating thread.
+   */
+  public RegisterApplicationMasterResponse registerApplicationMaster(
+      String appHostName, int appHostPort, String appTrackingUrl)
+      throws YarnRemoteException {
+    RegisterApplicationMasterResponse response =
+        client.registerApplicationMaster(appHostName, appHostPort, appTrackingUrl);
+    heartbeatThread.start();
+    return response;
+  }
+
+  /**
+   * Unregister the application master. This must be called at the end.
+   * @param appStatus Success/Failure status of the master
+   * @param appMessage Diagnostics message on failure
+   * @param appTrackingUrl New URL to get master info
+   * @throws YarnRemoteException
+   */
+  public void unregisterApplicationMaster(FinalApplicationStatus appStatus,
+      String appMessage, String appTrackingUrl) throws YarnRemoteException {
+    synchronized (client) {
+      keepRunning = false;
+      client.unregisterApplicationMaster(appStatus, appMessage, appTrackingUrl);
+    }
+  }
+
+  /**
+   * Request containers for resources before calling <code>allocate</code>
+   * @param req Resource request
+   */
+  public void addContainerRequest(AMRMClient.ContainerRequest req) {
+    client.addContainerRequest(req);
+  }
+
+  /**
+   * Remove a previous container request. The request may have already been
+   * sent to the ResourceManager, so even after removal the app must be
+   * prepared to receive an allocation for it.
+   * @param req Resource request
+   */
+  public void removeContainerRequest(AMRMClient.ContainerRequest req) {
+    client.removeContainerRequest(req);
+  }
+
+  /**
+   * Release a container assigned by the ResourceManager. If the app cannot
+   * use the container, or no longer wants it, it can release it. The app must
+   * make a new request for the released resource capability if it still needs
+   * it, e.g. if it released a non-local resource.
+   * @param containerId
+   */
+  public void releaseAssignedContainer(ContainerId containerId) {
+    client.releaseAssignedContainer(containerId);
+  }
+
+  /**
+   * Get the currently available resources in the cluster.
+   * A valid value is available after a call to allocate has been made
+   * @return Currently available resources
+   */
+  public Resource getClusterAvailableResources() {
+    return client.getClusterAvailableResources();
+  }
+
+  /**
+   * Get the current number of nodes in the cluster.
+   * A valid value is available after a call to allocate has been made
+   * @return Current number of nodes in the cluster
+   */
+  public int getClusterNodeCount() {
+    return client.getClusterNodeCount();
+  }
+  
+  private class HeartbeatThread extends Thread {
+    public HeartbeatThread() {
+      super("AMRM Heartbeater thread");
+    }
+    
+    public void run() {
+      while (true) {
+        AllocateResponse response = null;
+        // synchronization ensures we don't send heartbeats after unregistering
+        synchronized (client) {
+          if (!keepRunning) {
+            break;
+          }
+            
+          try {
+            response = client.allocate(progress);
+          } catch (YarnRemoteException ex) {
+            LOG.error("Failed to heartbeat", ex);
+          }
+        }
+        if (response != null) {
+          while (true) {
+            try {
+              responseQueue.put(response);
+              break;
+            } catch (InterruptedException ex) {
+              LOG.warn("Interrupted while waiting to put on response queue", ex);
+            }
+          }
+        }
+        
+        try {
+          Thread.sleep(intervalMs);
+        } catch (InterruptedException ex) {
+          LOG.warn("Heartbeater interrupted", ex);
+        }
+      }
+    }
+  }
+  
+  private class CallbackHandlerThread extends Thread {
+    public CallbackHandlerThread() {
+      super("AMRM Callback Handler Thread");
+    }
+    
+    public void run() {
+      while (keepRunning) {
+        AllocateResponse response;
+        try {
+          response = responseQueue.take();
+        } catch (InterruptedException ex) {
+          LOG.info("Interrupted while waiting for queue");
+          continue;
+        }
+
+        if (response.getReboot()) {
+          handler.onRebootRequest();
+        }
+        List<NodeReport> updatedNodes = response.getUpdatedNodes();
+        if (!updatedNodes.isEmpty()) {
+          handler.onNodesUpdated(updatedNodes);
+        }
+        
+        List<ContainerStatus> completed =
+            response.getCompletedContainersStatuses();
+        if (!completed.isEmpty()) {
+          handler.onContainersCompleted(completed);
+        }
+
+        List<Container> allocated = response.getAllocatedContainers();
+        if (!allocated.isEmpty()) {
+          handler.onContainersAllocated(allocated);
+        }
+      }
+    }
+  }
+  
+  public interface CallbackHandler {
+    
+    /**
+     * Called when the ResourceManager responds to a heartbeat with completed
+     * containers. If the response contains both completed containers and
+     * allocated containers, this will be called before containersAllocated.
+     */
+    public void onContainersCompleted(List<ContainerStatus> statuses);
+    
+    /**
+     * Called when the ResourceManager responds to a heartbeat with allocated
+     * containers. If the response contains both completed containers and
+     * allocated containers, this will be called after containersCompleted.
+     */
+    public void onContainersAllocated(List<Container> containers);
+    
+    /**
+     * Called when the ResourceManager wants the ApplicationMaster to reboot
+     * for being out of sync.
+     */
+    public void onRebootRequest();
+    
+    /**
+     * Called when nodes tracked by the ResourceManager have changed in
+     * health, availability, etc.
+     */
+    public void onNodesUpdated(List<NodeReport> updatedNodes);
+  }
+}
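
Note the constraint enforced in stop(): it joins the callback handler thread, so it must never be called from a callback. A minimal sketch of driving shutdown from the main thread, assuming a hypothetical `done` flag that the application's CallbackHandler sets when work is finished (neither the flag nor the helper class is part of this patch):

    import java.util.concurrent.atomic.AtomicBoolean;

    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
    import org.apache.hadoop.yarn.client.AMRMClientAsync;

    class ShutdownSketch {
      // Hypothetical flag the application's CallbackHandler sets once all
      // containers have completed.
      static final AtomicBoolean done = new AtomicBoolean(false);

      static void waitAndShutdown(AMRMClientAsync asyncClient) throws Exception {
        // Wait on the main thread; stop() joins the callback handler thread,
        // so calling it from a callback would deadlock.
        while (!done.get()) {
          Thread.sleep(100);
        }
        asyncClient.unregisterApplicationMaster(
            FinalApplicationStatus.SUCCEEDED, "", "");
        asyncClient.stop();
      }
    }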

+ 2 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientImpl.java

@@ -45,7 +45,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
@@ -194,13 +193,12 @@ public class AMRMClientImpl extends AbstractService implements AMRMClient {
       }
       }
 
 
       allocateResponse = rmClient.allocate(allocateRequest);
       allocateResponse = rmClient.allocate(allocateRequest);
-      AMResponse response = allocateResponse.getAMResponse();
 
 
       synchronized (this) {
       synchronized (this) {
         // update these on successful RPC
         // update these on successful RPC
         clusterNodeCount = allocateResponse.getNumClusterNodes();
         clusterNodeCount = allocateResponse.getNumClusterNodes();
-        lastResponseId = response.getResponseId();
-        clusterAvailableResources = response.getAvailableResources();
+        lastResponseId = allocateResponse.getResponseId();
+        clusterAvailableResources = allocateResponse.getAvailableResources();
       }
       }
     } finally {
     } finally {
       // TODO how to differentiate remote yarn exception vs error in rpc
       // TODO how to differentiate remote yarn exception vs error in rpc

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java

@@ -153,6 +153,10 @@ public class ApplicationCLI extends YarnCLI {
       appReportStr.println(appReport.getFinalApplicationStatus());
       appReportStr.println(appReport.getFinalApplicationStatus());
       appReportStr.print("\tTracking-URL : ");
       appReportStr.print("\tTracking-URL : ");
       appReportStr.println(appReport.getOriginalTrackingUrl());
       appReportStr.println(appReport.getOriginalTrackingUrl());
+      appReportStr.print("\tRPC Port : ");
+      appReportStr.println(appReport.getRpcPort());
+      appReportStr.print("\tAM Host : ");
+      appReportStr.println(appReport.getHost());
       appReportStr.print("\tDiagnostics : ");
       appReportStr.print("\tDiagnostics : ");
       appReportStr.print(appReport.getDiagnostics());
       appReportStr.print(appReport.getDiagnostics());
     } else {
     } else {

+ 6 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClient.java

@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
@@ -202,9 +201,8 @@ public class TestAMRMClient {
       assertTrue(amClient.release.size() == 0);
       assertTrue(amClient.release.size() == 0);
       
       
       assertTrue(nodeCount == amClient.getClusterNodeCount());
       assertTrue(nodeCount == amClient.getClusterNodeCount());
-      AMResponse amResponse = allocResponse.getAMResponse();
-      allocatedContainerCount += amResponse.getAllocatedContainers().size();
-      for(Container container : amResponse.getAllocatedContainers()) {
+      allocatedContainerCount += allocResponse.getAllocatedContainers().size();
+      for(Container container : allocResponse.getAllocatedContainers()) {
         ContainerId rejectContainerId = container.getId();
         ContainerId rejectContainerId = container.getId();
         releases.add(rejectContainerId);
         releases.add(rejectContainerId);
         amClient.releaseAssignedContainer(rejectContainerId);
         amClient.releaseAssignedContainer(rejectContainerId);
@@ -264,11 +262,11 @@ public class TestAMRMClient {
     while(!releases.isEmpty() || iterationsLeft-- > 0) {
     while(!releases.isEmpty() || iterationsLeft-- > 0) {
       // inform RM of rejection
       // inform RM of rejection
       AllocateResponse allocResponse = amClient.allocate(0.1f);
       AllocateResponse allocResponse = amClient.allocate(0.1f);
-      AMResponse amResponse = allocResponse.getAMResponse();
       // RM did not send new containers because AM does not need any
       // RM did not send new containers because AM does not need any
-      assertTrue(amResponse.getAllocatedContainers().size() == 0);
-      if(amResponse.getCompletedContainersStatuses().size() > 0) {
-        for(ContainerStatus cStatus : amResponse.getCompletedContainersStatuses()) {
+      assertTrue(allocResponse.getAllocatedContainers().size() == 0);
+      if(allocResponse.getCompletedContainersStatuses().size() > 0) {
+        for(ContainerStatus cStatus :allocResponse
+            .getCompletedContainersStatuses()) {
           if(releases.contains(cStatus.getContainerId())) {
           if(releases.contains(cStatus.getContainerId())) {
             assertTrue(cStatus.getState() == ContainerState.COMPLETE);
             assertTrue(cStatus.getState() == ContainerState.COMPLETE);
             assertTrue(cStatus.getExitStatus() == -100);
             assertTrue(cStatus.getExitStatus() == -100);

+ 184 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java

@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.client;
+
+import static org.mockito.Mockito.anyFloat;
+import static org.mockito.Mockito.anyInt;
+import static org.mockito.Mockito.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.util.BuilderUtils;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+public class TestAMRMClientAsync {
+
+  private static final Log LOG = LogFactory.getLog(TestAMRMClientAsync.class);
+  
+  @Test(timeout=10000)
+  public void testAMRMClientAsync() throws Exception {
+    Configuration conf = new Configuration();
+    List<ContainerStatus> completed1 = Arrays.asList(
+        BuilderUtils.newContainerStatus(
+            BuilderUtils.newContainerId(0, 0, 0, 0),
+            ContainerState.COMPLETE, "", 0));
+    List<Container> allocated1 = Arrays.asList(
+        BuilderUtils.newContainer(null, null, null, null, null, null));
+    final AllocateResponse response1 = createAllocateResponse(
+        new ArrayList<ContainerStatus>(), allocated1);
+    final AllocateResponse response2 = createAllocateResponse(completed1,
+        new ArrayList<Container>());
+    final AllocateResponse emptyResponse = createAllocateResponse(
+        new ArrayList<ContainerStatus>(), new ArrayList<Container>());
+
+    TestCallbackHandler callbackHandler = new TestCallbackHandler();
+    AMRMClient client = mock(AMRMClient.class);
+    final AtomicBoolean secondHeartbeatReceived = new AtomicBoolean(false);
+    when(client.allocate(anyFloat())).thenReturn(response1).thenAnswer(new Answer<AllocateResponse>() {
+      @Override
+      public AllocateResponse answer(InvocationOnMock invocation)
+          throws Throwable {
+        secondHeartbeatReceived.set(true);
+        return response2;
+      }
+    }).thenReturn(emptyResponse);
+    when(client.registerApplicationMaster(anyString(), anyInt(), anyString()))
+      .thenReturn(null);
+    
+    AMRMClientAsync asyncClient = new AMRMClientAsync(client, 20, callbackHandler);
+    asyncClient.init(conf);
+    asyncClient.start();
+    asyncClient.registerApplicationMaster("localhost", 1234, null);
+    
+    // Even while the CallbackHandler is still processing the first response,
+    // the heartbeater thread should keep sending heartbeats.
+    // To test this, wait for the second heartbeat to be received.
+    while (!secondHeartbeatReceived.get()) {
+      Thread.sleep(10);
+    }
+    
+    // allocated containers should come before completed containers
+    Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
+    
+    // wait for the allocated containers from the first heartbeat's response
+    while (callbackHandler.takeAllocatedContainers() == null) {
+      Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
+      Thread.sleep(10);
+    }
+    
+    // wait for the completed containers from the second heartbeat's response
+    while (callbackHandler.takeCompletedContainers() == null) {
+      Thread.sleep(10);
+    }
+    
+    asyncClient.stop();
+    
+    Assert.assertEquals(null, callbackHandler.takeAllocatedContainers());
+    Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
+  }
+  
+  private AllocateResponse createAllocateResponse(
+      List<ContainerStatus> completed, List<Container> allocated) {
+    AllocateResponse response = BuilderUtils.newAllocateResponse(0, completed, allocated,
+        new ArrayList<NodeReport>(), null, false, 1);
+    return response;
+  }
+  
+  private class TestCallbackHandler implements AMRMClientAsync.CallbackHandler {
+    private volatile List<ContainerStatus> completedContainers;
+    private volatile List<Container> allocatedContainers;
+    
+    public List<ContainerStatus> takeCompletedContainers() {
+      List<ContainerStatus> ret = completedContainers;
+      if (ret == null) {
+        return null;
+      }
+      completedContainers = null;
+      synchronized (ret) {
+        ret.notify();
+      }
+      return ret;
+    }
+    
+    public List<Container> takeAllocatedContainers() {
+      List<Container> ret = allocatedContainers;
+      if (ret == null) {
+        return null;
+      }
+      allocatedContainers = null;
+      synchronized (ret) {
+        ret.notify();
+      }
+      return ret;
+    }
+    
+    @Override
+    public void onContainersCompleted(List<ContainerStatus> statuses) {
+      completedContainers = statuses;
+      // wait for containers to be taken before returning
+      synchronized (completedContainers) {
+        while (completedContainers != null) {
+          try {
+            completedContainers.wait();
+          } catch (InterruptedException ex) {
+            LOG.error("Interrupted during wait", ex);
+          }
+        }
+      }
+    }
+
+    @Override
+    public void onContainersAllocated(List<Container> containers) {
+      allocatedContainers = containers;
+      // wait for containers to be taken before returning
+      synchronized (allocatedContainers) {
+        while (allocatedContainers != null) {
+          try {
+            allocatedContainers.wait();
+          } catch (InterruptedException ex) {
+            LOG.error("Interrupted during wait", ex);
+          }
+        }
+      }
+    }
+
+    @Override
+    public void onRebootRequest() {}
+
+    @Override
+    public void onNodesUpdated(List<NodeReport> updatedNodes) {}
+  }
+}

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java

@@ -94,6 +94,8 @@ public class TestYarnCLI {
     pw.println("\tState : FINISHED");
     pw.println("\tState : FINISHED");
     pw.println("\tFinal-State : SUCCEEDED");
     pw.println("\tFinal-State : SUCCEEDED");
     pw.println("\tTracking-URL : N/A");
     pw.println("\tTracking-URL : N/A");
+    pw.println("\tRPC Port : 124");
+    pw.println("\tAM Host : host");
     pw.println("\tDiagnostics : diagnostics");
     pw.println("\tDiagnostics : diagnostics");
     pw.close();
     pw.close();
     String appReportStr = baos.toString("UTF-8");
     String appReportStr = baos.toString("UTF-8");

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -437,6 +437,16 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_PMEM_MB = NM_PREFIX + "resource.memory-mb";
   public static final String NM_PMEM_MB = NM_PREFIX + "resource.memory-mb";
   public static final int DEFAULT_NM_PMEM_MB = 8 * 1024;
   public static final int DEFAULT_NM_PMEM_MB = 8 * 1024;
 
 
+  /** Specifies whether physical memory check is enabled. */
+  public static final String NM_PMEM_CHECK_ENABLED = NM_PREFIX
+      + "pmem-check-enabled";
+  public static final boolean DEFAULT_NM_PMEM_CHECK_ENABLED = true;
+
+  /** Specifies whether virtual memory check is enabled. */
+  public static final String NM_VMEM_CHECK_ENABLED = NM_PREFIX
+      + "vmem-check-enabled";
+  public static final boolean DEFAULT_NM_VMEM_CHECK_ENABLED = true;
+
   /** Conversion ratio for physical memory to virtual memory. */
   /** Conversion ratio for physical memory to virtual memory. */
   public static final String NM_VMEM_PMEM_RATIO =
   public static final String NM_VMEM_PMEM_RATIO =
     NM_PREFIX + "vmem-pmem-ratio";
     NM_PREFIX + "vmem-pmem-ratio";

+ 8 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java

@@ -27,6 +27,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.io.InputStreamReader;
 import java.io.IOException;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.io.Writer;
 import java.io.Writer;
 import java.security.PrivilegedExceptionAction;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.ArrayList;
@@ -505,7 +506,7 @@ public class AggregatedLogFormat {
      * @throws IOException
      * @throws IOException
      */
      */
     public static void readAContainerLogsForALogType(
     public static void readAContainerLogsForALogType(
-        DataInputStream valueStream, DataOutputStream out)
+        DataInputStream valueStream, PrintStream out)
           throws IOException {
           throws IOException {
 
 
       byte[] buf = new byte[65535];
       byte[] buf = new byte[65535];
@@ -513,11 +514,11 @@ public class AggregatedLogFormat {
       String fileType = valueStream.readUTF();
       String fileType = valueStream.readUTF();
       String fileLengthStr = valueStream.readUTF();
       String fileLengthStr = valueStream.readUTF();
       long fileLength = Long.parseLong(fileLengthStr);
       long fileLength = Long.parseLong(fileLengthStr);
-      out.writeUTF("\nLogType:");
-      out.writeUTF(fileType);
-      out.writeUTF("\nLogLength:");
-      out.writeUTF(fileLengthStr);
-      out.writeUTF("\nLog Contents:\n");
+      out.print("LogType: ");
+      out.println(fileType);
+      out.print("LogLength: ");
+      out.println(fileLengthStr);
+      out.println("Log Contents:");
 
 
       int curRead = 0;
       int curRead = 0;
       long pendingRead = fileLength - curRead;
       long pendingRead = fileLength - curRead;
@@ -533,6 +534,7 @@ public class AggregatedLogFormat {
                   pendingRead > buf.length ? buf.length : (int) pendingRead;
                   pendingRead > buf.length ? buf.length : (int) pendingRead;
         len = valueStream.read(buf, 0, toRead);
         len = valueStream.read(buf, 0, toRead);
       }
       }
+      out.println("");
     }
     }
 
 
     public void close() throws IOException {
     public void close() throws IOException {

+ 17 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java

@@ -19,10 +19,10 @@
 package org.apache.hadoop.yarn.logaggregation;
 package org.apache.hadoop.yarn.logaggregation;
 
 
 import java.io.DataInputStream;
 import java.io.DataInputStream;
-import java.io.DataOutputStream;
 import java.io.EOFException;
 import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.IOException;
+import java.io.PrintStream;
 
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.CommandLineParser;
@@ -30,6 +30,7 @@ import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileContext;
@@ -57,10 +58,13 @@ public class LogDumper extends Configured implements Tool {
   public int run(String[] args) throws Exception {
   public int run(String[] args) throws Exception {
 
 
     Options opts = new Options();
     Options opts = new Options();
-    opts.addOption(APPLICATION_ID_OPTION, true, "ApplicationId");
-    opts.addOption(CONTAINER_ID_OPTION, true, "ContainerId");
-    opts.addOption(NODE_ADDRESS_OPTION, true, "NodeAddress");
-    opts.addOption(APP_OWNER_OPTION, true, "AppOwner");
+    opts.addOption(APPLICATION_ID_OPTION, true, "ApplicationId (required)");
+    opts.addOption(CONTAINER_ID_OPTION, true,
+      "ContainerId (must be specified if node address is specified)");
+    opts.addOption(NODE_ADDRESS_OPTION, true, "NodeAddress in the format "
+      + "nodename:port (must be specified if container id is specified)");
+    opts.addOption(APP_OWNER_OPTION, true,
+      "AppOwner (assumed to be current user if not specified)");
 
 
     if (args.length < 1) {
     if (args.length < 1) {
       HelpFormatter formatter = new HelpFormatter();
       HelpFormatter formatter = new HelpFormatter();
@@ -99,14 +103,12 @@ public class LogDumper extends Configured implements Tool {
     ApplicationId appId =
     ApplicationId appId =
         ConverterUtils.toApplicationId(recordFactory, appIdStr);
         ConverterUtils.toApplicationId(recordFactory, appIdStr);
 
 
-    DataOutputStream out = new DataOutputStream(System.out);
-
     if (appOwner == null || appOwner.isEmpty()) {
     if (appOwner == null || appOwner.isEmpty()) {
       appOwner = UserGroupInformation.getCurrentUser().getShortUserName();
       appOwner = UserGroupInformation.getCurrentUser().getShortUserName();
     }
     }
     int resultCode = 0;
     int resultCode = 0;
     if (containerIdStr == null && nodeAddress == null) {
     if (containerIdStr == null && nodeAddress == null) {
-      resultCode = dumpAllContainersLogs(appId, appOwner, out);
+      resultCode = dumpAllContainersLogs(appId, appOwner, System.out);
     } else if ((containerIdStr == null && nodeAddress != null)
     } else if ((containerIdStr == null && nodeAddress != null)
         || (containerIdStr != null && nodeAddress == null)) {
         || (containerIdStr != null && nodeAddress == null)) {
       System.out.println("ContainerId or NodeAddress cannot be null!");
       System.out.println("ContainerId or NodeAddress cannot be null!");
@@ -125,7 +127,7 @@ public class LogDumper extends Configured implements Tool {
                   appOwner,
                   appOwner,
                   ConverterUtils.toNodeId(nodeAddress),
                   ConverterUtils.toNodeId(nodeAddress),
                   LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf())));
                   LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf())));
-      resultCode = dumpAContainerLogs(containerIdStr, reader, out);
+      resultCode = dumpAContainerLogs(containerIdStr, reader, System.out);
     }
     }
 
 
     return resultCode;
     return resultCode;
@@ -149,12 +151,11 @@ public class LogDumper extends Configured implements Tool {
           "Log aggregation has not completed or is not enabled.");
           "Log aggregation has not completed or is not enabled.");
       return -1;
       return -1;
     }
     }
-    DataOutputStream out = new DataOutputStream(System.out);
-    return dumpAContainerLogs(containerId, reader, out);
+    return dumpAContainerLogs(containerId, reader, System.out);
   }
   }
 
 
   private int dumpAContainerLogs(String containerIdStr,
   private int dumpAContainerLogs(String containerIdStr,
-      AggregatedLogFormat.LogReader reader, DataOutputStream out)
+      AggregatedLogFormat.LogReader reader, PrintStream out)
       throws IOException {
       throws IOException {
     DataInputStream valueStream;
     DataInputStream valueStream;
     LogKey key = new LogKey();
     LogKey key = new LogKey();
@@ -183,7 +184,7 @@ public class LogDumper extends Configured implements Tool {
   }
   }
 
 
   private int dumpAllContainersLogs(ApplicationId appId, String appOwner,
   private int dumpAllContainersLogs(ApplicationId appId, String appOwner,
-      DataOutputStream out) throws IOException {
+      PrintStream out) throws IOException {
     Path remoteRootLogDir =
     Path remoteRootLogDir =
         new Path(getConf().get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
         new Path(getConf().get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
             YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
             YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
@@ -216,6 +217,9 @@ public class LogDumper extends Configured implements Tool {
         valueStream = reader.next(key);
         valueStream = reader.next(key);
 
 
         while (valueStream != null) {
         while (valueStream != null) {
+          String containerString = "\n\nContainer: " + key + " on " + thisNodeFile.getPath().getName();
+          out.println(containerString);
+          out.println(StringUtils.repeat("=", containerString.length()));
           while (true) {
           while (true) {
             try {
             try {
               LogReader.readAContainerLogsForALogType(valueStream, out);
               LogReader.readAContainerLogsForALogType(valueStream, out);

+ 18 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java

@@ -28,6 +28,7 @@ import java.util.Map;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -404,4 +405,21 @@ public class BuilderUtils {
     allocateRequest.addAllReleases(containersToBeReleased);
     allocateRequest.addAllReleases(containersToBeReleased);
     return allocateRequest;
     return allocateRequest;
   }
   }
+  
+  public static AllocateResponse newAllocateResponse(int responseId,
+      List<ContainerStatus> completedContainers,
+      List<Container> allocatedContainers, List<NodeReport> updatedNodes,
+      Resource availResources, boolean reboot, int numClusterNodes) {
+    AllocateResponse response = recordFactory
+        .newRecordInstance(AllocateResponse.class);
+    response.setNumClusterNodes(numClusterNodes);
+    response.setResponseId(responseId);
+    response.setCompletedContainersStatuses(completedContainers);
+    response.setAllocatedContainers(allocatedContainers);
+    response.setUpdatedNodes(updatedNodes);
+    response.setAvailableResources(availResources);
+    response.setReboot(reboot);
+
+    return response;
+  }
 }
 }
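
The new newAllocateResponse helper makes it straightforward to fabricate responses for tests; the TestAMRMClientAsync diff earlier in this change uses it the same way. A small sketch with an illustrative class name (not part of the patch):

    import java.util.ArrayList;

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerStatus;
    import org.apache.hadoop.yarn.api.records.NodeReport;
    import org.apache.hadoop.yarn.util.BuilderUtils;

    class AllocateResponseSketch {
      // Fabricate an empty response: responseId 0, no completed or allocated
      // containers, no node updates, no available resources, reboot=false,
      // one cluster node.
      static AllocateResponse empty() {
        return BuilderUtils.newAllocateResponse(0,
            new ArrayList<ContainerStatus>(),
            new ArrayList<Container>(),
            new ArrayList<NodeReport>(),
            null, false, 1);
      }
    }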

+ 14 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -447,6 +447,20 @@
     <value>8192</value>
     <value>8192</value>
   </property>
   </property>
 
 
+  <property>
+    <description>Whether physical memory limits will be enforced for
+    containers.</description>
+    <name>yarn.nodemanager.pmem-check-enabled</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <description>Whether virtual memory limits will be enforced for
+    containers.</description>
+    <name>yarn.nodemanager.vmem-check-enabled</name>
+    <value>true</value>
+  </property>
+
   <property>
   <property>
     <description>Ratio between virtual memory to physical memory when
     <description>Ratio between virtual memory to physical memory when
     setting memory limits for containers. Container allocations are
     setting memory limits for containers. Container allocations are

+ 7 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRecordFactory.java

@@ -23,9 +23,9 @@ import junit.framework.Assert;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factories.impl.pb.RecordFactoryPBImpl;
 import org.apache.hadoop.yarn.factories.impl.pb.RecordFactoryPBImpl;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl;
-import org.apache.hadoop.yarn.api.records.AMResponse;
-import org.apache.hadoop.yarn.api.records.impl.pb.AMResponsePBImpl;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl;
 import org.junit.Test;
 import org.junit.Test;
 
 
 public class TestRecordFactory {
 public class TestRecordFactory {
@@ -35,15 +35,17 @@ public class TestRecordFactory {
     RecordFactory pbRecordFactory = RecordFactoryPBImpl.get();
     RecordFactory pbRecordFactory = RecordFactoryPBImpl.get();
     
     
     try {
     try {
-      AMResponse response = pbRecordFactory.newRecordInstance(AMResponse.class);
-      Assert.assertEquals(AMResponsePBImpl.class, response.getClass());
+      AllocateResponse response =
+          pbRecordFactory.newRecordInstance(AllocateResponse.class);
+      Assert.assertEquals(AllocateResponsePBImpl.class, response.getClass());
     } catch (YarnException e) {
     } catch (YarnException e) {
       e.printStackTrace();
       e.printStackTrace();
       Assert.fail("Failed to crete record");
       Assert.fail("Failed to crete record");
     }
     }
     
     
     try {
     try {
-      AllocateRequest response = pbRecordFactory.newRecordInstance(AllocateRequest.class);
+      AllocateRequest response =
+          pbRecordFactory.newRecordInstance(AllocateRequest.class);
       Assert.assertEquals(AllocateRequestPBImpl.class, response.getClass());
       Assert.assertEquals(AllocateRequestPBImpl.class, response.getClass());
     } catch (YarnException e) {
     } catch (YarnException e) {
       e.printStackTrace();
       e.printStackTrace();

+ 16 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java

@@ -18,6 +18,8 @@
 
 
 package org.apache.hadoop.yarn.util;
 package org.apache.hadoop.yarn.util;
 
 
+import static org.junit.Assert.fail;
+
 import java.io.BufferedReader;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.File;
@@ -188,11 +190,20 @@ public class TestProcfsBasedProcessTree {
     // destroy the process and all its subprocesses
     // destroy the process and all its subprocesses
     destroyProcessTree(pid);
     destroyProcessTree(pid);
 
 
-    if (isSetsidAvailable()) { // whole processtree should be gone
-      Assert.assertFalse("Proceesses in process group live",
-          isAnyProcessInTreeAlive(p));
-    } else {// process should be gone
-      Assert.assertFalse("ProcessTree must have been gone", isAlive(pid));
+    boolean isAlive = true;
+    for (int tries = 100; tries > 0; tries--) {
+      if (isSetsidAvailable()) {// whole processtree
+        isAlive = isAnyProcessInTreeAlive(p);
+      } else {// process
+        isAlive = isAlive(pid);
+      }
+      if (!isAlive) {
+        break;
+      }
+      Thread.sleep(100);
+    }
+    if (isAlive) {
+      fail("ProcessTree shouldn't be alive");
     }
     }
 
 
     LOG.info("Process-tree dump follows: \n" + processTreeDump);
     LOG.info("Process-tree dump follows: \n" + processTreeDump);

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java

@@ -63,6 +63,10 @@ public class TestRackResolver {
       return returnList;
       return returnList;
     }
     }
 
 
+    @Override
+    public void reloadCachedMappings() {
+      // nothing to do here, since RawScriptBasedMapping has no cache.
+    }
   }
   }
 
 
   @Test
   @Test

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java

@@ -22,5 +22,9 @@ public interface ResourceView {
 
 
   long getVmemAllocatedForContainers();
   long getVmemAllocatedForContainers();
 
 
+  boolean isVmemCheckEnabled();
+
   long getPmemAllocatedForContainers();
   long getPmemAllocatedForContainers();
+
+  boolean isPmemCheckEnabled();
 }
 }

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java

@@ -570,8 +570,10 @@ public class ContainerLaunch implements Callable<Integer> {
     // additional testing.  See YARN-358.
     // additional testing.  See YARN-358.
     if (Shell.WINDOWS) {
     if (Shell.WINDOWS) {
       String inputClassPath = environment.get(Environment.CLASSPATH.name());
       String inputClassPath = environment.get(Environment.CLASSPATH.name());
-      environment.put(Environment.CLASSPATH.name(),
-          FileUtil.createJarWithClassPath(inputClassPath, pwd));
+      if (inputClassPath != null && !inputClassPath.isEmpty()) {
+        environment.put(Environment.CLASSPATH.name(),
+            FileUtil.createJarWithClassPath(inputClassPath, pwd));
+      }
     }
     }
 
 
     /**
     /**

+ 69 - 58
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java

@@ -63,14 +63,13 @@ public class ContainersMonitorImpl extends AbstractService implements
   private Configuration conf;
   private Configuration conf;
   private Class<? extends ResourceCalculatorProcessTree> processTreeClass;
   private Class<? extends ResourceCalculatorProcessTree> processTreeClass;
 
 
-  private long maxVmemAllottedForContainers = DISABLED_MEMORY_LIMIT;
-  private long maxPmemAllottedForContainers = DISABLED_MEMORY_LIMIT;
+  private long maxVmemAllottedForContainers = UNKNOWN_MEMORY_LIMIT;
+  private long maxPmemAllottedForContainers = UNKNOWN_MEMORY_LIMIT;
 
 
-  /**
-   * A value which if set for memory related configuration options, indicates
-   * that the options are turned off.
-   */
-  public static final long DISABLED_MEMORY_LIMIT = -1L;
+  private boolean pmemCheckEnabled;
+  private boolean vmemCheckEnabled;
+
+  private static final long UNKNOWN_MEMORY_LIMIT = -1L;
 
 
   public ContainersMonitorImpl(ContainerExecutor exec,
   public ContainersMonitorImpl(ContainerExecutor exec,
       AsyncDispatcher dispatcher, Context context) {
       AsyncDispatcher dispatcher, Context context) {
@@ -104,65 +103,57 @@ public class ContainersMonitorImpl extends AbstractService implements
     LOG.info(" Using ResourceCalculatorProcessTree : "
     LOG.info(" Using ResourceCalculatorProcessTree : "
         + this.processTreeClass);
         + this.processTreeClass);
 
 
-    long totalPhysicalMemoryOnNM = DISABLED_MEMORY_LIMIT;
-    if (this.resourceCalculatorPlugin != null) {
-      totalPhysicalMemoryOnNM =
-          this.resourceCalculatorPlugin.getPhysicalMemorySize();
-      if (totalPhysicalMemoryOnNM <= 0) {
-        LOG.warn("NodeManager's totalPmem could not be calculated. "
-            + "Setting it to " + DISABLED_MEMORY_LIMIT);
-        totalPhysicalMemoryOnNM = DISABLED_MEMORY_LIMIT;
-      }
-    }
+    long configuredPMemForContainers = conf.getLong(
+        YarnConfiguration.NM_PMEM_MB,
+        YarnConfiguration.DEFAULT_NM_PMEM_MB) * 1024 * 1024l;
 
 
+    // Setting these irrespective of whether checks are enabled. Required in
+    // the UI.
     // ///////// Physical memory configuration //////
     // ///////// Physical memory configuration //////
-    this.maxPmemAllottedForContainers =
-        conf.getLong(YarnConfiguration.NM_PMEM_MB, YarnConfiguration.DEFAULT_NM_PMEM_MB);
-    this.maxPmemAllottedForContainers =
-        this.maxPmemAllottedForContainers * 1024 * 1024L; //Normalize to bytes
-
-    if (totalPhysicalMemoryOnNM != DISABLED_MEMORY_LIMIT &&
-        this.maxPmemAllottedForContainers >
-        totalPhysicalMemoryOnNM * 0.80f) {
-      LOG.warn("NodeManager configured with " +
-          TraditionalBinaryPrefix.long2String(maxPmemAllottedForContainers, "", 1) +
-          " physical memory allocated to containers, which is more than " +
-          "80% of the total physical memory available (" +
-          TraditionalBinaryPrefix.long2String(totalPhysicalMemoryOnNM, "", 1) +
-          "). Thrashing might happen.");
-    }
+    this.maxPmemAllottedForContainers = configuredPMemForContainers;
 
 
     // ///////// Virtual memory configuration //////
     // ///////// Virtual memory configuration //////
-    float vmemRatio = conf.getFloat(
-        YarnConfiguration.NM_VMEM_PMEM_RATIO,
+    float vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
         YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
         YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
     Preconditions.checkArgument(vmemRatio > 0.99f,
     Preconditions.checkArgument(vmemRatio > 0.99f,
-        YarnConfiguration.NM_VMEM_PMEM_RATIO +
-        " should be at least 1.0");
+        YarnConfiguration.NM_VMEM_PMEM_RATIO + " should be at least 1.0");
     this.maxVmemAllottedForContainers =
     this.maxVmemAllottedForContainers =
-      (long)(vmemRatio * maxPmemAllottedForContainers);
+        (long) (vmemRatio * configuredPMemForContainers);
+
+    pmemCheckEnabled = conf.getBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED,
+        YarnConfiguration.DEFAULT_NM_PMEM_CHECK_ENABLED);
+    vmemCheckEnabled = conf.getBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED,
+        YarnConfiguration.DEFAULT_NM_VMEM_CHECK_ENABLED);
+    LOG.info("Physical memory check enabled: " + pmemCheckEnabled);
+    LOG.info("Virtual memory check enabled: " + vmemCheckEnabled);
+
+    if (pmemCheckEnabled) {
+      // Logging if actual pmem cannot be determined.
+      long totalPhysicalMemoryOnNM = UNKNOWN_MEMORY_LIMIT;
+      if (this.resourceCalculatorPlugin != null) {
+        totalPhysicalMemoryOnNM = this.resourceCalculatorPlugin
+            .getPhysicalMemorySize();
+        if (totalPhysicalMemoryOnNM <= 0) {
+          LOG.warn("NodeManager's totalPmem could not be calculated. "
+              + "Setting it to " + UNKNOWN_MEMORY_LIMIT);
+          totalPhysicalMemoryOnNM = UNKNOWN_MEMORY_LIMIT;
+        }
+      }

+      if (totalPhysicalMemoryOnNM != UNKNOWN_MEMORY_LIMIT &&
+          this.maxPmemAllottedForContainers > totalPhysicalMemoryOnNM * 0.80f) {
+        LOG.warn("NodeManager configured with "
+            + TraditionalBinaryPrefix.long2String(maxPmemAllottedForContainers,
+                "", 1)
+            + " physical memory allocated to containers, which is more than "
+            + "80% of the total physical memory available ("
+            + TraditionalBinaryPrefix.long2String(totalPhysicalMemoryOnNM, "",
+                1) + "). Thrashing might happen.");
+      }
+    }
    super.init(conf);
  }

-  /**
-   * Is the total physical memory check enabled?
-   *
-   * @return true if total physical memory check is enabled.
-   */
-  boolean isPhysicalMemoryCheckEnabled() {
-    return !(this.maxPmemAllottedForContainers == DISABLED_MEMORY_LIMIT);
-  }
-
-  /**
-   * Is the total virtual memory check enabled?
-   *
-   * @return true if total virtual memory check is enabled.
-   */
-  boolean isVirtualMemoryCheckEnabled() {
-    return !(this.maxVmemAllottedForContainers == DISABLED_MEMORY_LIMIT);
-  }
-
  private boolean isEnabled() {
    if (resourceCalculatorPlugin == null) {
            LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
@@ -174,7 +165,7 @@ public class ContainersMonitorImpl extends AbstractService implements
                + this.getClass().getName() + " is disabled.");
            return false;
    }
-    if (!(isPhysicalMemoryCheckEnabled() || isVirtualMemoryCheckEnabled())) {
+    if (!(isPmemCheckEnabled() || isVmemCheckEnabled())) {
       LOG.info("Neither virutal-memory nor physical-memory monitoring is " +
       LOG.info("Neither virutal-memory nor physical-memory monitoring is " +
           "needed. Not running the monitor-thread");
           "needed. Not running the monitor-thread");
       return false;
       return false;
@@ -412,7 +403,7 @@ public class ContainersMonitorImpl extends AbstractService implements

            boolean isMemoryOverLimit = false;
            String msg = "";
-            if (isVirtualMemoryCheckEnabled()
+            if (isVmemCheckEnabled()
                && isProcessTreeOverLimit(containerId.toString(),
                    currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
              // Container (the root process) is still alive and overflowing
@@ -423,7 +414,7 @@ public class ContainersMonitorImpl extends AbstractService implements
                  currentPmemUsage, pmemLimit,
                  pId, containerId, pTree);
              isMemoryOverLimit = true;
-            } else if (isPhysicalMemoryCheckEnabled()
+            } else if (isPmemCheckEnabled()
                && isProcessTreeOverLimit(containerId.toString(),
                    currentPmemUsage, curRssMemUsageOfAgedProcesses,
                    pmemLimit)) {
@@ -507,11 +498,31 @@ public class ContainersMonitorImpl extends AbstractService implements
    return this.maxVmemAllottedForContainers;
  }

+  /**
+   * Is the total physical memory check enabled?
+   *
+   * @return true if total physical memory check is enabled.
+   */
+  @Override
+  public boolean isPmemCheckEnabled() {
+    return this.pmemCheckEnabled;
+  }
+
  @Override
  public long getPmemAllocatedForContainers() {
    return this.maxPmemAllottedForContainers;
  }

+  /**
+   * Is the total virtual memory check enabled?
+   *
+   * @return true if total virtual memory check is enabled.
+   */
+  @Override
+  public boolean isVmemCheckEnabled() {
+    return this.vmemCheckEnabled;
+  }
+
  @Override
  public void handle(ContainersMonitorEvent monitoringEvent) {


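The ContainersMonitorImpl changes above replace the old convention of signalling "check disabled" with a -1 memory limit by two explicit switches read from YarnConfiguration. As a hedged illustration (not part of this patch; the class name is invented for the example), the sketch below sets the relevant keys on a configuration and mirrors the byte/ratio arithmetic from init():

import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class MemoryCheckConfigExample {
  public static void main(String[] args) {
    YarnConfiguration conf = new YarnConfiguration();
    // 8 GB of physical memory allotted to containers on this node.
    conf.setLong(YarnConfiguration.NM_PMEM_MB, 8 * 1024);
    // Keep the default vmem/pmem ratio.
    conf.setFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
        YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
    // Enforce only the virtual-memory limit; leave physical memory unmonitored.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true);

    // Same arithmetic as ContainersMonitorImpl.init(): limits are held in
    // bytes, and the vmem limit is derived from the configured pmem.
    long pmemBytes = conf.getLong(YarnConfiguration.NM_PMEM_MB,
        YarnConfiguration.DEFAULT_NM_PMEM_MB) * 1024 * 1024L;
    long vmemBytes = (long) (conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
        YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO) * pmemBytes);
    System.out.println("pmem limit (bytes): " + pmemBytes);
    System.out.println("vmem limit (bytes): " + vmemBytes);
  }
}

With settings like these, isEnabled() still returns true because the virtual-memory check remains on, so the monitor thread runs but only vmem violations can flag a container as over its limit.
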
+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java

@@ -67,8 +67,12 @@ public class NodePage extends NMView {
       info("NodeManager information")
       info("NodeManager information")
           ._("Total Vmem allocated for Containers",
           ._("Total Vmem allocated for Containers",
               StringUtils.byteDesc(info.getTotalVmemAllocated() * BYTES_IN_MB))
               StringUtils.byteDesc(info.getTotalVmemAllocated() * BYTES_IN_MB))
+          ._("Vmem enforcement enabled",
+              info.isVmemCheckEnabled())
           ._("Total Pmem allocated for Container",
           ._("Total Pmem allocated for Container",
               StringUtils.byteDesc(info.getTotalPmemAllocated() * BYTES_IN_MB))
               StringUtils.byteDesc(info.getTotalPmemAllocated() * BYTES_IN_MB))
+          ._("Pmem enforcement enabled",
+              info.isVmemCheckEnabled())
           ._("NodeHealthyStatus",
           ._("NodeHealthyStatus",
               info.getHealthStatus())
               info.getHealthStatus())
           ._("LastNodeHealthTime", new Date(
           ._("LastNodeHealthTime", new Date(

+ 12 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java

@@ -36,6 +36,8 @@ public class NodeInfo {
  protected String healthReport;
  protected long totalVmemAllocatedContainersMB;
  protected long totalPmemAllocatedContainersMB;
+  protected boolean vmemCheckEnabled;
+  protected boolean pmemCheckEnabled;
  protected long lastNodeUpdateTime;
  protected boolean nodeHealthy;
  protected String nodeManagerVersion;
@@ -56,8 +58,10 @@ public class NodeInfo {
    this.nodeHostName = context.getNodeId().getHost();
    this.totalVmemAllocatedContainersMB = resourceView
        .getVmemAllocatedForContainers() / BYTES_IN_MB;
+    this.vmemCheckEnabled = resourceView.isVmemCheckEnabled();
    this.totalPmemAllocatedContainersMB = resourceView
        .getPmemAllocatedForContainers() / BYTES_IN_MB;
+    this.pmemCheckEnabled = resourceView.isPmemCheckEnabled();
    this.nodeHealthy = context.getNodeHealthStatus().getIsNodeHealthy();
    this.lastNodeUpdateTime = context.getNodeHealthStatus()
        .getLastHealthReportTime();
@@ -120,8 +124,16 @@ public class NodeInfo {
    return this.totalVmemAllocatedContainersMB;
  }

+  public boolean isVmemCheckEnabled() {
+    return this.vmemCheckEnabled;
+  }
+
  public long getTotalPmemAllocated() {
    return this.totalPmemAllocatedContainersMB;
  }

+  public boolean isPmemCheckEnabled() {
+    return this.pmemCheckEnabled;
+  }
+
}

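The @Override annotations on isPmemCheckEnabled()/isVmemCheckEnabled() in ContainersMonitorImpl and the resourceView.isVmemCheckEnabled()/isPmemCheckEnabled() calls in NodeInfo imply matching accessors on the NodeManager's ResourceView interface, which is one of the files omitted from this truncated listing. A minimal sketch of what that interface would need to declare, under that assumption:

public interface ResourceView {
  // Existing accessors already used by NodeInfo above.
  long getVmemAllocatedForContainers();
  long getPmemAllocatedForContainers();
  // Accessors implied by the new is*CheckEnabled() call sites.
  boolean isVmemCheckEnabled();
  boolean isPmemCheckEnabled();
}
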
+ 18 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java

@@ -22,11 +22,13 @@ import static org.mockito.Mockito.*;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;

+import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Writer;
import java.lang.reflect.Method;
@@ -40,6 +42,7 @@ import java.util.Set;

import junit.framework.Assert;

+import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
@@ -531,24 +534,26 @@ public class TestLogAggregationService extends BaseContainerManagerTest {

        while (true) {
          try {
-            DataOutputBuffer dob = new DataOutputBuffer();
-            LogReader.readAContainerLogsForALogType(valueStream, dob);
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            PrintStream ps = new PrintStream(baos);
+            LogReader.readAContainerLogsForALogType(valueStream, ps);

-            DataInputBuffer dib = new DataInputBuffer();
-            dib.reset(dob.getData(), dob.getLength());
+            String writtenLines[] = baos.toString().split(
+              System.getProperty("line.separator"));

-            Assert.assertEquals("\nLogType:", dib.readUTF());
-            String fileType = dib.readUTF();
+            Assert.assertEquals("LogType:", writtenLines[0].substring(0, 8));
+            String fileType = writtenLines[0].substring(9);

-            Assert.assertEquals("\nLogLength:", dib.readUTF());
-            String fileLengthStr = dib.readUTF();
+            Assert.assertEquals("LogLength:", writtenLines[1].substring(0, 10));
+            String fileLengthStr = writtenLines[1].substring(11);
            long fileLength = Long.parseLong(fileLengthStr);

-            Assert.assertEquals("\nLog Contents:\n", dib.readUTF());
-            byte[] buf = new byte[(int) fileLength]; // cast is okay in this
-                                                     // test.
-            dib.read(buf, 0, (int) fileLength);
-            perContainerMap.put(fileType, new String(buf));
+            Assert.assertEquals("Log Contents:",
+              writtenLines[2].substring(0, 13));
+
+            String logContents = StringUtils.join(
+              Arrays.copyOfRange(writtenLines, 3, writtenLines.length), "\n");
+            perContainerMap.put(fileType, logContents);

            LOG.info("LogType:" + fileType);
             LOG.info("LogType:" + fileLength);
             LOG.info("LogType:" + fileLength);

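The rewritten assertions parse the plain-text dump that LogReader.readAContainerLogsForALogType now writes to a PrintStream: a LogType: header, a LogLength: header, a Log Contents: header, then the raw log lines. The standalone sketch below reproduces that parsing against an assumed example string; it is not the test itself, and the exact header layout (e.g. a space after each colon) is an assumption inferred from the substring offsets above.

import java.util.Arrays;
import org.apache.commons.lang.StringUtils;

public class AggregatedLogParseExample {
  public static void main(String[] args) {
    String sep = System.getProperty("line.separator");
    // Assumed shape of one per-file section of the aggregated-log dump.
    String dump = "LogType: stderr" + sep
        + "LogLength: 11" + sep
        + "Log Contents:" + sep
        + "hello" + sep
        + "world";

    String[] lines = dump.split(sep);
    String fileType = lines[0].substring("LogType:".length()).trim();
    long fileLength =
        Long.parseLong(lines[1].substring("LogLength:".length()).trim());
    // Everything after the "Log Contents:" header is the log body.
    String contents = StringUtils.join(
        Arrays.copyOfRange(lines, 3, lines.length), "\n");

    System.out.println(fileType + " (" + fileLength + " bytes):");
    System.out.println(contents);
  }
}
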
Some files were not shown because too many files changed in this diff.