
Merging trunk to branch HDFS-2802

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1460410 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas 12 years ago
parent
commit
698e3f8ae5
100 changed files with 3153 additions and 1440 deletions
  1. + 8 - 18
      BUILDING.txt
  2. + 17 - 1
      hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java
  3. + 14 - 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  4. + 3 - 1
      hadoop-common-project/hadoop-common/pom.xml
  5. + 28 - 59
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java
  6. + 25 - 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
  7. + 46 - 11
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
  8. + 44 - 14
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
  9. + 5 - 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java
  10. + 8 - 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java
  11. + 8 - 8
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
  12. + 12 - 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NodeBase.java
  13. + 6 - 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java
  14. + 37 - 19
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java
  15. + 19 - 11
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java
  16. + 22 - 24
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
  17. + 15 - 15
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDFVariations.java
  18. + 5 - 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/StaticMapping.java
  19. + 4 - 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSwitchMapping.java
  20. + 55 - 17
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java
  21. + 2 - 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java
  22. + 130 - 17
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
  23. + 24 - 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  24. + 6 - 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  25. + 2 - 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  26. + 30 - 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
  27. + 112 - 81
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
  28. + 1 - 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  29. + 13 - 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  30. + 88 - 6
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java
  31. + 69 - 53
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java
  32. + 12 - 7
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java
  33. + 8 - 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java
  34. + 9 - 8
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java
  35. + 72 - 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java
  36. + 24 - 0
      hadoop-mapreduce-project/CHANGES.txt
  37. + 4 - 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java
  38. + 3 - 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java
  39. + 5 - 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
  40. + 7 - 8
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java
  41. + 397 - 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java
  42. + 15 - 21
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
  43. + 2 - 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java
  44. + 4 - 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRAppMaster.java
  45. + 14 - 7
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java
  46. + 60 - 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java
  47. + 3 - 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java
  48. + 12 - 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEvents.java
  49. + 39 - 12
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java
  50. + 9 - 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java
  51. + 4 - 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRCluster.java
  52. + 7 - 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java
  53. + 26 - 5
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
  54. + 2 - 2
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
  55. + 3 - 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
  56. + 30 - 7
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
  57. + 4 - 5
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
  58. + 5 - 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
  59. + 37 - 4
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java
  60. + 71 - 20
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java
  61. + 28 - 0
      hadoop-yarn-project/CHANGES.txt
  62. + 86 - 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java
  63. + 305 - 29
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java
  64. + 2 - 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllApplicationsResponsePBImpl.java
  65. + 2 - 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodesResponsePBImpl.java
  66. + 2 - 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetQueueUserAclsInfoResponsePBImpl.java
  67. + 0 - 138
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMResponse.java
  68. + 2 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptId.java
  69. + 2 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationId.java
  70. + 2 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java
  71. + 2 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeId.java
  72. + 2 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Priority.java
  73. + 2 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java
  74. + 0 - 373
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMResponsePBImpl.java
  75. + 0 - 10
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
  76. + 7 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
  77. + 191 - 228
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
  78. + 7 - 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
  79. + 6 - 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
  80. + 2 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java
  81. + 354 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java
  82. + 2 - 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientImpl.java
  83. + 4 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
  84. + 6 - 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClient.java
  85. + 184 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java
  86. + 2 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java
  87. + 10 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  88. + 8 - 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java
  89. + 17 - 13
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java
  90. + 18 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java
  91. + 14 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
  92. + 7 - 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRecordFactory.java
  93. + 16 - 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java
  94. + 4 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java
  95. + 4 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java
  96. + 4 - 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
  97. + 69 - 58
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
  98. + 4 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java
  99. + 12 - 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java
  100. + 18 - 13
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java

+ 8 - 18
BUILDING.txt

@@ -107,7 +107,7 @@ When you import the project to eclipse, install hadoop-maven-plugins at first.
   $ cd hadoop-maven-plugins
   $ mvn install
 
-Then, generate ecplise project files.
+Then, generate eclipse project files.
 
   $ mvn eclipse:eclipse -DskipTests
 
@@ -147,10 +147,10 @@ Requirements:
 * Windows System
 * JDK 1.6
 * Maven 3.0
-* Findbugs 1.3.9 (if running findbugs)
+* Windows SDK or Visual Studio 2010 Professional
 * ProtocolBuffer 2.4.1+ (for MapReduce and HDFS)
+* Findbugs 1.3.9 (if running findbugs)
 * Unix command-line tools from GnuWin32 or Cygwin: sh, mkdir, rm, cp, tar, gzip
-* Windows SDK or Visual Studio 2010 Professional
 * Internet connection for first build (to fetch all Maven and Hadoop dependencies)
 
 If using Visual Studio, it must be Visual Studio 2010 Professional (not 2012).
@@ -185,23 +185,13 @@ set Platform=Win32 (when building on a 32-bit system)
 Several tests require that the user must have the Create Symbolic Links
 privilege.
 
-All Maven goals are the same as described above, with the addition of profile
--Pnative-win to trigger building Windows native components.  The native
-components are required (not optional) on Windows.  For example:
-
- * Run tests                 : mvn -Pnative-win test
+All Maven goals are the same as described above with the exception that
+native code is built by enabling the 'native-win' Maven profile. -Pnative-win 
+is enabled by default when building on Windows since the native components 
+are required (not optional) on Windows.
 
 ----------------------------------------------------------------------------------
 Building distributions:
 
-Create binary distribution with native code and with documentation:
-
-  $ mvn package -Pdist,native-win,docs -DskipTests -Dtar
-
-Create source distribution:
-
-  $ mvn package -Pnative-win,src -DskipTests
-
-Create source and binary distributions with native code and documentation:
+ * Build distribution with native code    : mvn package [-Pdist][-Pdocs][-Psrc][-Dtar]
 
-  $ mvn package -Pdist,native-win,docs,src -DskipTests -Dtar

+ 17 - 1
hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosName.java

@@ -383,9 +383,25 @@ public class KerberosName {
    * @param ruleString the rules string.
    */
   public static void setRules(String ruleString) {
-    rules = parseRules(ruleString);
+    rules = (ruleString != null) ? parseRules(ruleString) : null;
   }
 
+  /**
+   * Get the rules.
+   * @return String of configured rules, or null if not yet configured
+   */
+  public static String getRules() {
+    String ruleString = null;
+    if (rules != null) {
+      StringBuilder sb = new StringBuilder();
+      for (Rule rule : rules) {
+        sb.append(rule.toString()).append("\n");
+      }
+      ruleString = sb.toString().trim();
+    }
+    return ruleString;
+  }
+  
   /**
    * Indicates if the name rules have been set.
    * 
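
The new getRules() accessor exposes whatever auth_to_local rules are currently
configured, and setRules(null) now un-configures them entirely. A minimal
round-trip sketch, assuming only the behavior visible in this hunk:

  KerberosName.setRules("RULE:[1:$1] RULE:[2:$1]\nDEFAULT");
  String rules = KerberosName.getRules(); // newline-separated rules, trimmed
  KerberosName.setRules(null);            // back to the unconfigured state
  assert KerberosName.getRules() == null;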

+ 14 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -15,6 +15,8 @@ Trunk (Unreleased)
     HADOOP-9151 Include RPC error info in RpcResponseHeader instead of sending
     it separately (sanjay Radia)
 
+    HADOOP-9380 Add totalLength to rpc response  (sanjay Radia)
+
   NEW FEATURES
     
     HADOOP-8561. Introduce HADOOP_PROXY_USER for secure impersonation in child
@@ -353,6 +355,9 @@ Trunk (Unreleased)
     HADOOP-9405. TestGridmixSummary#testExecutionSummarizer is broken. (Andrew
     Wang via atm)
 
+    HADOOP-9431 TestSecurityUtil#testLocalHostNameForNullOrWild on systems where hostname
+    contains capital letters  (Chris Nauroth via sanjay)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -485,6 +490,12 @@ Trunk (Unreleased)
     Windows with NTFS ACLs. (Chris Nauroth via suresh)
 
     HADOOP-9388. TestFsShellCopy fails on Windows. (Ivan Mitic via suresh)
+
+    HADOOP-9387. Fix DF so that it won't execute a shell command on Windows
+    to compute the file system/mount point.  (Ivan Mitic via szetszwo)
+
+    HADOOP-9353. Activate native-win maven profile by default on Windows.
+    (Arpit Agarwal via szetszwo)
     
 Release 2.0.5-beta - UNRELEASED
 
@@ -571,6 +582,9 @@ Release 2.0.5-beta - UNRELEASED
     HADOOP-9407. commons-daemon 1.0.3 dependency has bad group id causing
     build issues. (Sangjin Lee via suresh)
 
+    HADOOP-9299.  kerberos name resolution is kicking in even when kerberos
+    is not configured (daryn)
+
 Release 2.0.4-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 3 - 1
hadoop-common-project/hadoop-common/pom.xml

@@ -573,7 +573,9 @@
     <profile>
       <id>native-win</id>
       <activation>
-        <activeByDefault>false</activeByDefault>
+        <os>
+          <family>Windows</family>
+        </os>
       </activation>
       <build>
         <plugins>

+ 28 - 59
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java

@@ -50,37 +50,6 @@ public class DF extends Shell {
   
   private ArrayList<String> output;
 
-  enum OSType {
-    OS_TYPE_UNIX("UNIX"),
-    OS_TYPE_WIN("Windows"),
-    OS_TYPE_SOLARIS("SunOS"),
-    OS_TYPE_MAC("Mac"),
-    OS_TYPE_AIX("AIX");
-
-    private String id;
-    OSType(String id) {
-      this.id = id;
-    }
-    public boolean match(String osStr) {
-      return osStr != null && osStr.indexOf(id) >= 0;
-    }
-    String getId() {
-      return id;
-    }
-  }
-
-  private static final String OS_NAME = System.getProperty("os.name");
-  private static final OSType OS_TYPE = getOSType(OS_NAME);
-
-  protected static OSType getOSType(String osName) {
-    for (OSType ost : EnumSet.allOf(OSType.class)) {
-      if (ost.match(osName)) {
-        return ost;
-      }
-    }
-    return OSType.OS_TYPE_UNIX;
-  }
-
   public DF(File path, Configuration conf) throws IOException {
     this(path, conf.getLong(CommonConfigurationKeys.FS_DF_INTERVAL_KEY, DF.DF_INTERVAL_DEFAULT));
   }
@@ -92,10 +61,6 @@ public class DF extends Shell {
     this.output = new ArrayList<String>();
   }
 
-  protected OSType getOSType() {
-    return OS_TYPE;
-  }
-  
   /// ACCESSORS
 
   /** @return the canonical path to the volume we're checking. */
@@ -105,8 +70,13 @@ public class DF extends Shell {
 
   /** @return a string indicating which filesystem volume we're checking. */
   public String getFilesystem() throws IOException {
-    run();
-    return filesystem;
+    if (Shell.WINDOWS) {
+      this.filesystem = dirFile.getCanonicalPath().substring(0, 2);
+      return this.filesystem;
+    } else {
+      run();
+      return filesystem;
+    }
   }
 
   /** @return the capacity of the measured filesystem in bytes. */
@@ -138,16 +108,23 @@ public class DF extends Shell {
       throw new FileNotFoundException("Specified path " + dirFile.getPath()
           + "does not exist");
     }
-    run();
-    // Skip parsing if df was not successful
-    if (getExitCode() != 0) {
-      StringBuffer sb = new StringBuffer("df could not be run successfully: ");
-      for (String line: output) {
-        sb.append(line);
+
+    if (Shell.WINDOWS) {
+      // Assume a drive letter for a mount point
+      this.mount = dirFile.getCanonicalPath().substring(0, 2);
+    } else {
+      run();
+      // Skip parsing if df was not successful
+      if (getExitCode() != 0) {
+        StringBuffer sb = new StringBuffer("df could not be run successfully: ");
+        for (String line: output) {
+          sb.append(line);
+        }
+        throw new IOException(sb.toString());
       }
-      throw new IOException(sb.toString());
+      parseOutput();
     }
-    parseOutput();
+
     return mount;
   }
   
@@ -163,24 +140,16 @@ public class DF extends Shell {
       mount;
   }
 
-  @Override
-  protected void run() throws IOException {
-    if (WINDOWS) {
-      try {
-        this.mount = dirFile.getCanonicalPath().substring(0,2);
-      } catch (IOException e) {
-      }
-      return;
-    }
-    super.run();
-  }
-
   @Override
   protected String[] getExecString() {
     // ignoring the error since the exit code is enough
-    return (WINDOWS)? new String[]{"cmd", "/c", "df -k " + dirPath + " 2>nul"}:
-        new String[] {"bash","-c","exec 'df' '-k' '-P' '" + dirPath 
+    if (Shell.WINDOWS){
+      throw new AssertionError(
+          "DF.getExecString() should never be called on Windows");
+    } else {
+      return new String[] {"bash","-c","exec 'df' '-k' '-P' '" + dirPath 
                       + "' 2>/dev/null"};
+    }
   }
 
   @Override
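
The net effect is that DF never shells out on Windows: both getFilesystem()
and getMount() are answered from the drive letter of the canonical path, and
getExecString() asserts it is unreachable. A standalone sketch of that
short-circuit (class name and paths are illustrative, not part of the patch):

  import java.io.File;
  import java.io.IOException;

  public class DriveLetterProbe {
    // Derive the mount/filesystem from the drive letter, as DF now does on
    // Windows, instead of running df through a shell.
    static String driveOf(File dir) throws IOException {
      return dir.getCanonicalPath().substring(0, 2); // e.g. "C:"
    }

    public static void main(String[] args) throws IOException {
      System.out.println(driveOf(new File("C:\\hadoop\\data"))); // C:
    }
  }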

+ 25 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java

@@ -83,6 +83,7 @@ import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.google.protobuf.CodedOutputStream;
 
 /** A client for an IPC service.  IPC calls take a single {@link Writable} as a
  * parameter, and return a {@link Writable} as their value.  A service runs on
@@ -242,7 +243,7 @@ public class Client {
       callComplete();
     }
     
-    public synchronized Writable getRpcResult() {
+    public synchronized Writable getRpcResponse() {
       return rpcResponse;
     }
   }
@@ -944,11 +945,14 @@ public class Client {
       touch();
       
       try {
+        int totalLen = in.readInt();
         RpcResponseHeaderProto header = 
             RpcResponseHeaderProto.parseDelimitedFrom(in);
         if (header == null) {
           throw new IOException("Response is null.");
         }
+        int headerLen = header.getSerializedSize();
+        headerLen += CodedOutputStream.computeRawVarint32Size(headerLen);
 
         int callId = header.getCallId();
         if (LOG.isDebugEnabled())
@@ -961,11 +965,28 @@ public class Client {
           value.readFields(in);                 // read value
           call.setRpcResponse(value);
           calls.remove(callId);
+          
+          // verify that length was correct
+          // only for ProtobufEngine where len can be verified easily
+          if (call.getRpcResponse() instanceof ProtobufRpcEngine.RpcWrapper) {
+            ProtobufRpcEngine.RpcWrapper resWrapper = 
+                (ProtobufRpcEngine.RpcWrapper) call.getRpcResponse();
+            if (totalLen != headerLen + resWrapper.getLength()) { 
+              throw new RpcClientException(
+                  "RPC response length mismatch on rpc success");
+            }
+          }
         } else { // Rpc Request failed
-            final String exceptionClassName = header.hasExceptionClassName() ?
+          // Verify that length was correct
+          if (totalLen != headerLen) {
+            throw new RpcClientException(
+                "RPC response length mismatch on rpc error");
+          }
+          
+          final String exceptionClassName = header.hasExceptionClassName() ?
                 header.getExceptionClassName() : 
                   "ServerDidNotSetExceptionClassName";
-            final String errorMsg = header.hasErrorMsg() ? 
+          final String errorMsg = header.hasErrorMsg() ? 
                 header.getErrorMsg() : "ServerDidNotSetErrorMsg" ;
           RemoteException re = 
               new RemoteException(exceptionClassName, errorMsg);
@@ -1251,7 +1272,7 @@ public class Client {
                   call.error);
         }
       } else {
-        return call.getRpcResult();
+        return call.getRpcResponse();
       }
     }
   }
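
With HADOOP-9380 a response now starts with a 4-byte total length, followed by
the varint-delimited header and, on success, the varint-delimited body. The
verification the client performs is plain length arithmetic; a sketch assuming
headerBytes and bodyBytes are the raw serialized protobufs:

  import com.google.protobuf.CodedOutputStream;

  // totalLen must equal delimited header + delimited body, where
  // "delimited" adds a varint length prefix in front of each part.
  static boolean lengthMatches(int totalLen, byte[] headerBytes,
      byte[] bodyBytes) {
    int headerLen = CodedOutputStream.computeRawVarint32Size(headerBytes.length)
        + headerBytes.length;
    int bodyLen = CodedOutputStream.computeRawVarint32Size(bodyBytes.length)
        + bodyBytes.length;
    return totalLen == headerLen + bodyLen;
  }

On the error path there is no body, so the check degenerates to
totalLen == headerLen, exactly as the hunk above shows.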

+ 46 - 11
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java

@@ -48,7 +48,9 @@ import org.apache.hadoop.util.ProtoUtil;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.AbstractMessageLite;
 import com.google.protobuf.BlockingService;
+import com.google.protobuf.CodedOutputStream;
 import com.google.protobuf.Descriptors.MethodDescriptor;
 import com.google.protobuf.Message;
 import com.google.protobuf.ServiceException;
@@ -226,7 +228,7 @@ public class ProtobufRpcEngine implements RpcEngine {
       Message returnMessage;
       try {
         returnMessage = prototype.newBuilderForType()
-            .mergeFrom(val.responseMessage).build();
+            .mergeFrom(val.theResponseRead).build();
 
         if (LOG.isTraceEnabled()) {
           LOG.trace(Thread.currentThread().getId() + ": Response <- " +
@@ -267,6 +269,9 @@ public class ProtobufRpcEngine implements RpcEngine {
     }
   }
 
+  interface RpcWrapper extends Writable {
+    int getLength();
+  }
   /**
    * Wrapper for Protocol Buffer Requests
    * 
@@ -274,7 +279,7 @@ public class ProtobufRpcEngine implements RpcEngine {
    * Protobuf. Several methods on {@link org.apache.hadoop.ipc.Server and RPC} 
    * use type Writable as a wrapper to work across multiple RpcEngine kinds.
    */
-  private static class RpcRequestWrapper implements Writable {
+  private static class RpcRequestWrapper implements RpcWrapper {
     RequestHeaderProto requestHeader;
     Message theRequest; // for clientSide, the request is here
     byte[] theRequestRead; // for server side, the request is here
@@ -312,6 +317,22 @@ public class ProtobufRpcEngine implements RpcEngine {
       return requestHeader.getDeclaringClassProtocolName() + "." +
           requestHeader.getMethodName();
     }
+
+    @Override
+    public int getLength() {
+      int headerLen = requestHeader.getSerializedSize();
+      int reqLen;
+      if (theRequest != null) {
+        reqLen = theRequest.getSerializedSize();
+      } else if (theRequestRead != null ) {
+        reqLen = theRequestRead.length;
+      } else {
+        throw new IllegalArgumentException(
+            "getLenght on uninilialized RpcWrapper");      
+      }
+      return CodedOutputStream.computeRawVarint32Size(headerLen) +  headerLen
+          + CodedOutputStream.computeRawVarint32Size(reqLen) + reqLen;
+    }
   }
 
   /**
@@ -321,29 +342,43 @@ public class ProtobufRpcEngine implements RpcEngine {
    * Protobuf. Several methods on {@link org.apache.hadoop.ipc.Server and RPC} 
    * use type Writable as a wrapper to work across multiple RpcEngine kinds.
    */
-  private static class RpcResponseWrapper implements Writable {
-    byte[] responseMessage;
+  private static class RpcResponseWrapper implements RpcWrapper {
+    Message theResponse; // for senderSide, the response is here
+    byte[] theResponseRead; // for receiver side, the response is here
 
     @SuppressWarnings("unused")
     public RpcResponseWrapper() {
     }
 
     public RpcResponseWrapper(Message message) {
-      this.responseMessage = message.toByteArray();
+      this.theResponse = message;
     }
 
     @Override
     public void write(DataOutput out) throws IOException {
-      out.writeInt(responseMessage.length);
-      out.write(responseMessage);     
+      OutputStream os = DataOutputOutputStream.constructOutputStream(out);
+      theResponse.writeDelimitedTo(os);   
     }
 
     @Override
     public void readFields(DataInput in) throws IOException {
-      int length = in.readInt();
-      byte[] bytes = new byte[length];
-      in.readFully(bytes);
-      responseMessage = bytes;
+      int length = ProtoUtil.readRawVarint32(in);
+      theResponseRead = new byte[length];
+      in.readFully(theResponseRead);
+    }
+    
+    @Override
+    public int getLength() {
+      int resLen;
+      if (theResponse != null) {
+        resLen = theResponse.getSerializedSize();
+      } else if (theResponseRead != null ) {
+        resLen = theResponseRead.length;
+      } else {
+        throw new IllegalArgumentException(
+            "getLenght on uninilialized RpcWrapper");      
+      }
+      return CodedOutputStream.computeRawVarint32Size(resLen) + resLen;
     }
   }
 
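
Both getLength() implementations count each varint length prefix as well as
the payload behind it. CodedOutputStream.computeRawVarint32Size gives the
prefix size, which grows with the value; a quick check of the boundaries:

  import com.google.protobuf.CodedOutputStream;

  public class VarintSizes {
    public static void main(String[] args) {
      // 7 payload bits per varint byte: 1 byte up to 127, 2 up to 16383, ...
      System.out.println(CodedOutputStream.computeRawVarint32Size(127));   // 1
      System.out.println(CodedOutputStream.computeRawVarint32Size(128));   // 2
      System.out.println(CodedOutputStream.computeRawVarint32Size(16383)); // 2
      System.out.println(CodedOutputStream.computeRawVarint32Size(16384)); // 3
    }
  }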

+ 44 - 14
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java

@@ -73,6 +73,7 @@ import org.apache.hadoop.conf.Configuration.IntegerRanges;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
@@ -107,6 +108,7 @@ import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.CodedOutputStream;
 
 /** An abstract IPC service.  IPC calls take a single {@link Writable} as a
  * parameter, and return a {@link Writable} as their value.  A service runs on
@@ -202,7 +204,8 @@ public abstract class Server {
   // 6 : Made RPC Request header explicit
   // 7 : Changed Ipc Connection Header to use Protocol buffers
   // 8 : SASL server always sends a final response
-  public static final byte CURRENT_VERSION = 8;
+  // 9 : Changes to protocol for HADOOP-8990
+  public static final byte CURRENT_VERSION = 9;
 
   /**
    * Initial and max size of response buffer
@@ -1512,10 +1515,15 @@ public abstract class Server {
       " cannot communicate with client version " + clientVersion;
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       
-      if (clientVersion >= 3) {
+      if (clientVersion >= 9) {
+        // Versions >= 9 understand the normal response
         Call fakeCall =  new Call(-1, null, this);
-        // Versions 3 and greater can interpret this exception
-        // response in the same manner
+        setupResponse(buffer, fakeCall, RpcStatusProto.FATAL,
+            null, VersionMismatch.class.getName(), errMsg);
+        responder.doRespond(fakeCall);
+      } else if (clientVersion >= 3) {
+        Call fakeCall =  new Call(-1, null, this);
+        // Versions 3 to 8 use older response
         setupResponseOldVersionFatal(buffer, fakeCall,
             null, VersionMismatch.class.getName(), errMsg);
 
@@ -1997,17 +2005,34 @@ public abstract class Server {
   throws IOException {
     responseBuf.reset();
     DataOutputStream out = new DataOutputStream(responseBuf);
-    RpcResponseHeaderProto.Builder response =  
+    RpcResponseHeaderProto.Builder headerBuilder =  
         RpcResponseHeaderProto.newBuilder();
-    response.setCallId(call.callId);
-    response.setStatus(status);
-    response.setServerIpcVersionNum(Server.CURRENT_VERSION);
-
+    headerBuilder.setCallId(call.callId);
+    headerBuilder.setStatus(status);
+    headerBuilder.setServerIpcVersionNum(Server.CURRENT_VERSION);
 
     if (status == RpcStatusProto.SUCCESS) {
+      RpcResponseHeaderProto header = headerBuilder.build();
+      final int headerLen = header.getSerializedSize();
+      int fullLength  = CodedOutputStream.computeRawVarint32Size(headerLen) +
+          headerLen;
       try {
-        response.build().writeDelimitedTo(out);
-        rv.write(out);
+        if (rv instanceof ProtobufRpcEngine.RpcWrapper) {
+          ProtobufRpcEngine.RpcWrapper resWrapper = 
+              (ProtobufRpcEngine.RpcWrapper) rv;
+          fullLength += resWrapper.getLength();
+          out.writeInt(fullLength);
+          header.writeDelimitedTo(out);
+          rv.write(out);
+        } else { // Have to serialize to buffer to get len
+          final DataOutputBuffer buf = new DataOutputBuffer();
+          rv.write(buf);
+          byte[] data = buf.getData();
+          fullLength += buf.getLength();
+          out.writeInt(fullLength);
+          header.writeDelimitedTo(out);
+          out.write(data, 0, buf.getLength());
+        }
       } catch (Throwable t) {
         LOG.warn("Error serializing call response for call " + call, t);
         // Call back to same function - this is OK since the
@@ -2019,9 +2044,14 @@ public abstract class Server {
         return;
       }
     } else { // Rpc Failure
-      response.setExceptionClassName(errorClass);
-      response.setErrorMsg(error);
-      response.build().writeDelimitedTo(out);
+      headerBuilder.setExceptionClassName(errorClass);
+      headerBuilder.setErrorMsg(error);
+      RpcResponseHeaderProto header = headerBuilder.build();
+      int headerLen = header.getSerializedSize();
+      final int fullLength  = 
+          CodedOutputStream.computeRawVarint32Size(headerLen) + headerLen;
+      out.writeInt(fullLength);
+      header.writeDelimitedTo(out);
     }
     if (call.connection.useWrap) {
       wrapWithSasl(responseBuf, call);
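
On the write side, setupResponse computes fullLength before emitting anything,
so the wire frame is: int32 total | varint headerLen | header | body. A sketch
of that framing using protobuf 2.x-era CodedOutputStream helpers, assuming
headerBytes and bodyBytes are pre-serialized and bodyBytes already carries its
own varint prefix (as the RpcWrapper path does):

  import java.io.DataOutputStream;
  import java.io.IOException;
  import com.google.protobuf.CodedOutputStream;

  // The 4-byte total does not count itself, only everything after it.
  static void writeFramed(DataOutputStream out, byte[] headerBytes,
      byte[] bodyBytes) throws IOException {
    int headerLen = headerBytes.length;
    int fullLength = CodedOutputStream.computeRawVarint32Size(headerLen)
        + headerLen + bodyBytes.length;
    out.writeInt(fullLength);
    CodedOutputStream cos = CodedOutputStream.newInstance(out);
    cos.writeRawVarint32(headerLen);
    cos.writeRawBytes(headerBytes);
    cos.flush(); // push the delimited header before the raw body bytes
    out.write(bodyBytes);
  }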

+ 5 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java

@@ -149,4 +149,9 @@ public class CachedDNSToSwitchMapping extends AbstractDNSToSwitchMapping {
   public boolean isSingleSwitch() {
     return isMappingSingleSwitch(rawMapping);
   }
+  
+  @Override
+  public void reloadCachedMappings() {
+    cache.clear();
+  }
 }

+ 8 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSToSwitchMapping.java

@@ -51,4 +51,12 @@ public interface DNSToSwitchMapping {
    * If <i>names</i> is empty, the returned list is also empty
    */
   public List<String> resolve(List<String> names);
+
+  /**
+   * Reload all of the cached mappings.
+   *
+   * If there is a cache, this method will clear it, so that future accesses
+   * will get a chance to see the new data.
+   */
+  public void reloadCachedMappings();
 }
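
Every DNSToSwitchMapping implementation must now provide
reloadCachedMappings(), even as a no-op (the ScriptBasedMapping and
StaticMapping hunks below do exactly that). A minimal hypothetical
implementation of the widened interface:

  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  import org.apache.hadoop.net.DNSToSwitchMapping;

  public class InMemoryMapping implements DNSToSwitchMapping {
    private final Map<String, String> cache = new HashMap<String, String>();

    @Override
    public List<String> resolve(List<String> names) {
      List<String> racks = new ArrayList<String>(names.size());
      for (String name : names) {
        String rack = cache.get(name);
        racks.add(rack != null ? rack : "/default-rack");
      }
      return racks;
    }

    @Override
    public void reloadCachedMappings() {
      cache.clear(); // future resolve() calls get a chance to see new data
    }
  }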

+ 8 - 8
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -392,8 +392,16 @@ public class NetworkTopology {
       throw new IllegalArgumentException(
         "Not allow to add an inner node: "+NodeBase.getPath(node));
     }
+    int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
     netlock.writeLock().lock();
     try {
+      if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
+        LOG.error("Error: can't add leaf node at depth " +
+            newDepth + " to topology:\n" + oldTopoStr);
+        throw new InvalidTopologyException("Invalid network topology. " +
+            "You cannot have a rack and a non-rack node at the same " +
+            "level of the network topology.");
+      }
       Node rack = getNodeForNetworkLocation(node);
       if (rack != null && !(rack instanceof InnerNode)) {
         throw new IllegalArgumentException("Unexpected data node " 
@@ -408,14 +416,6 @@ public class NetworkTopology {
         if (!(node instanceof InnerNode)) {
           if (depthOfAllLeaves == -1) {
             depthOfAllLeaves = node.getLevel();
-          } else {
-            if (depthOfAllLeaves != node.getLevel()) {
-              LOG.error("Error: can't add leaf node at depth " +
-                  node.getLevel() + " to topology:\n" + oldTopoStr);
-              throw new InvalidTopologyException("Invalid network topology. " +
-                  "You cannot have a rack and a non-rack node at the same " +
-                  "level of the network topology.");
-            }
           }
         }
       }
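
The depth check now runs before any mutation: the expected leaf depth is
computed from the network location string alone (via the new
NodeBase.locationToDepth below), so an inconsistent node is rejected before it
is added rather than after. A sketch of the arithmetic, assuming '/' is the
path separator:

  // Leaf depth = separators in the parent location, plus one for the leaf.
  static int leafDepthOf(String networkLocation) {
    int depth = 1;
    for (int i = 0; i < networkLocation.length(); i++) {
      if (networkLocation.charAt(i) == '/') {
        depth++;
      }
    }
    return depth;
  }
  // leafDepthOf("/d1/r1") == 3 but leafDepthOf("/r1") == 2, so mixing the
  // two location styles fails fast with InvalidTopologyException.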

+ 12 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NodeBase.java

@@ -167,4 +167,16 @@ public class NodeBase implements Node {
   public void setLevel(int level) {
     this.level = level;
   }
+  
+  public static int locationToDepth(String location) {
+    String normalizedLocation = normalize(location);
+    int length = normalizedLocation.length();
+    int depth = 0;
+    for (int i = 0; i < length; i++) {
+      if (normalizedLocation.charAt(i) == PATH_SEPARATOR) {
+        depth++;
+      }
+    }
+    return depth;
+  }
 }

+ 6 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/ScriptBasedMapping.java

@@ -263,5 +263,11 @@ public final class ScriptBasedMapping extends CachedDNSToSwitchMapping {
     public String toString() {
       return scriptName != null ? ("script " + scriptName) : NO_SCRIPT;
     }
+
+    @Override
+    public void reloadCachedMappings() {
+      // Nothing to do here, since RawScriptBasedMapping has no cache, and
+      // does not inherit from CachedDNSToSwitchMapping
+    }
   }
 }

+ 37 - 19
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/TableMapping.java

@@ -76,20 +76,24 @@ public class TableMapping extends CachedDNSToSwitchMapping {
     getRawMapping().setConf(conf);
   }
   
+  @Override
+  public void reloadCachedMappings() {
+    super.reloadCachedMappings();
+    getRawMapping().reloadCachedMappings();
+  }
+  
   private static final class RawTableMapping extends Configured
       implements DNSToSwitchMapping {
     
-    private final Map<String, String> map = new HashMap<String, String>();
-    private boolean initialized = false;
+    private Map<String, String> map;
   
-    private synchronized void load() {
-      map.clear();
+    private Map<String, String> load() {
+      Map<String, String> loadMap = new HashMap<String, String>();
   
       String filename = getConf().get(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, null);
       if (StringUtils.isBlank(filename)) {
-        LOG.warn(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY + " not configured. "
-            + NetworkTopology.DEFAULT_RACK + " will be returned.");
-        return;
+        LOG.warn(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY + " not configured. ");
+        return null;
       }
   
       BufferedReader reader = null;
@@ -101,7 +105,7 @@ public class TableMapping extends CachedDNSToSwitchMapping {
           if (line.length() != 0 && line.charAt(0) != '#') {
             String[] columns = line.split("\\s+");
             if (columns.length == 2) {
-              map.put(columns[0], columns[1]);
+              loadMap.put(columns[0], columns[1]);
             } else {
               LOG.warn("Line does not have two columns. Ignoring. " + line);
             }
@@ -109,29 +113,31 @@ public class TableMapping extends CachedDNSToSwitchMapping {
           line = reader.readLine();
         }
       } catch (Exception e) {
-        LOG.warn(filename + " cannot be read. " + NetworkTopology.DEFAULT_RACK
-            + " will be returned.", e);
-        map.clear();
+        LOG.warn(filename + " cannot be read.", e);
+        return null;
       } finally {
         if (reader != null) {
           try {
             reader.close();
           } catch (IOException e) {
-            LOG.warn(filename + " cannot be read. "
-                + NetworkTopology.DEFAULT_RACK + " will be returned.", e);
-            map.clear();
+            LOG.warn(filename + " cannot be read.", e);
+            return null;
           }
         }
       }
+      return loadMap;
     }
   
     @Override
     public synchronized List<String> resolve(List<String> names) {
-      if (!initialized) {
-        initialized = true;
-        load();
+      if (map == null) {
+        map = load();
+        if (map == null) {
+          LOG.warn("Failed to read topology table. " +
+            NetworkTopology.DEFAULT_RACK + " will be used for all nodes.");
+          map = new HashMap<String, String>();
+        }
       }
-  
       List<String> results = new ArrayList<String>(names.size());
       for (String name : names) {
         String result = map.get(name);
@@ -143,6 +149,18 @@ public class TableMapping extends CachedDNSToSwitchMapping {
       }
       return results;
     }
-    
+
+    @Override
+    public void reloadCachedMappings() {
+      Map<String, String> newMap = load();
+      if (newMap == null) {
+        LOG.error("Failed to reload the topology table.  The cached " +
+            "mappings will not be cleared.");
+      } else {
+        synchronized(this) {
+          map = newMap;
+        }
+      }
+    }
   }
 }
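
The restructured load() returns a fresh map (or null on failure) instead of
mutating in place, which turns reloadCachedMappings() into a copy-on-success
swap: a corrupt topology file can no longer wipe out a working table. A
self-contained sketch of the pattern with illustrative names:

  import java.io.BufferedReader;
  import java.io.File;
  import java.io.FileReader;
  import java.io.IOException;
  import java.util.HashMap;
  import java.util.Map;

  public class ReloadableTable {
    private volatile Map<String, String> map = new HashMap<String, String>();
    private final File file;

    public ReloadableTable(File file) { this.file = file; }

    public String lookup(String host) { return map.get(host); }

    public void reload() {
      Map<String, String> newMap = tryLoad();
      if (newMap != null) {
        map = newMap; // one reference swap: readers see old or new, never partial
      }
    }

    private Map<String, String> tryLoad() {
      Map<String, String> loaded = new HashMap<String, String>();
      try {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
          for (String line = reader.readLine(); line != null;
               line = reader.readLine()) {
            String[] columns = line.trim().split("\\s+");
            if (columns.length == 2) {
              loaded.put(columns[0], columns[1]);
            }
          }
        } finally {
          reader.close();
        }
      } catch (IOException e) {
        return null; // signal failure; the caller keeps the old table
      }
      return loaded;
    }
  }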

+ 19 - 11
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/HadoopKerberosName.java

@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.security;
 
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
+
 import java.io.IOException;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -25,7 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 /**
  * This class implements parsing and handling of Kerberos principal names. In 
  * particular, it splits them apart and translates them down into local
@@ -36,15 +37,6 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 @InterfaceStability.Evolving
 public class HadoopKerberosName extends KerberosName {
 
-  static {
-    try {
-      KerberosUtil.getDefaultRealm();
-    } catch (Exception ke) {
-      if(UserGroupInformation.isSecurityEnabled())
-        throw new IllegalArgumentException("Can't get Kerberos configuration",ke);
-    }
-  }
-
   /**
    * Create a name from the full Kerberos principal name.
    * @param name
@@ -63,7 +55,23 @@ public class HadoopKerberosName extends KerberosName {
    * @throws IOException
    */
   public static void setConfiguration(Configuration conf) throws IOException {
-    String ruleString = conf.get(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL, "DEFAULT");
+    final String defaultRule;
+    switch (SecurityUtil.getAuthenticationMethod(conf)) {
+      case KERBEROS:
+      case KERBEROS_SSL:
+        try {
+          KerberosUtil.getDefaultRealm();
+        } catch (Exception ke) {
+          throw new IllegalArgumentException("Can't get Kerberos realm", ke);
+        }
+        defaultRule = "DEFAULT";
+        break;
+      default:
+        // just extract the simple user name
+        defaultRule = "RULE:[1:$1] RULE:[2:$1]";
+        break; 
+    }
+    String ruleString = conf.get(HADOOP_SECURITY_AUTH_TO_LOCAL, defaultRule);
     setRules(ruleString);
   }
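
The default auth_to_local rule now depends on the authentication method: only
Kerberos (and Kerberos-SSL) deployments must be able to resolve a default
realm, while simple auth falls back to rules that just extract the first
component of the principal. The selection logic, sketched with a plain string
standing in for the AuthenticationMethod enum:

  static String defaultRuleFor(String authMethod) {
    if ("KERBEROS".equals(authMethod) || "KERBEROS_SSL".equals(authMethod)) {
      return "DEFAULT"; // strips the default realm; needs krb5 configuration
    }
    // user@REALM -> user and user/host@REALM -> user, no realm lookup needed
    return "RULE:[1:$1] RULE:[2:$1]";
  }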
 

+ 22 - 24
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -53,14 +53,12 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
-import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
@@ -192,8 +190,6 @@ public class UserGroupInformation {
 
   /** Metrics to track UGI activity */
   static UgiMetrics metrics = UgiMetrics.create();
-  /** Are the static variables that depend on configuration initialized? */
-  private static boolean isInitialized = false;
   /** The auth method to use */
   private static AuthenticationMethod authenticationMethod;
   /** Server-side groups fetching service */
@@ -213,8 +209,8 @@ public class UserGroupInformation {
    * Must be called before useKerberos or groups is used.
    */
   private static synchronized void ensureInitialized() {
-    if (!isInitialized) {
-        initialize(new Configuration(), KerberosName.hasRulesBeenSet());
+    if (conf == null) {
+      initialize(new Configuration(), false);
     }
   }
 
@@ -222,25 +218,17 @@ public class UserGroupInformation {
    * Initialize UGI and related classes.
    * @param conf the configuration to use
    */
-  private static synchronized void initialize(Configuration conf, boolean skipRulesSetting) {
-    initUGI(conf);
-    // give the configuration on how to translate Kerberos names
-    try {
-      if (!skipRulesSetting) {
+  private static synchronized void initialize(Configuration conf,
+                                              boolean overrideNameRules) {
+    authenticationMethod = SecurityUtil.getAuthenticationMethod(conf);
+    if (overrideNameRules || !HadoopKerberosName.hasRulesBeenSet()) {
+      try {
         HadoopKerberosName.setConfiguration(conf);
+      } catch (IOException ioe) {
+        throw new RuntimeException(
+            "Problem with Kerberos auth_to_local name configuration", ioe);
       }
-    } catch (IOException ioe) {
-      throw new RuntimeException("Problem with Kerberos auth_to_local name " +
-          "configuration", ioe);
     }
-  }
-  
-  /**
-   * Set the configuration values for UGI.
-   * @param conf the configuration to use
-   */
-  private static synchronized void initUGI(Configuration conf) {
-    authenticationMethod = SecurityUtil.getAuthenticationMethod(conf);
     try {
         kerberosMinSecondsBeforeRelogin = 1000L * conf.getLong(
                 HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN,
@@ -255,7 +243,6 @@ public class UserGroupInformation {
     if (!(groups instanceof TestingGroups)) {
       groups = Groups.getUserToGroupsMappingService(conf);
     }
-    isInitialized = true;
     UserGroupInformation.conf = conf;
   }
 
@@ -268,7 +255,18 @@ public class UserGroupInformation {
   @InterfaceAudience.Public
   @InterfaceStability.Evolving
   public static void setConfiguration(Configuration conf) {
-    initialize(conf, false);
+    initialize(conf, true);
+  }
+  
+  @InterfaceAudience.Private
+  @VisibleForTesting
+  static void reset() {
+    authenticationMethod = null;
+    conf = null;
+    groups = null;
+    kerberosMinSecondsBeforeRelogin = 0;
+    setLoginUser(null);
+    HadoopKerberosName.setRules(null);
   }
   
   /**

+ 15 - 15
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDFVariations.java

@@ -36,15 +36,10 @@ import static org.junit.Assert.*;
 public class TestDFVariations {
 
   public static class XXDF extends DF {
-    private final String osName;
-    public XXDF(String osName) throws IOException {
+    public XXDF() throws IOException {
       super(new File(System.getProperty("test.build.data","/tmp")), 0L);
-      this.osName = osName;
-    }
-    @Override
-    public DF.OSType getOSType() {
-      return DF.getOSType(osName);
     }
+
     @Override
     protected String[] getExecString() {
       return new String[] { "echo", "IGNORE\n", 
@@ -53,15 +48,20 @@ public class TestDFVariations {
   }
 
   @Test(timeout=5000)
-  public void testOSParsing() throws Exception {
-    for (DF.OSType ost : EnumSet.allOf(DF.OSType.class)) {
-      XXDF df = new XXDF(ost.getId());
-      assertEquals(ost.getId() + " mount",
-        Shell.WINDOWS ? df.getDirPath().substring(0, 2) : "/foo/bar",
-        df.getMount());
-    }
+  public void testMountAndFileSystem() throws Exception {
+    XXDF df = new XXDF();
+    String expectedMount =
+        Shell.WINDOWS ? df.getDirPath().substring(0, 2) : "/foo/bar";
+    String expectedFileSystem =
+        Shell.WINDOWS ? df.getDirPath().substring(0, 2) : "/dev/sda3";
+
+    assertEquals("Invalid mount point",
+        expectedMount, df.getMount());
+
+    assertEquals("Invalid filesystem",
+        expectedFileSystem, df.getFilesystem());
   }
-  
+
   @Test(timeout=5000)
   public void testDFInvalidPath() throws Exception {
     // Generate a path that doesn't exist

+ 5 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/StaticMapping.java

@@ -147,4 +147,9 @@ public class StaticMapping extends AbstractDNSToSwitchMapping  {
       nameToRackMap.clear();
     }
   }
+  
+  public void reloadCachedMappings() {
+    // reloadCachedMappings does nothing for StaticMapping; there is
+    // nowhere to reload from since all data is in memory.
+  }
 }

+ 4 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestSwitchMapping.java

@@ -116,5 +116,9 @@ public class TestSwitchMapping extends Assert {
     public List<String> resolve(List<String> names) {
       return names;
     }
+
+    @Override
+    public void reloadCachedMappings() {
+    }
   }
 }

+ 55 - 17
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestTableMapping.java

@@ -34,23 +34,17 @@ import org.junit.Before;
 import org.junit.Test;
 
 public class TestTableMapping {
-  
-  private File mappingFile;
-  
-  @Before
-  public void setUp() throws IOException {
-    mappingFile = File.createTempFile(getClass().getSimpleName(), ".txt");
-    Files.write("a.b.c /rack1\n" +
-                "1.2.3.4\t/rack2\n", mappingFile, Charsets.UTF_8);
-    mappingFile.deleteOnExit();
-  }
-
   @Test
   public void testResolve() throws IOException {
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testResolve", ".txt");
+    Files.write("a.b.c /rack1\n" +
+                "1.2.3.4\t/rack2\n", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
     TableMapping mapping = new TableMapping();
 
     Configuration conf = new Configuration();
-    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mappingFile.getCanonicalPath());
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
     mapping.setConf(conf);
 
     List<String> names = new ArrayList<String>();
@@ -65,10 +59,15 @@ public class TestTableMapping {
 
   @Test
   public void testTableCaching() throws IOException {
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testTableCaching", ".txt");
+    Files.write("a.b.c /rack1\n" +
+                "1.2.3.4\t/rack2\n", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
     TableMapping mapping = new TableMapping();
 
     Configuration conf = new Configuration();
-    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mappingFile.getCanonicalPath());
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
     mapping.setConf(conf);
 
     List<String> names = new ArrayList<String>();
@@ -123,13 +122,53 @@ public class TestTableMapping {
   }
 
   @Test
+  public void testClearingCachedMappings() throws IOException {
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testClearingCachedMappings", ".txt");
+    Files.write("a.b.c /rack1\n" +
+                "1.2.3.4\t/rack2\n", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
+
+    TableMapping mapping = new TableMapping();
+
+    Configuration conf = new Configuration();
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
+    mapping.setConf(conf);
+
+    List<String> names = new ArrayList<String>();
+    names.add("a.b.c");
+    names.add("1.2.3.4");
+
+    List<String> result = mapping.resolve(names);
+    assertEquals(names.size(), result.size());
+    assertEquals("/rack1", result.get(0));
+    assertEquals("/rack2", result.get(1));
+
+    Files.write("", mapFile, Charsets.UTF_8);
+
+    mapping.reloadCachedMappings();
+
+    names = new ArrayList<String>();
+    names.add("a.b.c");
+    names.add("1.2.3.4");
+
+    result = mapping.resolve(names);
+    assertEquals(names.size(), result.size());
+    assertEquals(NetworkTopology.DEFAULT_RACK, result.get(0));
+    assertEquals(NetworkTopology.DEFAULT_RACK, result.get(1));
+  }
+
+
+  @Test(timeout=60000)
   public void testBadFile() throws IOException {
-    Files.write("bad contents", mappingFile, Charsets.UTF_8);
-    
+    File mapFile = File.createTempFile(getClass().getSimpleName() +
+        ".testBadFile", ".txt");
+    Files.write("bad contents", mapFile, Charsets.UTF_8);
+    mapFile.deleteOnExit();
     TableMapping mapping = new TableMapping();
 
     Configuration conf = new Configuration();
-    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mappingFile.getCanonicalPath());
+    conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile.getCanonicalPath());
     mapping.setConf(conf);
 
     List<String> names = new ArrayList<String>();
@@ -141,5 +180,4 @@ public class TestTableMapping {
     assertEquals(result.get(0), NetworkTopology.DEFAULT_RACK);
     assertEquals(result.get(1), NetworkTopology.DEFAULT_RACK);
   }
-
 }

+ 2 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java

@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
+import java.util.Locale;
 
 import javax.security.auth.kerberos.KerberosPrincipal;
 
@@ -112,7 +113,7 @@ public class TestSecurityUtil {
 
   @Test
   public void testLocalHostNameForNullOrWild() throws Exception {
-    String local = SecurityUtil.getLocalHostName();
+    String local = SecurityUtil.getLocalHostName().toLowerCase(Locale.US);
     assertEquals("hdfs/" + local + "@REALM",
                  SecurityUtil.getServerPrincipal("hdfs/_HOST@REALM", (String)null));
     assertEquals("hdfs/" + local + "@REALM",

+ 130 - 17
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java

@@ -38,10 +38,12 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
+import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import static org.apache.hadoop.test.MetricsAsserts.*;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
 import org.apache.hadoop.util.Shell;
 
 public class TestUserGroupInformation {
@@ -73,17 +75,18 @@ public class TestUserGroupInformation {
   public static void setup() {
     javax.security.auth.login.Configuration.setConfiguration(
         new DummyLoginConfiguration());
+    // doesn't matter what it is, but getGroups needs it set...
+    System.setProperty("hadoop.home.dir", "/tmp");
+    // fake the realm if kerberos is enabled
+    System.setProperty("java.security.krb5.kdc", "");
+    System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
   }
   
   @Before
   public void setupUgi() {
     conf = new Configuration();
-    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL,
-        "RULE:[2:$1@$0](.*@HADOOP.APACHE.ORG)s/@.*//" +
-        "RULE:[1:$1@$0](.*@HADOOP.APACHE.ORG)s/@.*//"
-        + "DEFAULT");
+    UserGroupInformation.reset();
     UserGroupInformation.setConfiguration(conf);
-    UserGroupInformation.setLoginUser(null);
   }
   
   @After
@@ -230,28 +233,138 @@ public class TestUserGroupInformation {
   /** test constructor */
   @Test (timeout = 30000)
   public void testConstructor() throws Exception {
-    UserGroupInformation ugi = 
-      UserGroupInformation.createUserForTesting("user2/cron@HADOOP.APACHE.ORG", 
-                                                GROUP_NAMES);
-    // make sure the short and full user names are correct
-    assertEquals("user2/cron@HADOOP.APACHE.ORG", ugi.getUserName());
-    assertEquals("user2", ugi.getShortUserName());
-    ugi = UserGroupInformation.createUserForTesting(USER_NAME, GROUP_NAMES);
-    assertEquals("user1", ugi.getShortUserName());
+    // security off, so default should just return simple name
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user2@DEFAULT.REALM", "user2");
+    testConstructorSuccess("user3/cron@DEFAULT.REALM", "user3");    
+    testConstructorSuccess("user4@OTHER.REALM", "user4");
+    testConstructorSuccess("user5/cron@OTHER.REALM", "user5");
+    // failure test
+    testConstructorFailures(null);
+    testConstructorFailures("");
+  }
+  
+  /** test constructor */
+  @Test (timeout = 30000)
+  public void testConstructorWithRules() throws Exception {
+    // security off, but use rules if explicitly set
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL,
+        "RULE:[1:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/");
+    UserGroupInformation.setConfiguration(conf);
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user4@OTHER.REALM", "other-user4");
+    // failure test
+    testConstructorFailures("user2@DEFAULT.REALM");
+    testConstructorFailures("user3/cron@DEFAULT.REALM");
+    testConstructorFailures("user5/cron@OTHER.REALM");
+    testConstructorFailures(null);
+    testConstructorFailures("");
+  }
+  
+  /** test constructor */
+  @Test (timeout = 30000)
+  public void testConstructorWithKerberos() throws Exception {
+    // security on, default is remove default realm
+    SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, conf);
+    UserGroupInformation.setConfiguration(conf);
+
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user2@DEFAULT.REALM", "user2");
+    testConstructorSuccess("user3/cron@DEFAULT.REALM", "user3");    
+    // failure test
+    testConstructorFailures("user4@OTHER.REALM");
+    testConstructorFailures("user5/cron@OTHER.REALM");
+    testConstructorFailures(null);
+    testConstructorFailures("");
+  }
+
+  /** test constructor */
+  @Test (timeout = 30000)
+  public void testConstructorWithKerberosRules() throws Exception {
+    // security on, explicit rules
+    SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, conf);
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL,
+        "RULE:[2:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/" +
+        "RULE:[1:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/" +
+        "DEFAULT");
+    UserGroupInformation.setConfiguration(conf);
     
+    testConstructorSuccess("user1", "user1");
+    testConstructorSuccess("user2@DEFAULT.REALM", "user2");
+    testConstructorSuccess("user3/cron@DEFAULT.REALM", "user3");    
+    testConstructorSuccess("user4@OTHER.REALM", "other-user4");
+    testConstructorSuccess("user5/cron@OTHER.REALM", "other-user5");
     // failure test
     testConstructorFailures(null);
     testConstructorFailures("");
   }
 
+  private void testConstructorSuccess(String principal, String shortName) {
+    UserGroupInformation ugi = 
+        UserGroupInformation.createUserForTesting(principal, GROUP_NAMES);
+    // make sure the short and full user names are correct
+    assertEquals(principal, ugi.getUserName());
+    assertEquals(shortName, ugi.getShortUserName());
+  }
+  
   private void testConstructorFailures(String userName) {
-    boolean gotException = false;
     try {
       UserGroupInformation.createRemoteUser(userName);
-    } catch (Exception e) {
-      gotException = true;
+      fail("user:"+userName+" wasn't invalid");
+    } catch (IllegalArgumentException e) {
+      String expect = (userName == null || userName.isEmpty())
+          ? "Null user" : "Illegal principal name "+userName;
+      assertEquals(expect, e.getMessage());
     }
-    assertTrue(gotException);
+  }
+
+  @Test (timeout = 30000)
+  public void testSetConfigWithRules() {
+    String[] rules = { "RULE:[1:TEST1]", "RULE:[1:TEST2]", "RULE:[1:TEST3]" };
+
+    // explicitly set a rule
+    UserGroupInformation.reset();
+    assertFalse(KerberosName.hasRulesBeenSet());
+    KerberosName.setRules(rules[0]);
+    assertTrue(KerberosName.hasRulesBeenSet());
+    assertEquals(rules[0], KerberosName.getRules());
+
+    // implicit init should honor rules already being set
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertEquals(rules[0], KerberosName.getRules());
+
+    // set conf, should override
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL, rules[1]);
+    UserGroupInformation.setConfiguration(conf);
+    assertEquals(rules[1], KerberosName.getRules());
+
+    // set conf, should again override
+    conf.set(HADOOP_SECURITY_AUTH_TO_LOCAL, rules[2]);
+    UserGroupInformation.setConfiguration(conf);
+    assertEquals(rules[2], KerberosName.getRules());
+    
+    // implicit init should honor rules already being set
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertEquals(rules[2], KerberosName.getRules());
+  }
+
+  @Test (timeout = 30000)
+  public void testEnsureInitWithRules() throws IOException {
+    String rules = "RULE:[1:RULE1]";
+
+    // trigger implicit init, rules should init
+    UserGroupInformation.reset();
+    assertFalse(KerberosName.hasRulesBeenSet());
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertTrue(KerberosName.hasRulesBeenSet());
+    
+    // set a rule, trigger implicit init, rule should not change 
+    UserGroupInformation.reset();
+    KerberosName.setRules(rules);
+    assertTrue(KerberosName.hasRulesBeenSet());
+    assertEquals(rules, KerberosName.getRules());
+    UserGroupInformation.createUserForTesting("someone", new String[0]);
+    assertEquals(rules, KerberosName.getRules());
   }
 
   @Test (timeout = 30000)

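The auth_to_local rules exercised by these tests can also be applied directly through KerberosName, which the test now imports. A minimal sketch of how one rule rewrites a foreign-realm principal (hypothetical class name and example principal; the rule string is taken from the tests above):

    import java.io.IOException;

    import org.apache.hadoop.security.authentication.util.KerberosName;

    public class AuthToLocalDemo {
      public static void main(String[] args) throws IOException {
        // [1:$1@$0] rebuilds a one-component principal as "user@realm",
        // the regex limits the rule to OTHER.REALM, and the sed-style
        // substitution maps the result to "other-<user>".
        KerberosName.setRules(
            "RULE:[1:$1@$0](.*@OTHER.REALM)s/(.*)@.*/other-$1/");
        System.out.println(
            new KerberosName("user4@OTHER.REALM").getShortName()); // other-user4
      }
    }
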
+ 24 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -363,6 +363,12 @@ Release 2.0.5-beta - UNRELEASED
     HDFS-4569. Small image transfer related cleanups.
     (Andrew Wang via suresh)
 
+    HDFS-4521. Invalid network topologies should not be cached. (Colin Patrick
+    McCabe via atm)
+
+    HDFS-4246. The exclude node list should be more forgiving, for each output
+    stream. (harsh via atm)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -424,6 +430,24 @@ Release 2.0.5-beta - UNRELEASED
     HDFS-4596. Shutting down namenode during checkpointing can lead to md5sum
     error. (Andrew Wang via atm)
 
+    HDFS-4614. FSNamesystem#getContentSummary should use getPermissionChecker
+    helper method. (atm)
+
+    HDFS-4620. Documentation for dfs.namenode.rpc-address specifies wrong
+    format. (Sandy Ryza via atm)
+
+    HDFS-4607. In TestGetConf.testGetSpecificKey(), use a platform-specific
+    line separator; otherwise, it fails on Windows.  (Ivan Mitic via szetszwo)
+
+    HDFS-4609. TestAuditLogs should release log handles between tests. 
+    (Ivan Mitic via szetszwo)
+
+    HDFS-4615. Fix TestDFSShell failures on Windows.  (Arpit Agarwal
+    via szetszwo)
+
+    HDFS-4584. Skip TestNNWithQJM.testNewNamenodeTakesOverWriter() on Windows.
+    (Arpit Agarwal via szetszwo)
+
 Release 2.0.4-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -41,6 +41,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPAC
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT;
@@ -215,6 +217,7 @@ public class DFSClient implements java.io.Closeable {
     final int socketTimeout;
     final int socketCacheCapacity;
     final long socketCacheExpiry;
+    final long excludedNodesCacheExpiry;
     /** Wait time window (in msec) if BlockMissingException is caught */
     final int timeWindow;
     final int nCachedConnRetry;
@@ -265,6 +268,9 @@ public class DFSClient implements java.io.Closeable {
           DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT);
       socketCacheExpiry = conf.getLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
           DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT);
+      excludedNodesCacheExpiry = conf.getLong(
+          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
+          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT);
       prefetchSize = conf.getLong(DFS_CLIENT_READ_PREFETCH_SIZE_KEY,
           10 * defaultBlockSize);
       timeWindow = conf

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -76,6 +76,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   
   public static final String  DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY = "dfs.client.socketcache.expiryMsec";
   public static final long    DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT = 2 * 60 * 1000;
+  public static final String  DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL = "dfs.client.write.exclude.nodes.cache.expiry.interval.millis";
+  public static final long    DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT = 10 * 60 * 1000; // 10 minutes, in ms
   public static final String  DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address";
   public static final String  DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT = "localhost:50100";
   public static final String  DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY = "dfs.namenode.backup.http-address";

+ 30 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java

@@ -35,6 +35,7 @@ import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -83,6 +84,11 @@ import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
 
 
 /****************************************************************
@@ -290,7 +296,25 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
     private DataInputStream blockReplyStream;
     private ResponseProcessor response = null;
     private volatile DatanodeInfo[] nodes = null; // list of targets for current block
-    private ArrayList<DatanodeInfo> excludedNodes = new ArrayList<DatanodeInfo>();
+    private LoadingCache<DatanodeInfo, DatanodeInfo> excludedNodes =
+        CacheBuilder.newBuilder()
+        .expireAfterWrite(
+            dfsClient.getConf().excludedNodesCacheExpiry,
+            TimeUnit.MILLISECONDS)
+        .removalListener(new RemovalListener<DatanodeInfo, DatanodeInfo>() {
+          @Override
+          public void onRemoval(
+              RemovalNotification<DatanodeInfo, DatanodeInfo> notification) {
+            DFSClient.LOG.info("Removing node " +
+                notification.getKey() + " from the excluded nodes list");
+          }
+        })
+        .build(new CacheLoader<DatanodeInfo, DatanodeInfo>() {
+          @Override
+          public DatanodeInfo load(DatanodeInfo key) throws Exception {
+            return key;
+          }
+        });
     volatile boolean hasError = false;
     volatile int errorIndex = -1;
     private BlockConstructionStage stage;  // block construction stage
@@ -1000,8 +1024,10 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
         success = false;
 
         long startTime = Time.now();
-        DatanodeInfo[] excluded = excludedNodes.toArray(
-            new DatanodeInfo[excludedNodes.size()]);
+        DatanodeInfo[] excluded =
+            excludedNodes.getAllPresent(excludedNodes.asMap().keySet())
+            .keySet()
+            .toArray(new DatanodeInfo[0]);
         block = oldBlock;
         lb = locateFollowingBlock(startTime,
             excluded.length > 0 ? excluded : null);
@@ -1020,7 +1046,7 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
           dfsClient.namenode.abandonBlock(block, src, dfsClient.clientName);
           block = null;
           DFSClient.LOG.info("Excluding datanode " + nodes[errorIndex]);
-          excludedNodes.add(nodes[errorIndex]);
+          excludedNodes.put(nodes[errorIndex], nodes[errorIndex]);
         }
       } while (!success && --count >= 0);
 

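The core of the HDFS-4246 change above is replacing the unbounded ArrayList with a Guava LoadingCache used as a self-expiring set: key and value are the same DatanodeInfo, and expireAfterWrite lets an excluded node be "forgiven" simply by aging out. A minimal standalone sketch of the idiom (strings instead of DatanodeInfo, a 1-second expiry for demonstration):

    import java.util.concurrent.TimeUnit;

    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.CacheLoader;
    import com.google.common.cache.LoadingCache;

    public class ExpiringExcludeListDemo {
      public static void main(String[] args) throws InterruptedException {
        LoadingCache<String, String> excluded = CacheBuilder.newBuilder()
            .expireAfterWrite(1, TimeUnit.SECONDS)
            .build(new CacheLoader<String, String>() {
              @Override
              public String load(String key) {
                return key; // identity mapping: the cache acts as a set
              }
            });

        excluded.put("dn1:50010", "dn1:50010");
        // getAllPresent filters out expired entries, which is why the patch
        // uses it when snapshotting the exclude list before an allocation.
        System.out.println(
            excluded.getAllPresent(excluded.asMap().keySet()).keySet());
        Thread.sleep(1500);
        System.out.println(
            excluded.getAllPresent(excluded.asMap().keySet()).keySet()); // []
      }
    }
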
+ 112 - 81
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -66,6 +66,7 @@ import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.net.CachedDNSToSwitchMapping;
 import org.apache.hadoop.net.DNSToSwitchMapping;
 import org.apache.hadoop.net.NetworkTopology;
+import org.apache.hadoop.net.NetworkTopology.InvalidTopologyException;
 import org.apache.hadoop.net.Node;
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.net.ScriptBasedMapping;
@@ -431,8 +432,8 @@ public class DatanodeManager {
       host2DatanodeMap.remove(datanodeMap.put(node.getStorageID(), node));
     }
 
+    networktopology.add(node); // may throw InvalidTopologyException
     host2DatanodeMap.add(node);
-    networktopology.add(node);
     checkIfClusterIsNowMultiRack(node);
 
     if (LOG.isDebugEnabled()) {
@@ -647,92 +648,122 @@ public class DatanodeManager {
       nodeReg.setIpAddr(ip);
       nodeReg.setPeerHostName(hostname);
     }
-
-    nodeReg.setExportedKeys(blockManager.getBlockKeys());
-
-    // Checks if the node is not on the hosts list.  If it is not, then
-    // it will be disallowed from registering. 
-    if (!inHostsList(nodeReg)) {
-      throw new DisallowedDatanodeException(nodeReg);
-    }
-      
-    NameNode.stateChangeLog.info("BLOCK* registerDatanode: from "
-        + nodeReg + " storage " + nodeReg.getStorageID());
-
-    DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID());
-    DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr(
-        nodeReg.getIpAddr(), nodeReg.getXferPort());
-      
-    if (nodeN != null && nodeN != nodeS) {
-      NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN);
-      // nodeN previously served a different data storage, 
-      // which is not served by anybody anymore.
-      removeDatanode(nodeN);
-      // physically remove node from datanodeMap
-      wipeDatanode(nodeN);
-      nodeN = null;
-    }
-
-    if (nodeS != null) {
-      if (nodeN == nodeS) {
-        // The same datanode has been just restarted to serve the same data 
-        // storage. We do not need to remove old data blocks, the delta will
-        // be calculated on the next block report from the datanode
-        if(NameNode.stateChangeLog.isDebugEnabled()) {
-          NameNode.stateChangeLog.debug("BLOCK* registerDatanode: "
-              + "node restarted.");
+    
+    try {
+      nodeReg.setExportedKeys(blockManager.getBlockKeys());
+  
+      // Checks if the node is not on the hosts list.  If it is not, then
+      // it will be disallowed from registering. 
+      if (!inHostsList(nodeReg)) {
+        throw new DisallowedDatanodeException(nodeReg);
+      }
+        
+      NameNode.stateChangeLog.info("BLOCK* registerDatanode: from "
+          + nodeReg + " storage " + nodeReg.getStorageID());
+  
+      DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID());
+      DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr(
+          nodeReg.getIpAddr(), nodeReg.getXferPort());
+        
+      if (nodeN != null && nodeN != nodeS) {
+        NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN);
+        // nodeN previously served a different data storage, 
+        // which is not served by anybody anymore.
+        removeDatanode(nodeN);
+        // physically remove node from datanodeMap
+        wipeDatanode(nodeN);
+        nodeN = null;
+      }
+  
+      if (nodeS != null) {
+        if (nodeN == nodeS) {
+          // The same datanode has been just restarted to serve the same data 
+          // storage. We do not need to remove old data blocks, the delta will
+          // be calculated on the next block report from the datanode
+          if(NameNode.stateChangeLog.isDebugEnabled()) {
+            NameNode.stateChangeLog.debug("BLOCK* registerDatanode: "
+                + "node restarted.");
+          }
+        } else {
+          // nodeS is found
+          /* The registering datanode is a replacement node for the existing 
+            data storage, which from now on will be served by a new node.
+            If this message repeats, both nodes might have the same storageID 
+            by (insanely rare) random chance. The user needs to restart one of
+            the nodes with its data cleared (or just remove the StorageID value
+            in the "VERSION" file under the data directory of the datanode,
+            but this might not work if the VERSION file format has changed).
+         */        
+          NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS
+              + " is replaced by " + nodeReg + " with the same storageID "
+              + nodeReg.getStorageID());
+        }
+        
+        boolean success = false;
+        try {
+          // update cluster map
+          getNetworkTopology().remove(nodeS);
+          nodeS.updateRegInfo(nodeReg);
+          nodeS.setDisallowed(false); // Node is in the include list
+          
+          // resolve network location
+          resolveNetworkLocation(nodeS);
+          getNetworkTopology().add(nodeS);
+            
+          // also treat the registration message as a heartbeat
+          heartbeatManager.register(nodeS);
+          checkDecommissioning(nodeS);
+          success = true;
+        } finally {
+          if (!success) {
+            removeDatanode(nodeS);
+            wipeDatanode(nodeS);
+          }
+        }
+        return;
+      } 
+  
+      // this is a new datanode serving a new data storage
+      if ("".equals(nodeReg.getStorageID())) {
+        // this data storage has never been registered
+        // it is either empty or was created by pre-storageID version of DFS
+        nodeReg.setStorageID(newStorageID());
+        if (NameNode.stateChangeLog.isDebugEnabled()) {
+          NameNode.stateChangeLog.debug(
+              "BLOCK* NameSystem.registerDatanode: "
+              + "new storageID " + nodeReg.getStorageID() + " assigned.");
         }
-      } else {
-        // nodeS is found
-        /* The registering datanode is a replacement node for the existing 
-          data storage, which from now on will be served by a new node.
-          If this message repeats, both nodes might have same storageID 
-          by (insanely rare) random chance. User needs to restart one of the
-          nodes with its data cleared (or user can just remove the StorageID
-          value in "VERSION" file under the data directory of the datanode,
-          but this is might not work if VERSION file format has changed 
-       */        
-        NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS
-            + " is replaced by " + nodeReg + " with the same storageID "
-            + nodeReg.getStorageID());
       }
-      // update cluster map
-      getNetworkTopology().remove(nodeS);
-      nodeS.updateRegInfo(nodeReg);
-      nodeS.setDisallowed(false); // Node is in the include list
       
-      // resolve network location
-      resolveNetworkLocation(nodeS);
-      getNetworkTopology().add(nodeS);
+      DatanodeDescriptor nodeDescr 
+        = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK);
+      boolean success = false;
+      try {
+        resolveNetworkLocation(nodeDescr);
+        networktopology.add(nodeDescr);
+  
+        // register new datanode
+        addDatanode(nodeDescr);
+        checkDecommissioning(nodeDescr);
         
-      // also treat the registration message as a heartbeat
-      heartbeatManager.register(nodeS);
-      checkDecommissioning(nodeS);
-      return;
-    } 
-
-    // this is a new datanode serving a new data storage
-    if ("".equals(nodeReg.getStorageID())) {
-      // this data storage has never been registered
-      // it is either empty or was created by pre-storageID version of DFS
-      nodeReg.setStorageID(newStorageID());
-      if (NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug(
-            "BLOCK* NameSystem.registerDatanode: "
-            + "new storageID " + nodeReg.getStorageID() + " assigned.");
+        // also treat the registration message as a heartbeat
+        // no need to update its timestamp
+        // because that is already done when the descriptor is created
+        heartbeatManager.addDatanode(nodeDescr);
+        success = true;
+      } finally {
+        if (!success) {
+          removeDatanode(nodeDescr);
+          wipeDatanode(nodeDescr);
+        }
       }
+    } catch (InvalidTopologyException e) {
+      // If the network location is invalid, clear the cached mappings
+      // so that we have a chance to re-add this DataNode with the
+      // correct network location later.
+      dnsToSwitchMapping.reloadCachedMappings();
+      throw e;
     }
-    // register new datanode
-    DatanodeDescriptor nodeDescr 
-      = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK);
-    resolveNetworkLocation(nodeDescr);
-    addDatanode(nodeDescr);
-    checkDecommissioning(nodeDescr);
-    
-    // also treat the registration message as a heartbeat
-    // no need to update its timestamp
-    // because its is done when the descriptor is created
-    heartbeatManager.addDatanode(nodeDescr);
   }
 
   /**

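Two ideas carry the registerDatanode() refactoring above: the topology insertion now happens before the datanode maps are updated, so an InvalidTopologyException fires before any state is mutated, and each registration path gets a success flag with a rollback in finally (plus a reloadCachedMappings() call so a bad rack mapping is not cached). A minimal sketch of the flag-and-rollback pattern (hypothetical names, same shape as the patch):

    import java.util.HashSet;
    import java.util.Set;

    public class RollbackPatternDemo {
      static final Set<String> topology = new HashSet<String>();
      static final Set<String> heartbeats = new HashSet<String>();

      static void register(String node, boolean failLater) {
        boolean success = false;
        try {
          topology.add(node);   // may throw in the real code
          if (failLater) {      // simulate a failure mid-registration
            throw new IllegalStateException("simulated failure");
          }
          heartbeats.add(node);
          success = true;
        } finally {
          if (!success) {
            topology.remove(node); // undo the partial registration
          }
        }
      }

      public static void main(String[] args) {
        try {
          register("dn1", true);
        } catch (IllegalStateException expected) {
          // the demo only cares about the rollback below
        }
        System.out.println(topology.contains("dn1")); // false: rolled back
      }
    }
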
+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -3072,8 +3072,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 
   ContentSummary getContentSummary(String src) throws AccessControlException,
       FileNotFoundException, UnresolvedLinkException, StandbyException {
-    FSPermissionChecker pc = new FSPermissionChecker(fsOwnerShortUserName,
-        supergroup);
+    FSPermissionChecker pc = getPermissionChecker();
     checkOperation(OperationCategory.READ);
     readLock();
     try {

+ 13 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -47,7 +47,7 @@
     RPC address that handles all client requests. In the case of HA/Federation where multiple namenodes exist,
     the name service id is added to the name e.g. dfs.namenode.rpc-address.ns1
     dfs.namenode.rpc-address.EXAMPLENAMESERVICE
-    The value of this property will take the form of hdfs://nn-host1:rpc-port.
+    The value of this property will take the form of nn-host1:rpc-port.
   </description>
 </property>
 
@@ -59,7 +59,7 @@
     connecting to this address if it is configured. In the case of HA/Federation where multiple namenodes exist,
     the name service id is added to the name e.g. dfs.namenode.servicerpc-address.ns1
     dfs.namenode.rpc-address.EXAMPLENAMESERVICE
-    The value of this property will take the form of hdfs://nn-host1:rpc-port.
+    The value of this property will take the form of nn-host1:rpc-port.
     If the value of this property is unset the value of dfs.namenode.rpc-address will be used as the default.
   </description>
 </property>
@@ -594,6 +594,17 @@
   <description>Packet size for clients to write</description>
 </property>
 
+<property>
+  <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name>
+  <value>600000</value>
+  <description>The maximum period, in milliseconds, to keep a DN in the
+  excluded nodes list at a client. After this period, the previously excluded
+  node(s) are automatically removed from the cache and are again considered
+  good for block allocations. Lowering or raising this is useful when a file
+  is kept open for very long periods (such as a Write-Ahead-Log (WAL) file),
+  to make the writer tolerant of cluster maintenance restarts.
+  Defaults to 10 minutes.</description>
+</property>
+
 <property>
   <name>dfs.namenode.checkpoint.dir</name>
   <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value>

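For completeness, a minimal sketch (not from the patch; hypothetical class name) of overriding this key programmatically on the client side, using the key string introduced above:

    import org.apache.hadoop.conf.Configuration;

    public class ExcludeExpiryConfigDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Forgive excluded DataNodes after one minute instead of the
        // default ten minutes.
        conf.setLong(
            "dfs.client.write.exclude.nodes.cache.expiry.interval.millis",
            60 * 1000L);
        System.out.println(conf.getLong(
            "dfs.client.write.exclude.nodes.cache.expiry.interval.millis",
            0L)); // 60000
      }
    }
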
+ 88 - 6
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java

@@ -21,20 +21,27 @@ import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.List;
+
+import junit.framework.Assert;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
+import org.apache.hadoop.util.ThreadUtil;
+
 import org.junit.Test;
 
 
 /**
- * These tests make sure that DFSClient retries fetching data from DFS
- * properly in case of errors.
+ * These tests make sure that DFSClient properly excludes DataNodes
+ * from writes in case of errors.
  */
 public class TestDFSClientExcludedNodes {
 
-  @Test
+  @Test(timeout=10000)
   public void testExcludedNodes() throws IOException {
     Configuration conf = new HdfsConfiguration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
@@ -43,14 +50,89 @@ public class TestDFSClientExcludedNodes {
 
     // kill a datanode
     cluster.stopDataNode(AppendTestUtil.nextInt(3));
-    OutputStream out = fs.create(filePath, true, 4096);
+    OutputStream out = fs.create(
+        filePath,
+        true,
+        4096,
+        (short) 3,
+        fs.getDefaultBlockSize(filePath)
+    );
     out.write(20);
 
     try {
       out.close();
     } catch (Exception e) {
-      fail("DataNode failure should not result in a block abort: \n" + e.getMessage());
+      fail("Single DN failure should not result in a block abort: \n" +
+          e.getMessage());
+    }
+  }
+
+  @Test(timeout=10000)
+  public void testExcludedNodesForgiveness() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    // Forgive nodes in under 1s for this test case.
+    conf.setLong(
+        DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
+        1000);
+    // We'll be using a 512-byte block size just for the tests,
+    // so make sure the bytes-per-checksum setting matches it.
+    conf.setInt("io.bytes.per.checksum", 512);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    List<DataNodeProperties> props = cluster.dataNodes;
+    FileSystem fs = cluster.getFileSystem();
+    Path filePath = new Path("/testForgivingExcludedNodes");
+
+    // 256-byte data chunk for writes
+    byte[] bytes = new byte[256];
+    for (int index=0; index<bytes.length; index++) {
+      bytes[index] = '0';
+    }
+
+    // File with a 512-byte block size
+    FSDataOutputStream out = fs.create(filePath, true, 4096, (short) 3, 512);
+
+    // Write a block to all 3 DNs (2x256bytes).
+    out.write(bytes);
+    out.write(bytes);
+    out.hflush();
+
+    // Remove two DNs, to put them into the exclude list.
+    DataNodeProperties two = cluster.stopDataNode(2);
+    DataNodeProperties one = cluster.stopDataNode(1);
+
+    // Write another block.
+    // At this point, we have two nodes already in the excluded list.
+    out.write(bytes);
+    out.write(bytes);
+    out.hflush();
+
+    // Bring back the older DNs, since they will only be forgiven
+    // after this previous block write completes.
+    Assert.assertEquals(true, cluster.restartDataNode(one, true));
+    Assert.assertEquals(true, cluster.restartDataNode(two, true));
+    cluster.waitActive();
+
+    // Sleep for 2s, to let the excluded nodes be expired
+    // from the excludes list (i.e. forgiven after the configured wait period).
+    // [Sleeping just in case the restart of the DNs completed in < 2s,
+    // because otherwise we'd end up quickly excluding those nodes again.]
+    ThreadUtil.sleepAtLeastIgnoreInterrupts(2000);
+
+    // Terminate the last good DN, to assert that there is no
+    // single-DN-available scenario: the write can only succeed if the
+    // other two DNs have been forgiven by now.
+    cluster.stopDataNode(0);
+
+    try {
+      // Attempt writing another block, which should still pass
+      // because the previous two DNs should have been forgiven by now,
+      // while the last good DN is added to the excludes list this time.
+      out.write(bytes);
+      out.hflush();
+      out.close();
+    } catch (Exception e) {
+      fail("Excluded DataNodes should be forgiven after a while and " +
+           "not cause file writing exception of: '" + e.getMessage() + "'");
     }
   }
-  
 }

+ 69 - 53
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java

@@ -35,6 +35,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
 import java.util.Scanner;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.zip.DeflaterOutputStream;
 import java.util.zip.GZIPOutputStream;
 
@@ -68,7 +69,8 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERV
  */
 public class TestDFSShell {
   private static final Log LOG = LogFactory.getLog(TestDFSShell.class);
-  
+  private static AtomicInteger counter = new AtomicInteger();
+
   static final String TEST_ROOT_DIR =
     new Path(System.getProperty("test.build.data","/tmp"))
     .toString().replace(' ', '+');
@@ -512,7 +514,7 @@ public class TestDFSShell {
       createLocalFile(furi);
       argv = new String[3];
       argv[0] = "-put";
-      argv[1] = furi.toString();
+      argv[1] = furi.toURI().toString();
       argv[2] = dstFs.getUri().toString() + "/furi";
       ret = ToolRunner.run(shell, argv);
       assertEquals(" put is working ", 0, ret);
@@ -867,52 +869,59 @@ public class TestDFSShell {
     shell.setConf(conf);
     
     try {
-     //first make dir
-     Path dir = new Path(chmodDir);
-     fs.delete(dir, true);
-     fs.mkdirs(dir);
+      //first make dir
+      Path dir = new Path(chmodDir);
+      fs.delete(dir, true);
+      fs.mkdirs(dir);
 
-     confirmPermissionChange(/* Setting */ "u+rwx,g=rw,o-rwx",
+      confirmPermissionChange(/* Setting */ "u+rwx,g=rw,o-rwx",
                              /* Should give */ "rwxrw----", fs, shell, dir);
-     
-     //create an empty file
-     Path file = new Path(chmodDir, "file");
-     TestDFSShell.writeFile(fs, file);
-
-     //test octal mode
-     confirmPermissionChange( "644", "rw-r--r--", fs, shell, file);
-
-     //test recursive
-     runCmd(shell, "-chmod", "-R", "a+rwX", chmodDir);
-     assertEquals("rwxrwxrwx",
-                  fs.getFileStatus(dir).getPermission().toString()); 
-     assertEquals("rw-rw-rw-",
-                  fs.getFileStatus(file).getPermission().toString());
-
-     // test sticky bit on directories
-     Path dir2 = new Path(dir, "stickybit" );
-     fs.mkdirs(dir2 );
-     LOG.info("Testing sticky bit on: " + dir2);
-     LOG.info("Sticky bit directory initial mode: " + 
-                   fs.getFileStatus(dir2).getPermission());
-     
-     confirmPermissionChange("u=rwx,g=rx,o=rx", "rwxr-xr-x", fs, shell, dir2);
-     
-     confirmPermissionChange("+t", "rwxr-xr-t", fs, shell, dir2);
-
-     confirmPermissionChange("-t", "rwxr-xr-x", fs, shell, dir2);
-
-     confirmPermissionChange("=t", "--------T", fs, shell, dir2);
-
-     confirmPermissionChange("0000", "---------", fs, shell, dir2);
-
-     confirmPermissionChange("1666", "rw-rw-rwT", fs, shell, dir2);
-
-     confirmPermissionChange("777", "rwxrwxrwt", fs, shell, dir2);
-     
-     fs.delete(dir2, true);
-     fs.delete(dir, true);
-     
+
+      //create an empty file
+      Path file = new Path(chmodDir, "file");
+      TestDFSShell.writeFile(fs, file);
+
+      //test octal mode
+      confirmPermissionChange("644", "rw-r--r--", fs, shell, file);
+
+      //test recursive
+      runCmd(shell, "-chmod", "-R", "a+rwX", chmodDir);
+      assertEquals("rwxrwxrwx",
+          fs.getFileStatus(dir).getPermission().toString());
+      assertEquals("rw-rw-rw-",
+          fs.getFileStatus(file).getPermission().toString());
+
+      // Skip "sticky bit" tests on Windows.
+      //
+      if (!Path.WINDOWS) {
+        // test sticky bit on directories
+        Path dir2 = new Path(dir, "stickybit");
+        fs.mkdirs(dir2);
+        LOG.info("Testing sticky bit on: " + dir2);
+        LOG.info("Sticky bit directory initial mode: " +
+            fs.getFileStatus(dir2).getPermission());
+
+        confirmPermissionChange("u=rwx,g=rx,o=rx", "rwxr-xr-x", fs, shell, dir2);
+
+        confirmPermissionChange("+t", "rwxr-xr-t", fs, shell, dir2);
+
+        confirmPermissionChange("-t", "rwxr-xr-x", fs, shell, dir2);
+
+        confirmPermissionChange("=t", "--------T", fs, shell, dir2);
+
+        confirmPermissionChange("0000", "---------", fs, shell, dir2);
+
+        confirmPermissionChange("1666", "rw-rw-rwT", fs, shell, dir2);
+
+        confirmPermissionChange("777", "rwxrwxrwt", fs, shell, dir2);
+
+        fs.delete(dir2, true);
+      } else {
+        LOG.info("Skipped sticky bit tests on Windows");
+      }
+
+      fs.delete(dir, true);
+
     } finally {
       try {
         fs.close();
@@ -1595,27 +1604,29 @@ public class TestDFSShell {
   // force Copy Option is -f
   @Test (timeout = 30000)
   public void testCopyCommandsWithForceOption() throws Exception {
+    final int SUCCESS = 0;
+    final int ERROR = 1;
+
     Configuration conf = new Configuration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
         .format(true).build();
     FsShell shell = null;
     FileSystem fs = null;
     final File localFile = new File(TEST_ROOT_DIR, "testFileForPut");
-    final String localfilepath = localFile.getAbsolutePath();
-    final String testdir = TEST_ROOT_DIR + "/ForceTestDir";
+    final String localfilepath = new Path(localFile.getAbsolutePath()).toUri().toString();
+    final String testdir = "/tmp/TestDFSShell-testCopyCommandsWithForceOption-"
+        + counter.getAndIncrement();
     final Path hdfsTestDir = new Path(testdir);
     try {
       fs = cluster.getFileSystem();
       fs.mkdirs(hdfsTestDir);
       localFile.createNewFile();
-      writeFile(fs, new Path(TEST_ROOT_DIR, "testFileForPut"));
+      writeFile(fs, new Path(testdir, "testFileForPut"));
       shell = new FsShell();
 
       // Tests for put
       String[] argv = new String[] { "-put", "-f", localfilepath, testdir };
       int res = ToolRunner.run(shell, argv);
-      int SUCCESS = 0;
-      int ERROR = 1;
       assertEquals("put -f is not working", SUCCESS, res);
 
       argv = new String[] { "-put", localfilepath, testdir };
@@ -1687,8 +1698,13 @@ public class TestDFSShell {
     try {
       // Create and delete a file
       fs = cluster.getFileSystem();
-      writeFile(fs, new Path(TEST_ROOT_DIR, "foo"));
-      final String testFile = TEST_ROOT_DIR + "/foo";
+
+      // Use a separate tmp dir for each invocation.
+      final String testdir = "/tmp/TestDFSShell-deleteFileUsingTrash-" +
+          counter.getAndIncrement();
+
+      writeFile(fs, new Path(testdir, "foo"));
+      final String testFile = testdir + "/foo";
       final String trashFile = shell.getCurrentTrashDir() + "/" + testFile;
       String[] argv = new String[] { "-rm", testFile };
       int res = ToolRunner.run(shell, argv);

+ 12 - 7
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.qjournal;
 
 import static org.junit.Assert.*;
+import static org.junit.Assume.*;
 
 import java.io.File;
 import java.io.IOException;
@@ -43,7 +44,7 @@ import org.junit.Test;
 
 public class TestNNWithQJM {
   Configuration conf = new HdfsConfiguration();
-  private MiniJournalCluster mjc;
+  private MiniJournalCluster mjc = null;
   private Path TEST_PATH = new Path("/test-dir");
   private Path TEST_PATH_2 = new Path("/test-dir");
 
@@ -61,10 +62,11 @@ public class TestNNWithQJM {
   public void stopJNs() throws Exception {
     if (mjc != null) {
       mjc.shutdown();
+      mjc = null;
     }
   }
   
-  @Test
+  @Test (timeout = 30000)
   public void testLogAndRestart() throws IOException {
     conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image");
@@ -93,9 +95,12 @@ public class TestNNWithQJM {
       cluster.shutdown();
     }
   }
-  
-  @Test
+
+  @Test (timeout = 30000)
   public void testNewNamenodeTakesOverWriter() throws Exception {
+    // Skip the test on Windows. See HDFS-4584.
+    assumeTrue(!Path.WINDOWS);
+
     File nn1Dir = new File(
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image-nn1");
     File nn2Dir = new File(
@@ -154,7 +159,7 @@ public class TestNNWithQJM {
     }
   }
 
-  @Test
+  @Test (timeout = 30000)
   public void testMismatchedNNIsRejected() throws Exception {
     conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image");
@@ -188,8 +193,8 @@ public class TestNNWithQJM {
           "Unable to start log segment 1: too few journals", ioe);
     }
   }
-  
-  @Test
+
+  @Test (timeout = 30000)
   public void testWebPageHasQjmInfo() throws Exception {
     conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
         MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image");

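The assumeTrue() guard above relies on JUnit's Assume: a failed assumption marks the test as skipped rather than failed, which is how HDFS-4584 sidesteps Windows without losing the test elsewhere. A minimal sketch of the same guard in isolation (checking os.name directly instead of Path.WINDOWS):

    import static org.junit.Assume.assumeTrue;

    import org.junit.Test;

    public class SkipOnWindowsDemo {
      @Test
      public void runsOnlyOffWindows() {
        // On Windows this throws AssumptionViolatedException, which JUnit
        // reports as a skipped test, not a failure.
        assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
        // ...platform-specific assertions would follow here
      }
    }
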
+ 8 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java

@@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.log4j.Level;
+import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
@@ -233,9 +234,15 @@ public class TestAuditLogs {
 
   /** Sets up log4j logger for auditlogs */
   private void setupAuditLogs() throws IOException {
+    // Shut down the LogManager to release all open log file handles.
+    // Unfortunately, the Apache commons logging library does not provide
+    // a means to release the underlying loggers. For additional info,
+    // see the commons logging FAQ.
+    LogManager.shutdown();
+
     File file = new File(auditLogFile);
     if (file.exists()) {
-      file.delete();
+      assertTrue(file.delete());
     }
     Logger logger = ((Log4JLogger) FSNamesystem.auditLog).getLogger();
     logger.setLevel(Level.INFO);

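The LogManager.shutdown() call above works because log4j closes every registered appender, releasing the underlying file handles; logging can then be reconfigured from scratch. A minimal sketch of that close-and-reopen cycle (hypothetical logger name and /tmp path, assuming log4j 1.x as used by this codebase):

    import org.apache.log4j.LogManager;
    import org.apache.log4j.Logger;
    import org.apache.log4j.PatternLayout;
    import org.apache.log4j.RollingFileAppender;

    public class ReopenLogFileDemo {
      public static void main(String[] args) throws Exception {
        Logger logger = Logger.getLogger("demo");
        logger.addAppender(new RollingFileAppender(
            new PatternLayout("%m%n"), "/tmp/demo.log"));
        logger.info("first run");

        // Closes all appenders, releasing the open file handle so the
        // log file can be deleted (deleting open files fails on Windows).
        LogManager.shutdown();

        // A fresh appender can now recreate the file cleanly.
        logger.addAppender(new RollingFileAppender(
            new PatternLayout("%m%n"), "/tmp/demo.log"));
        logger.info("second run");
      }
    }
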
+ 9 - 8
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java

@@ -224,7 +224,7 @@ public class TestGetConf {
   /**
    * Test empty configuration
    */
-  @Test
+  @Test(timeout=10000)
   public void testEmptyConf() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration(false);
     // Verify getting addresses fails
@@ -247,7 +247,7 @@ public class TestGetConf {
   /**
    * Test invalid argument to the tool
    */
-  @Test
+  @Test(timeout=10000)
   public void testInvalidArgument() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();
     String[] args = {"-invalidArgument"};
@@ -259,7 +259,7 @@ public class TestGetConf {
    * Tests to make sure the returned addresses are correct in case of default
    * configuration with no federation
    */
-  @Test
+  @Test(timeout=10000)
   public void testNonFederation() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration(false);
   
@@ -294,7 +294,7 @@ public class TestGetConf {
    * Tests to make sure the returned addresses are correct in case of federation
    * of setup.
    */
-  @Test
+  @Test(timeout=10000)
   public void testFederation() throws Exception {
     final int nsCount = 10;
     HdfsConfiguration conf = new HdfsConfiguration(false);
@@ -333,15 +333,16 @@ public class TestGetConf {
     verifyAddresses(conf, TestType.NNRPCADDRESSES, true, nnAddresses);
   }
   
-  @Test
+  @Test(timeout=10000)
   public void testGetSpecificKey() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();
     conf.set("mykey", " myval ");
     String[] args = {"-confKey", "mykey"};
-    assertTrue(runTool(conf, args, true).equals("myval\n"));
+    String toolResult = runTool(conf, args, true);
+    assertEquals(String.format("myval%n"), toolResult);
   }
   
-  @Test
+  @Test(timeout=10000)
   public void testExtraArgsThrowsError() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();
     conf.set("mykey", "myval");
@@ -354,7 +355,7 @@ public class TestGetConf {
    * Tests commands other than {@link Command#NAMENODE}, {@link Command#BACKUP},
    * {@link Command#SECONDARY} and {@link Command#NNRPCADDRESSES}
    */
-  @Test
+  @Test(timeout=10000)
   public void testTool() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration(false);
     for (Command cmd : Command.values()) {

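The testGetSpecificKey() fix above swaps a hard-coded "\n" for String.format("%n"), since "%n" expands to the platform line separator. A minimal sketch of the difference (hypothetical class name):

    public class LineSeparatorDemo {
      public static void main(String[] args) {
        // "%n" expands to "\r\n" on Windows and "\n" elsewhere.
        String expected = String.format("myval%n");
        String sep = System.getProperty("line.separator");
        System.out.println(expected.equals("myval" + sep)); // always true
        System.out.println(expected.equals("myval\n"));     // false on Windows
      }
    }
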
+ 72 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java

@@ -26,12 +26,23 @@ import static org.junit.Assert.fail;
 import java.util.HashMap;
 import java.util.Map;
 
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.junit.Before;
 import org.junit.Test;
 
 public class TestNetworkTopology {
+  private static final Log LOG = LogFactory.getLog(TestNetworkTopology.class);
   private final static NetworkTopology cluster = new NetworkTopology();
   private DatanodeDescriptor dataNodes[];
   
@@ -213,4 +224,65 @@ public class TestNetworkTopology {
       }
     }
   }
+
+  @Test(timeout=180000)
+  public void testInvalidNetworkTopologiesNotCachedInHdfs() throws Exception {
+    // start a cluster
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = null;
+    try {
+      // bad rack topology
+      String racks[] = { "/a/b", "/c" };
+      String hosts[] = { "foo1.example.com", "foo2.example.com" };
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).
+          racks(racks).hosts(hosts).build();
+      cluster.waitActive();
+      
+      NamenodeProtocols nn = cluster.getNameNodeRpc();
+      Assert.assertNotNull(nn);
+      
+      // Wait for one DataNode to register.
+      // The other DataNode will not be able to register because of the rack mismatch.
+      DatanodeInfo[] info;
+      while (true) {
+        info = nn.getDatanodeReport(DatanodeReportType.LIVE);
+        Assert.assertFalse(info.length == 2);
+        if (info.length == 1) {
+          break;
+        }
+        Thread.sleep(1000);
+      }
+      // Set the network topology of the other node to match the network
+      // topology of the node that came up.
+      int validIdx = info[0].getHostName().equals(hosts[0]) ? 0 : 1;
+      int invalidIdx = validIdx == 1 ? 0 : 1;
+      StaticMapping.addNodeToRack(hosts[invalidIdx], racks[validIdx]);
+      LOG.info("datanode " + validIdx + " came up with network location " + 
+        info[0].getNetworkLocation());
+
+      // Restart the DN with the invalid topology and wait for it to register.
+      cluster.restartDataNode(invalidIdx);
+      Thread.sleep(5000);
+      while (true) {
+        info = nn.getDatanodeReport(DatanodeReportType.LIVE);
+        if (info.length == 2) {
+          break;
+        }
+        if (info.length == 0) {
+          LOG.info("got no valid DNs");
+        } else if (info.length == 1) {
+          LOG.info("got one valid DN: " + info[0].getHostName() +
+              " (at " + info[0].getNetworkLocation() + ")");
+        }
+        Thread.sleep(1000);
+      }
+      Assert.assertEquals(info[0].getNetworkLocation(),
+                          info[1].getNetworkLocation());
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
 }

+ 24 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -75,6 +75,9 @@ Trunk (Unreleased)
     MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive.
     (Brandon Li via suresh)
 
+    MAPREDUCE-5014. Extend Distcp to accept a custom CopyListing. 
+    (Srikanth Sundarrajan via amareshwari)
+
   BUG FIXES
 
     MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
@@ -158,6 +161,9 @@ Trunk (Unreleased)
 
     MAPREDUCE-5012. Typo in javadoc for IdentityMapper class. (Adam Monsen
     via suresh)
+    
+    MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path
+    separators. (Chris Nauroth via sseth)
 
   BREAKDOWN OF HADOOP-8562 SUBTASKS
 
@@ -194,6 +200,9 @@ Release 2.0.5-beta - UNRELEASED
     MAPREDUCE-4892. Modify CombineFileInputFormat to not skew input slits'
     allocation on small clusters. (Bikas Saha via vinodkv)
 
+    MAPREDUCE-4990. Construct debug strings conditionally in 
+    ShuffleHandler.Shuffle#sendMapOutput(). (kkambatl via tucu)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -235,6 +244,15 @@ Release 2.0.5-beta - UNRELEASED
     MAPREDUCE-4716. TestHsWebServicesJobsQuery.testJobsQueryStateInvalid 
     fails with jdk7. (tgraves via tucu)
 
+    MAPREDUCE-5075. DistCp leaks input file handles since ThrottledInputStream
+    does not close the wrapped InputStream.  (Chris Nauroth via szetszwo)
+
+    MAPREDUCE-3872. Fix an event handling races in ContainerLauncherImpl.
+    (Robert Kanter via sseth)
+
+    MAPREDUCE-5083. MiniMRCluster should use a random component when creating an
+    actual cluster (Siddharth Seth via hitesh)
+
 Release 2.0.4-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -780,6 +798,9 @@ Release 0.23.7 - UNRELEASED
     MAPREDUCE-5027. Shuffle does not limit number of outstanding connections
     (Robert Parker via jeagles)
 
+    MAPREDUCE-4972. Coverage fixing for org.apache.hadoop.mapreduce.jobhistory
+    (Aleksey Gorshkov via bobby)
+
   OPTIMIZATIONS
 
     MAPREDUCE-4946. Fix a performance problem for large jobs by reducing the
@@ -818,6 +839,9 @@ Release 0.23.7 - UNRELEASED
     MAPREDUCE-5042. Reducer unable to fetch for a map task that was recovered
     (Jason Lowe via bobby)
 
+    MAPREDUCE-5053. java.lang.InternalError from decompression codec cause
+    reducer to fail (Robert Parker via jeagles)
+
 Release 0.23.6 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 4 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java

@@ -230,9 +230,6 @@ public class ContainerLauncherImpl extends AbstractService implements
     }
   }
 
-  // To track numNodes.
-  Set<String> allNodes = new HashSet<String>();
-
   public ContainerLauncherImpl(AppContext context) {
     super(ContainerLauncherImpl.class.getName());
     this.context = context;
@@ -271,6 +268,8 @@ public class ContainerLauncherImpl extends AbstractService implements
       @Override
       public void run() {
         ContainerLauncherEvent event = null;
+        Set<String> allNodes = new HashSet<String>();
+
         while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
           try {
             event = eventQueue.take();
@@ -280,6 +279,8 @@ public class ContainerLauncherImpl extends AbstractService implements
             }
             return;
           }
+          allNodes.add(event.getContainerMgrAddress());
+
           int poolSize = launcherPool.getCorePoolSize();
 
           // See if we need up the pool size only if haven't reached the
@@ -419,7 +420,6 @@ public class ContainerLauncherImpl extends AbstractService implements
   public void handle(ContainerLauncherEvent event) {
     try {
       eventQueue.put(event);
-      this.allNodes.add(event.getContainerMgrAddress());
     } catch (InterruptedException e) {
       throw new YarnException(e);
     }

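The race fixed above (MAPREDUCE-3872) came from handle(), invoked by arbitrary threads, mutating the same unsynchronized HashSet that the event loop read. The patch confines allNodes to the single consumer thread, so only the thread-safe event queue is shared. A minimal sketch of that confinement pattern (hypothetical names):

    import java.util.HashSet;
    import java.util.Set;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    public class ThreadConfinementDemo {
      private final BlockingQueue<String> events =
          new LinkedBlockingQueue<String>();

      // Producers touch only the thread-safe queue...
      public void handle(String nodeAddr) throws InterruptedException {
        events.put(nodeAddr);
      }

      // ...while the unsynchronized set lives entirely inside the single
      // consumer thread, so no locking is needed.
      public Thread startEventLoop() {
        Thread t = new Thread(new Runnable() {
          @Override
          public void run() {
            Set<String> allNodes = new HashSet<String>();
            while (!Thread.currentThread().isInterrupted()) {
              try {
                allNodes.add(events.take());
              } catch (InterruptedException e) {
                return;
              }
            }
          }
        });
        t.start();
        return t;
      }
    }
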
+ 3 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java

@@ -38,7 +38,6 @@ import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -100,10 +99,9 @@ public class LocalContainerAllocator extends RMCommunicator
         this.applicationAttemptId, this.lastResponseID, super
             .getApplicationProgress(), new ArrayList<ResourceRequest>(),
         new ArrayList<ContainerId>());
-    AMResponse response;
+    AllocateResponse allocateResponse;
     try {
-      AllocateResponse allocateResponse = scheduler.allocate(allocateRequest);
-      response = allocateResponse.getAMResponse();
+      allocateResponse = scheduler.allocate(allocateRequest);
       // Reset retry count if no exception occurred.
       retrystartTime = System.currentTimeMillis();
     } catch (Exception e) {
@@ -120,7 +118,7 @@ public class LocalContainerAllocator extends RMCommunicator
       // continue to attempt to contact the RM.
       throw e;
     }
-    if (response.getReboot()) {
+    if (allocateResponse.getReboot()) {
       LOG.info("Event from RM: shutting down Application Master");
       // This can happen if the RM has been restarted. If it is in that state,
       // this application must clean itself up.

+ 5 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java

@@ -59,7 +59,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.hadoop.yarn.YarnException;
-import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
@@ -544,8 +544,9 @@ public class RMContainerAllocator extends RMContainerRequestor
   
   @SuppressWarnings("unchecked")
   private List<Container> getResources() throws Exception {
-    int headRoom = getAvailableResources() != null ? getAvailableResources().getMemory() : 0;//first time it would be null
-    AMResponse response;
+    int headRoom = getAvailableResources() != null
+        ? getAvailableResources().getMemory() : 0;//first time it would be null
+    AllocateResponse response;
     /*
      * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
      * milliseconds before aborting. During this interval, AM will still try
@@ -634,7 +635,7 @@ public class RMContainerAllocator extends RMContainerRequestor
   }
   
   @SuppressWarnings("unchecked")
-  private void handleUpdatedNodes(AMResponse response) {
+  private void handleUpdatedNodes(AllocateResponse response) {
     // send event to the job about on updated nodes
     List<NodeReport> updatedNodes = response.getUpdatedNodes();
     if (!updatedNodes.isEmpty()) {

+ 7 - 8
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java

@@ -38,7 +38,6 @@ import org.apache.hadoop.mapreduce.v2.app.client.ClientService;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
@@ -146,30 +145,30 @@ public abstract class RMContainerRequestor extends RMCommunicator {
     LOG.info("blacklistDisablePercent is " + blacklistDisablePercent);
   }
 
-  protected AMResponse makeRemoteRequest() throws YarnRemoteException {
+  protected AllocateResponse makeRemoteRequest() throws YarnRemoteException {
     AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest(
         applicationAttemptId, lastResponseID, super.getApplicationProgress(),
         new ArrayList<ResourceRequest>(ask), new ArrayList<ContainerId>(
             release));
     AllocateResponse allocateResponse = scheduler.allocate(allocateRequest);
-    AMResponse response = allocateResponse.getAMResponse();
-    lastResponseID = response.getResponseId();
-    availableResources = response.getAvailableResources();
+    lastResponseID = allocateResponse.getResponseId();
+    availableResources = allocateResponse.getAvailableResources();
     lastClusterNmCount = clusterNmCount;
     clusterNmCount = allocateResponse.getNumClusterNodes();
 
     if (ask.size() > 0 || release.size() > 0) {
       LOG.info("getResources() for " + applicationId + ":" + " ask="
           + ask.size() + " release= " + release.size() + " newContainers="
-          + response.getAllocatedContainers().size() + " finishedContainers="
-          + response.getCompletedContainersStatuses().size()
+          + allocateResponse.getAllocatedContainers().size()
+          + " finishedContainers="
+          + allocateResponse.getCompletedContainersStatuses().size()
           + " resourcelimit=" + availableResources + " knownNMs="
           + clusterNmCount);
     }
 
     ask.clear();
     release.clear();
-    return response;
+    return allocateResponse;
   }
 
   // May be incorrect if there's multiple NodeManagers running on a single host.

+ 397 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java

@@ -0,0 +1,397 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.jobhistory;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static junit.framework.Assert.*;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobPriority;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.junit.Test;
+
+public class TestEvents {
+
+  /**
+   * test the getters of TaskAttemptFinishedEvent and TaskAttemptFinished
+   * 
+   * @throws Exception
+   */
+  @Test(timeout = 10000)
+  public void testTaskAttemptFinishedEvent() throws Exception {
+
+    JobID jid = new JobID("001", 1);
+    TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
+    TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);
+    Counters counters = new Counters();
+    TaskAttemptFinishedEvent test = new TaskAttemptFinishedEvent(taskAttemptId,
+        TaskType.REDUCE, "TEST", 123L, "RAKNAME", "HOSTNAME", "STATUS",
+        counters);
+    assertEquals(test.getAttemptId().toString(), taskAttemptId.toString());
+
+    assertEquals(test.getCounters(), counters);
+    assertEquals(test.getFinishTime(), 123L);
+    assertEquals(test.getHostname(), "HOSTNAME");
+    assertEquals(test.getRackName(), "RAKNAME");
+    assertEquals(test.getState(), "STATUS");
+    assertEquals(test.getTaskId(), tid);
+    assertEquals(test.getTaskStatus(), "TEST");
+    assertEquals(test.getTaskType(), TaskType.REDUCE);
+
+  }
+
+  /**
+   * Simple test of JobPriorityChangeEvent and JobPriorityChange.
+   * 
+   * @throws Exception
+   */
+
+  @Test(timeout = 10000)
+  public void testJobPriorityChange() throws Exception {
+    org.apache.hadoop.mapreduce.JobID jid = new JobID("001", 1);
+    JobPriorityChangeEvent test = new JobPriorityChangeEvent(jid,
+        JobPriority.LOW);
+    assertEquals(test.getJobId().toString(), jid.toString());
+    assertEquals(test.getPriority(), JobPriority.LOW);
+
+  }
+
+  /**
+   * Simple test of TaskUpdatedEvent and TaskUpdated.
+   * 
+   * @throws Exception
+   */
+  @Test(timeout = 10000)
+  public void testTaskUpdated() throws Exception {
+    JobID jid = new JobID("001", 1);
+    TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
+    TaskUpdatedEvent test = new TaskUpdatedEvent(tid, 1234L);
+    assertEquals(test.getTaskId().toString(), tid.toString());
+    assertEquals(test.getFinishTime(), 1234L);
+
+  }
+
+  /*
+   * Test EventReader: it should read the list of events and return an
+   * instance of HistoryEvent for each one. Different HistoryEvents should
+   * carry different datum types.
+   */
+  @Test(timeout = 10000)
+  public void testEvents() throws Exception {
+
+    EventReader reader = new EventReader(new DataInputStream(
+        new ByteArrayInputStream(getEvents())));
+    HistoryEvent e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.JOB_PRIORITY_CHANGED));
+    assertEquals("ID", ((JobPriorityChange) e.getDatum()).jobid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.JOB_STATUS_CHANGED));
+    assertEquals("ID", ((JobStatusChanged) e.getDatum()).jobid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.TASK_UPDATED));
+    assertEquals("ID", ((TaskUpdated) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.JOB_KILLED));
+    assertEquals("ID",
+        ((JobUnsuccessfulCompletion) e.getDatum()).jobid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_STARTED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptStarted) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_FINISHED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptFinished) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_STARTED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptStarted) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_FINISHED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptFinished) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    e = reader.getNextEvent();
+    assertTrue(e.getEventType().equals(EventType.REDUCE_ATTEMPT_KILLED));
+    assertEquals("task_1_2_r03_4",
+        ((TaskAttemptUnsuccessfulCompletion) e.getDatum()).taskid.toString());
+
+    reader.close();
+  }
+
+  /*
+   * Builds a byte array containing the serialized history events.
+   */
+  private byte[] getEvents() throws Exception {
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+    FSDataOutputStream fsOutput = new FSDataOutputStream(output,
+        new FileSystem.Statistics("scheme"));
+    EventWriter writer = new EventWriter(fsOutput);
+    writer.write(getJobPriorityChangedEvent());
+    writer.write(getJobStatusChangedEvent());
+    writer.write(getTaskUpdatedEvent());
+    writer.write(getReduceAttemptKilledEvent());
+    writer.write(getJobKilledEvent());
+    writer.write(getSetupAttemptStartedEvent());
+    writer.write(getTaskAttemptFinishedEvent());
+    writer.write(getSetupAttemptFailedEvent());
+    writer.write(getSetupAttemptKilledEvent());
+    writer.write(getCleanupAttemptStartedEvent());
+    writer.write(getCleanupAttemptFinishedEvent());
+    writer.write(getCleanupAttemptFailedEvent());
+    writer.write(getCleanupAttemptKilledEvent());
+
+    writer.flush();
+    writer.close();
+
+    return output.toByteArray();
+  }
+
+  private FakeEvent getCleanupAttemptKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_KILLED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getCleanupAttemptFailedEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_FAILED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private TaskAttemptUnsuccessfulCompletion getTaskAttemptUnsuccessfulCompletion() {
+    TaskAttemptUnsuccessfulCompletion datum = new TaskAttemptUnsuccessfulCompletion();
+    datum.attemptId = "attempt_1_2_r3_4_5";
+    datum.clockSplits = Arrays.asList(1, 2, 3);
+    datum.cpuUsages = Arrays.asList(100, 200, 300);
+    datum.error = "Error";
+    datum.finishTime = 2;
+    datum.hostname = "hostname";
+    datum.rackname = "rackname";
+    datum.physMemKbytes = Arrays.asList(1000, 2000, 3000);
+    datum.taskid = "task_1_2_r03_4";
+    datum.port = 1000;
+    datum.taskType = "REDUCE";
+    datum.status = "STATUS";
+    datum.counters = getCounters();
+    datum.vMemKbytes = Arrays.asList(1000, 2000, 3000);
+    return datum;
+  }
+
+  private JhCounters getCounters() {
+    JhCounters counters = new JhCounters();
+    counters.groups = new ArrayList<JhCounterGroup>(0);
+    counters.name = "name";
+    return counters;
+  }
+
+  private FakeEvent getCleanupAttemptFinishedEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_FINISHED);
+    TaskAttemptFinished datum = new TaskAttemptFinished();
+    datum.attemptId = "attempt_1_2_r3_4_5";
+
+    datum.counters = getCounters();
+    datum.finishTime = 2;
+    datum.hostname = "hostname";
+    datum.rackname = "rackName";
+    datum.state = "state";
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskStatus = "taskStatus";
+    datum.taskType = "REDUCE";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getCleanupAttemptStartedEvent() {
+    FakeEvent result = new FakeEvent(EventType.CLEANUP_ATTEMPT_STARTED);
+    TaskAttemptStarted datum = new TaskAttemptStarted();
+
+    datum.attemptId = "attempt_1_2_r3_4_5";
+    datum.avataar = "avatar";
+    datum.containerId = "containerId";
+    datum.httpPort = 10000;
+    datum.locality = "locality";
+    datum.shufflePort = 10001;
+    datum.startTime = 1;
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskType = "taskType";
+    datum.trackerName = "trackerName";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getSetupAttemptKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_KILLED);
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getSetupAttemptFailedEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_FAILED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getTaskAttemptFinishedEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_FINISHED);
+    TaskAttemptFinished datum = new TaskAttemptFinished();
+
+    datum.attemptId = "attempt_1_2_r3_4_5";
+    datum.counters = getCounters();
+    datum.finishTime = 2;
+    datum.hostname = "hostname";
+    datum.rackname = "rackname";
+    datum.state = "state";
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskStatus = "taskStatus";
+    datum.taskType = "REDUCE";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getSetupAttemptStartedEvent() {
+    FakeEvent result = new FakeEvent(EventType.SETUP_ATTEMPT_STARTED);
+    TaskAttemptStarted datum = new TaskAttemptStarted();
+    datum.attemptId = "ID";
+    datum.avataar = "avataar";
+    datum.containerId = "containerId";
+    datum.httpPort = 10000;
+    datum.locality = "locality";
+    datum.shufflePort = 10001;
+    datum.startTime = 1;
+    datum.taskid = "task_1_2_r03_4";
+    datum.taskType = "taskType";
+    datum.trackerName = "trackerName";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getJobKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.JOB_KILLED);
+    JobUnsuccessfulCompletion datum = new JobUnsuccessfulCompletion();
+    datum.finishedMaps = 1;
+    datum.finishedReduces = 2;
+    datum.finishTime = 3;
+    datum.jobid = "ID";
+    datum.jobStatus = "STATUS";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getReduceAttemptKilledEvent() {
+    FakeEvent result = new FakeEvent(EventType.REDUCE_ATTEMPT_KILLED);
+
+    result.setDatum(getTaskAttemptUnsuccessfulCompletion());
+    return result;
+  }
+
+  private FakeEvent getJobPriorityChangedEvent() {
+    FakeEvent result = new FakeEvent(EventType.JOB_PRIORITY_CHANGED);
+    JobPriorityChange datum = new JobPriorityChange();
+    datum.jobid = "ID";
+    datum.priority = "priority";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getJobStatusChangedEvent() {
+    FakeEvent result = new FakeEvent(EventType.JOB_STATUS_CHANGED);
+    JobStatusChanged datum = new JobStatusChanged();
+    datum.jobid = "ID";
+    datum.jobStatus = "newStatus";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private FakeEvent getTaskUpdatedEvent() {
+    FakeEvent result = new FakeEvent(EventType.TASK_UPDATED);
+    TaskUpdated datum = new TaskUpdated();
+    datum.finishTime = 2;
+    datum.taskid = "ID";
+    result.setDatum(datum);
+    return result;
+  }
+
+  private class FakeEvent implements HistoryEvent {
+    private EventType eventType;
+    private Object datum;
+
+    public FakeEvent(EventType eventType) {
+      this.eventType = eventType;
+    }
+
+    @Override
+    public EventType getEventType() {
+      return eventType;
+    }
+
+    @Override
+    public Object getDatum() {
+
+      return datum;
+    }
+
+    @Override
+    public void setDatum(Object datum) {
+      this.datum = datum;
+    }
+
+  }
+
+}
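
The round trip this test exercises reduces to a few lines. A minimal sketch, reusing the in-memory stream plumbing and imports already present in the test above (someEvent stands for any HistoryEvent implementation):

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    EventWriter writer = new EventWriter(new FSDataOutputStream(bytes,
        new FileSystem.Statistics("scheme")));
    writer.write(someEvent);   // serializes the event type plus its Avro datum
    writer.flush();
    writer.close();

    EventReader reader = new EventReader(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));
    HistoryEvent readBack = reader.getNextEvent();  // type and datum restored
    reader.close();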

+ 15 - 21
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TaskType;
@@ -53,7 +54,7 @@ public class TestJobHistoryEventHandler {
   private static final Log LOG = LogFactory
       .getLog(TestJobHistoryEventHandler.class);
 
-  @Test
+  @Test (timeout=50000)
   public void testFirstFlushOnCompletionEvent() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -96,7 +97,7 @@ public class TestJobHistoryEventHandler {
     }
   }
 
-  @Test
+  @Test (timeout=50000)
   public void testMaxUnflushedCompletionEvents() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -131,17 +132,17 @@ public class TestJobHistoryEventHandler {
 
       handleNextNEvents(jheh, 1);
       verify(mockWriter).flush();
-      
+
       handleNextNEvents(jheh, 50);
       verify(mockWriter, times(6)).flush();
-      
+
     } finally {
       jheh.stop();
       verify(mockWriter).close();
     }
   }
-  
-  @Test
+
+  @Test (timeout=50000)
   public void testUnflushedTimer() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -181,8 +182,8 @@ public class TestJobHistoryEventHandler {
       verify(mockWriter).close();
     }
   }
-  
-  @Test
+
+  @Test (timeout=50000)
   public void testBatchedFlushJobEndMultiplier() throws Exception {
     TestParams t = new TestParams();
     Configuration conf = new Configuration();
@@ -265,7 +266,7 @@ public class TestJobHistoryEventHandler {
     when(mockContext.getApplicationID()).thenReturn(appId);
     return mockContext;
   }
-  
+
 
   private class TestParams {
     String workDir = setupTestWorkDir();
@@ -279,12 +280,8 @@ public class TestJobHistoryEventHandler {
   }
 
   private JobHistoryEvent getEventToEnqueue(JobId jobId) {
-    JobHistoryEvent toReturn = Mockito.mock(JobHistoryEvent.class);
-    HistoryEvent he = Mockito.mock(HistoryEvent.class);
-    Mockito.when(he.getEventType()).thenReturn(EventType.JOB_STATUS_CHANGED);
-    Mockito.when(toReturn.getHistoryEvent()).thenReturn(he);
-    Mockito.when(toReturn.getJobID()).thenReturn(jobId);
-    return toReturn;
+    HistoryEvent toReturn = new JobStatusChangedEvent(new JobID(Integer.toString(jobId.getId()), jobId.getId()), "change status");
+    return new JobHistoryEvent(jobId, toReturn);
   }
 
   @Test
@@ -344,8 +341,6 @@ public class TestJobHistoryEventHandler {
 class JHEvenHandlerForTest extends JobHistoryEventHandler {
 
   private EventWriter eventWriter;
-  volatile int handleEventCompleteCalls = 0;
-  volatile int handleEventStartedCalls = 0;
 
   public JHEvenHandlerForTest(AppContext context, int startCount) {
     super(context, startCount);
@@ -354,7 +349,7 @@ class JHEvenHandlerForTest extends JobHistoryEventHandler {
   @Override
   public void start() {
   }
-  
+
   @Override
   protected EventWriter createEventWriter(Path historyFilePath)
       throws IOException {
@@ -365,7 +360,7 @@ class JHEvenHandlerForTest extends JobHistoryEventHandler {
   @Override
   protected void closeEventWriter(JobId jobId) {
   }
-  
+
   public EventWriter getEventWriter() {
     return this.eventWriter;
   }
@@ -375,13 +370,12 @@ class JHEvenHandlerForTest extends JobHistoryEventHandler {
  * Class to help with testSigTermedFunctionality
  */
 class JHEventHandlerForSigtermTest extends JobHistoryEventHandler {
-  private MetaInfo metaInfo;
   public JHEventHandlerForSigtermTest(AppContext context, int startCount) {
     super(context, startCount);
   }
 
   public void addToFileMap(JobId jobId) {
-    metaInfo = Mockito.mock(MetaInfo.class);
+    MetaInfo metaInfo = Mockito.mock(MetaInfo.class);
     Mockito.when(metaInfo.isWriterActive()).thenReturn(true);
     fileMap.put(jobId, metaInfo);
   }

+ 2 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java

@@ -40,7 +40,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest
 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -248,10 +247,8 @@ public class MRAppBenchmark {
                   }
                 }
 
-                AMResponse amResponse = Records.newRecord(AMResponse.class);
-                amResponse.setAllocatedContainers(containers);
-                amResponse.setResponseId(request.getResponseId() + 1);
-                response.setAMResponse(amResponse);
+                response.setAllocatedContainers(containers);
+                response.setResponseId(request.getResponseId() + 1);
                 response.setNumClusterNodes(350);
                 return response;
               }

+ 4 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRAppMaster.java

@@ -84,8 +84,10 @@ public class TestMRAppMaster {
     YarnConfiguration conf = new YarnConfiguration();
     conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
     MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
-    assertEquals(stagingDir + Path.SEPARATOR + userName + Path.SEPARATOR
-        + ".staging", appMaster.stagingDirPath.toString());
+    Path userPath = new Path(stagingDir, userName);
+    Path userStagingPath = new Path(userPath, ".staging");
+    assertEquals(userStagingPath.toString(),
+      appMaster.stagingDirPath.toString());
   }
   
   @Test
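
The rewritten assertion builds the expected value by Path composition instead of string concatenation, so the expected string is normalized exactly the way the code under test normalizes it. A sketch of the contrast (stagingDir and userName as in the test; illustrative only):

    // Brittle: assumes stagingDir is already in Path's normalized form,
    // which does not hold for Windows-style paths.
    String expected = stagingDir + Path.SEPARATOR + userName
        + Path.SEPARATOR + ".staging";

    // Robust: new Path(parent, child) normalizes both components the same
    // way MRAppMaster does when it computes stagingDirPath.
    Path expectedPath = new Path(new Path(stagingDir, userName), ".staging");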

+ 14 - 7
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java

@@ -357,13 +357,20 @@ class Fetcher<K,V> extends Thread {
         return EMPTY_ATTEMPT_ID_ARRAY;
       } 
       
-      // Go!
-      LOG.info("fetcher#" + id + " about to shuffle output of map " + 
-               mapOutput.getMapId() + " decomp: " +
-               decompressedLength + " len: " + compressedLength + " to " +
-               mapOutput.getDescription());
-      mapOutput.shuffle(host, input, compressedLength, decompressedLength,
-                        metrics, reporter);
+      // The codecs for lzo, lz4, snappy, bz2, etc. throw
+      // java.lang.InternalError on decompression failures. Catch and
+      // re-throw as IOException so the fetch-failure logic can handle it.
+      try {
+        // Go!
+        LOG.info("fetcher#" + id + " about to shuffle output of map "
+            + mapOutput.getMapId() + " decomp: " + decompressedLength
+            + " len: " + compressedLength + " to " + mapOutput.getDescription());
+        mapOutput.shuffle(host, input, compressedLength, decompressedLength,
+            metrics, reporter);
+      } catch (java.lang.InternalError e) {
+        LOG.warn("Failed to shuffle for fetcher#"+id, e);
+        throw new IOException(e);
+      }
       
       // Inform the shuffle scheduler
       long endTime = System.currentTimeMillis();
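
The new try/catch matters because java.lang.InternalError is an Error, not an Exception: without the wrap it would escape the IOException-based fetch-failure logic (copyFailed and retry) and instead propagate as a fatal error. The defensive pattern in isolation (mapOutput and the other locals as in the surrounding method):

    try {
      mapOutput.shuffle(host, input, compressedLength, decompressedLength,
          metrics, reporter);
    } catch (java.lang.InternalError e) {
      // Native codecs report corrupt map output as InternalError; convert
      // it so the normal fetch-failure path (copyFailed, retry) runs.
      throw new IOException(e);
    }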

+ 60 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java

@@ -25,6 +25,7 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.HttpURLConnection;
 import java.net.SocketTimeoutException;
 import java.net.URL;
@@ -233,4 +234,62 @@ public class TestFetcher {
     verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
   }
   
-}
+  @SuppressWarnings("unchecked")
+  @Test(timeout=10000) 
+  public void testCopyFromHostCompressFailure() throws Exception {
+    LOG.info("testCopyFromHostCompressFailure");
+    JobConf job = new JobConf();
+    TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1");
+    ShuffleScheduler<Text, Text> ss = mock(ShuffleScheduler.class);
+    MergeManagerImpl<Text, Text> mm = mock(MergeManagerImpl.class);
+    InMemoryMapOutput<Text, Text> immo = mock(InMemoryMapOutput.class);
+    Reporter r = mock(Reporter.class);
+    ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
+    ExceptionReporter except = mock(ExceptionReporter.class);
+    SecretKey key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
+    HttpURLConnection connection = mock(HttpURLConnection.class);
+    
+    Counters.Counter allErrs = mock(Counters.Counter.class);
+    when(r.getCounter(anyString(), anyString()))
+      .thenReturn(allErrs);
+    
+    Fetcher<Text,Text> underTest = new FakeFetcher<Text,Text>(job, id, ss, mm,
+        r, metrics, except, key, connection);
+    
+
+    MapHost host = new MapHost("localhost", "http://localhost:8080/");
+    
+    ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
+    TaskAttemptID map1ID = TaskAttemptID.forName("attempt_0_1_m_1_1");
+    maps.add(map1ID);
+    TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
+    maps.add(map2ID);
+    when(ss.getMapsForHost(host)).thenReturn(maps);
+    
+    String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
+    String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
+    
+    when(connection.getResponseCode()).thenReturn(200);
+    when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH))
+      .thenReturn(replyHash);
+    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
+    ByteArrayOutputStream bout = new ByteArrayOutputStream();
+    header.write(new DataOutputStream(bout));
+    ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
+    when(connection.getInputStream()).thenReturn(in);
+    when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
+      .thenReturn(immo);
+    
+    doThrow(new java.lang.InternalError())
+    .when(immo)
+      .shuffle(any(MapHost.class), any(InputStream.class), anyLong(), 
+               anyLong(), any(ShuffleClientMetrics.class), any(Reporter.class));
+
+    underTest.copyFromHost(host);
+       
+    verify(connection)
+      .addRequestProperty(SecureShuffleUtils.HTTP_HEADER_URL_HASH, 
+          encHash);
+    verify(ss, times(1)).copyFailed(map1ID, host, true, false);
+  }
+}

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java

@@ -79,7 +79,7 @@ public class TestJobHistoryEntities {
   }
 
   /* Verify some expected values based on the history file */
-  @Test
+  @Test (timeout=10000)
   public void testCompletedJob() throws Exception {
     HistoryFileInfo info = mock(HistoryFileInfo.class);
     when(info.getConfFile()).thenReturn(fullConfPath);
@@ -104,7 +104,7 @@ public class TestJobHistoryEntities {
     assertEquals(JobState.SUCCEEDED, jobReport.getJobState());
   }
   
-  @Test
+  @Test (timeout=10000)
   public void testCompletedTask() throws Exception {
     HistoryFileInfo info = mock(HistoryFileInfo.class);
     when(info.getConfFile()).thenReturn(fullConfPath);
@@ -133,7 +133,7 @@ public class TestJobHistoryEntities {
     assertEquals(rt1Id, rt1Report.getTaskId());
   }
   
-  @Test
+  @Test (timeout=10000)
   public void testCompletedTaskAttempt() throws Exception {
     HistoryFileInfo info = mock(HistoryFileInfo.class);
     when(info.getConfFile()).thenReturn(fullConfPath);

+ 12 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEvents.java

@@ -25,7 +25,6 @@ import junit.framework.Assert;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler;
@@ -67,8 +66,17 @@ public class TestJobHistoryEvents {
      * completed maps 
     */
     HistoryContext context = new JobHistory();
+    // test start and stop states
     ((JobHistory)context).init(conf);
-    Job parsedJob = context.getJob(jobId);
+    ((JobHistory)context).start();
+    Assert.assertTrue(context.getStartTime() > 0);
+    Assert.assertEquals(((JobHistory)context).getServiceState(),
+        Service.STATE.STARTED);
+
+    ((JobHistory)context).stop();
+    Assert.assertEquals(((JobHistory)context).getServiceState(),
+        Service.STATE.STOPPED);
+    Job parsedJob = context.getJob(jobId);
     Assert.assertEquals("CompletedMaps not correct", 2,
         parsedJob.getCompletedMaps());
     Assert.assertEquals(System.getProperty("user.name"), parsedJob.getUserName());
@@ -177,9 +185,8 @@ public class TestJobHistoryEvents {
     @Override
     protected EventHandler<JobHistoryEvent> createJobHistoryHandler(
         AppContext context) {
-      JobHistoryEventHandler eventHandler = new JobHistoryEventHandler(
-          context, getStartCount());
-      return eventHandler;
+      return new JobHistoryEventHandler(
+              context, getStartCount());
     }
   }
 

+ 39 - 12
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java

@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.mapreduce.v2.hs;
 
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -40,6 +42,7 @@ import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.jobhistory.EventReader;
 import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent;
+import org.apache.hadoop.mapreduce.jobhistory.HistoryViewer;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
@@ -60,7 +63,6 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
 import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo;
 import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory;
 import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils;
-import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
 import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
 import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo;
 import org.apache.hadoop.net.DNSToSwitchMapping;
@@ -78,21 +80,27 @@ public class TestJobHistoryParsing {
 
   private static final String RACK_NAME = "/MyRackName";
 
+  private ByteArrayOutputStream outContent = new ByteArrayOutputStream();
+
   public static class MyResolver implements DNSToSwitchMapping {
     @Override
     public List<String> resolve(List<String> names) {
       return Arrays.asList(new String[]{RACK_NAME});
     }
+
+    @Override
+    public void reloadCachedMappings() {
+    }
   }
 
-  @Test
+  @Test (timeout=50000)
   public void testJobInfo() throws Exception {
     JobInfo info = new JobInfo();
     Assert.assertEquals("NORMAL", info.getPriority());
     info.printAll();
   }
 
-  @Test
+  @Test (timeout=50000)
   public void testHistoryParsing() throws Exception {
     LOG.info("STARTING testHistoryParsing()");
     try {
@@ -102,7 +110,7 @@ public class TestJobHistoryParsing {
     }
   }
   
-  @Test
+  @Test (timeout=50000)
   public void testHistoryParsingWithParseErrors() throws Exception {
     LOG.info("STARTING testHistoryParsingWithParseErrors()");
     try {
@@ -317,18 +325,37 @@ public class TestJobHistoryParsing {
         }
       }
     }
+    
+    // test output for HistoryViewer
+    PrintStream stdps = System.out;
+    try {
+      System.setOut(new PrintStream(outContent));
+      HistoryViewer viewer = new HistoryViewer(fc.makeQualified(
+          fileInfo.getHistoryFile()).toString(), conf, true);
+      viewer.print();
+      
+      for (TaskInfo taskInfo : allTasks.values()) {
+        String test = (taskInfo.getTaskStatus() == null ? ""
+            : taskInfo.getTaskStatus()) + " " + taskInfo.getTaskType()
+            + " task list for " + taskInfo.getTaskId().getJobID();
+        Assert.assertTrue(outContent.toString().indexOf(test) > 0);
+        Assert.assertTrue(outContent.toString()
+            .indexOf(taskInfo.getTaskId().toString()) > 0);
+      }
+    } finally {
+      System.setOut(stdps);
+    }
   }
-  
+
   // Computes finished maps similar to RecoveryService...
-  private long computeFinishedMaps(JobInfo jobInfo, 
-      int numMaps, int numSuccessfulMaps) {
+  private long computeFinishedMaps(JobInfo jobInfo, int numMaps,
+      int numSuccessfulMaps) {
     if (numMaps == numSuccessfulMaps) {
       return jobInfo.getFinishedMaps();
     }
-    
+
     long numFinishedMaps = 0;
-    Map<org.apache.hadoop.mapreduce.TaskID, TaskInfo> taskInfos = 
-        jobInfo.getAllTasks();
+    Map<org.apache.hadoop.mapreduce.TaskID, TaskInfo> taskInfos = jobInfo
+        .getAllTasks();
     for (TaskInfo taskInfo : taskInfos.values()) {
       if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) {
         ++numFinishedMaps;
@@ -337,7 +364,7 @@ public class TestJobHistoryParsing {
     return numFinishedMaps;
   }
   
-  @Test
+  @Test (timeout=50000)
   public void testHistoryParsingForFailedAttempts() throws Exception {
     LOG.info("STARTING testHistoryParsingForFailedAttempts");
     try {
@@ -464,7 +491,7 @@ public class TestJobHistoryParsing {
     }
   }
 
-  @Test
+  @Test (timeout=50000)
   public void testScanningOldDirs() throws Exception {
     LOG.info("STARTING testScanningOldDirs");
     try {

+ 9 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java

@@ -38,6 +38,11 @@ public class MiniMRClientClusterFactory {
 
   public static MiniMRClientCluster create(Class<?> caller, int noOfNMs,
       Configuration conf) throws IOException {
+    return create(caller, caller.getSimpleName(), noOfNMs, conf);
+  }
+
+  public static MiniMRClientCluster create(Class<?> caller, String identifier,
+      int noOfNMs, Configuration conf) throws IOException {
 
     if (conf == null) {
       conf = new Configuration();
@@ -45,7 +50,7 @@ public class MiniMRClientClusterFactory {
 
     FileSystem fs = FileSystem.get(conf);
 
-    Path testRootDir = new Path("target", caller.getSimpleName() + "-tmpDir")
+    Path testRootDir = new Path("target", identifier + "-tmpDir")
         .makeQualified(fs);
     Path appJar = new Path(testRootDir, "MRAppJar.jar");
 
@@ -65,10 +70,10 @@ public class MiniMRClientClusterFactory {
     fs.setPermission(remoteCallerJar, new FsPermission("744"));
     job.addFileToClassPath(remoteCallerJar);
 
-    MiniMRYarnCluster miniMRYarnCluster = new MiniMRYarnCluster(caller
-        .getSimpleName(), noOfNMs);
+    MiniMRYarnCluster miniMRYarnCluster = new MiniMRYarnCluster(identifier,
+        noOfNMs);
     job.getConfiguration().set("minimrclientcluster.caller.name",
-        caller.getSimpleName());
+        identifier);
     job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number",
         noOfNMs);
     miniMRYarnCluster.init(job.getConfiguration());

+ 4 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRCluster.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.mapred;
 
 import java.io.IOException;
+import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -177,8 +178,10 @@ public class MiniMRCluster {
       int numTrackerToExclude, Clock clock) throws IOException {
     if (conf == null) conf = new JobConf();
     FileSystem.setDefaultUri(conf, namenode);
+    String identifier = this.getClass().getSimpleName() + "_"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE));
     mrClientCluster = MiniMRClientClusterFactory.create(this.getClass(),
-        numTaskTrackers, conf);
+        identifier, numTaskTrackers, conf);
   }
 
   public UserGroupInformation getUgi() {

+ 7 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java

@@ -549,15 +549,19 @@ public class ShuffleHandler extends AbstractService
           ContainerLocalizer.USERCACHE + "/" + user + "/"
               + ContainerLocalizer.APPCACHE + "/"
               + ConverterUtils.toString(appID) + "/output" + "/" + mapId;
-      LOG.debug("DEBUG0 " + base);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("DEBUG0 " + base);
+      }
       // Index file
       Path indexFileName = lDirAlloc.getLocalPathToRead(
           base + "/file.out.index", conf);
       // Map-output file
       Path mapOutputFileName = lDirAlloc.getLocalPathToRead(
           base + "/file.out", conf);
-      LOG.debug("DEBUG1 " + base + " : " + mapOutputFileName + " : " +
-          indexFileName);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("DEBUG1 " + base + " : " + mapOutputFileName + " : "
+            + indexFileName);
+      }
       final IndexRecord info = 
         indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
       final ShuffleHeader header =
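
Guarding LOG.debug with isDebugEnabled() means the message string is only concatenated when debug output is actually enabled, which matters on the shuffle handler's hot path. The general form of the guard:

    if (LOG.isDebugEnabled()) {
      // Concatenation is skipped entirely at INFO level and above.
      LOG.debug("DEBUG1 " + base + " : " + mapOutputFileName + " : "
          + indexFileName);
    }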

+ 26 - 5
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java

@@ -30,6 +30,7 @@ import org.apache.hadoop.tools.util.DistCpUtils;
 import org.apache.hadoop.security.Credentials;
 
 import java.io.IOException;
+import java.lang.reflect.Constructor;
 
 /**
  * The CopyListing abstraction is responsible for how the list of
@@ -193,14 +194,34 @@ public abstract class CopyListing extends Configured {
    * @param credentials Credentials object on which the FS delegation tokens are cached
    * @param options The input Options, to help choose the appropriate CopyListing Implementation.
    * @return An instance of the appropriate CopyListing implementation.
+   * @throws java.io.IOException - Exception if any
    */
   public static CopyListing getCopyListing(Configuration configuration,
                                            Credentials credentials,
-                                           DistCpOptions options) {
-    if (options.getSourceFileListing() == null) {
-      return new GlobbedCopyListing(configuration, credentials);
-    } else {
-      return new FileBasedCopyListing(configuration, credentials);
+                                           DistCpOptions options)
+      throws IOException {
+
+    String copyListingClassName = configuration.get(DistCpConstants.
+        CONF_LABEL_COPY_LISTING_CLASS, "");
+    Class<? extends CopyListing> copyListingClass;
+    try {
+      if (! copyListingClassName.isEmpty()) {
+        copyListingClass = configuration.getClass(DistCpConstants.
+            CONF_LABEL_COPY_LISTING_CLASS, GlobbedCopyListing.class,
+            CopyListing.class);
+      } else {
+        if (options.getSourceFileListing() == null) {
+            copyListingClass = GlobbedCopyListing.class;
+        } else {
+            copyListingClass = FileBasedCopyListing.class;
+        }
+      }
+      copyListingClassName = copyListingClass.getName();
+      Constructor<? extends CopyListing> constructor = copyListingClass.
+          getDeclaredConstructor(Configuration.class, Credentials.class);
+      return constructor.newInstance(configuration, credentials);
+    } catch (Exception e) {
+      throw new IOException("Unable to instantiate " + copyListingClassName, e);
     }
   }
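
With this hook a custom listing can be plugged in purely through configuration. A hypothetical sketch (MyCopyListing is an illustrative name; the class must expose the (Configuration, Credentials) constructor the reflective lookup requires, and options stands for a prepared DistCpOptions):

    Configuration conf = new Configuration();
    conf.setClass(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS,
        MyCopyListing.class, CopyListing.class);
    // getCopyListing now instantiates MyCopyListing reflectively; any
    // reflection failure surfaces as IOException("Unable to instantiate ...").
    CopyListing listing =
        CopyListing.getCopyListing(conf, new Credentials(), options);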
 

+ 2 - 2
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java

@@ -319,7 +319,7 @@ public class DistCp extends Configured implements Tool {
    * @return Returns the path where the copy listing is created
    * @throws IOException - If any
    */
-  private Path createInputFileListing(Job job) throws IOException {
+  protected Path createInputFileListing(Job job) throws IOException {
     Path fileListingPath = getFileListingPath();
     CopyListing copyListing = CopyListing.getCopyListing(job.getConfiguration(),
         job.getCredentials(), inputOptions);
@@ -334,7 +334,7 @@ public class DistCp extends Configured implements Tool {
    * @return - Path where the copy listing file has to be saved
    * @throws IOException - Exception if any
    */
-  private Path getFileListingPath() throws IOException {
+  protected Path getFileListingPath() throws IOException {
     String fileListPathStr = metaFolder + "/fileList.seq";
     Path path = new Path(fileListPathStr);
     return new Path(path.toUri().normalize().toString());

+ 3 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

@@ -82,6 +82,9 @@ public class DistCpConstants {
   /* Meta folder where the job's intermediate data is kept */
   public static final String CONF_LABEL_META_FOLDER = "distcp.meta.folder";
 
+  /* DistCp CopyListing class override param */
+  public static final String CONF_LABEL_COPY_LISTING_CLASS = "distcp.copy.listing.class";
+
   /**
    * Conf label for SSL Trust-store location.
    */

+ 30 - 7
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

@@ -127,17 +127,20 @@ public class SimpleCopyListing extends CopyListing {
             if (LOG.isDebugEnabled()) {
               LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
             }
-            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot,
+                localFile, options);
 
             if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
               if (LOG.isDebugEnabled()) {
                 LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
               }
-              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
+                  localFile, options);
             }
           }
         } else {
-          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
+          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot,
+              localFile, options);
         }
       }
     } finally {
@@ -169,6 +172,17 @@ public class SimpleCopyListing extends CopyListing {
     }
   }
 
+  /**
+   * Provides an option to skip the copy of a path. Allows for exclusion
+   * of files such as {@link org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#SUCCEEDED_FILE_NAME}.
+   * @param path - Path being considered for copy while building the file listing
+   * @param options - Input options passed during DistCp invocation
+   * @return - True if the path should be considered for copy, false otherwise
+   */
+  protected boolean shouldCopy(Path path, DistCpOptions options) {
+    return true;
+  }
+
   /** {@inheritDoc} */
   @Override
   protected long getBytesToCopy() {
@@ -210,7 +224,9 @@ public class SimpleCopyListing extends CopyListing {
 
   private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
                                          FileStatus sourceStatus,
-                                         Path sourcePathRoot, boolean localFile)
+                                         Path sourcePathRoot,
+                                         boolean localFile,
+                                         DistCpOptions options)
                                          throws IOException {
     FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
     Stack<FileStatus> pathStack = new Stack<FileStatus>();
@@ -221,7 +237,8 @@ public class SimpleCopyListing extends CopyListing {
         if (LOG.isDebugEnabled())
           LOG.debug("Recording source-path: "
                     + sourceStatus.getPath() + " for copy.");
-        writeToFileListing(fileListWriter, child, sourcePathRoot, localFile);
+        writeToFileListing(fileListWriter, child, sourcePathRoot,
+             localFile, options);
         if (isDirectoryAndNotEmpty(sourceFS, child)) {
           if (LOG.isDebugEnabled())
             LOG.debug("Traversing non-empty source dir: "
@@ -233,8 +250,10 @@ public class SimpleCopyListing extends CopyListing {
   }
 
   private void writeToFileListing(SequenceFile.Writer fileListWriter,
-                                  FileStatus fileStatus, Path sourcePathRoot,
-                                  boolean localFile) throws IOException {
+                                  FileStatus fileStatus,
+                                  Path sourcePathRoot,
+                                  boolean localFile,
+                                  DistCpOptions options) throws IOException {
     if (fileStatus.getPath().equals(sourcePathRoot) && fileStatus.isDirectory())
       return; // Skip the root-paths.
 
@@ -248,6 +267,10 @@ public class SimpleCopyListing extends CopyListing {
       status = getFileStatus(fileStatus);
     }
 
+    if (!shouldCopy(fileStatus.getPath(), options)) {
+      return;
+    }
+
     fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
         fileStatus.getPath())), status);
     fileListWriter.sync();
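
A subclass can now exclude paths from the listing by overriding the new hook, for example to skip the _SUCCESS marker the javadoc mentions (the class name below is illustrative):

    class SuccessSkippingCopyListing extends SimpleCopyListing {
      public SuccessSkippingCopyListing(Configuration configuration,
                                        Credentials credentials) {
        super(configuration, credentials);
      }

      @Override
      protected boolean shouldCopy(Path path, DistCpOptions options) {
        // Drop FileOutputCommitter's _SUCCESS marker; copy everything else.
        return !path.getName().equals(
            FileOutputCommitter.SUCCEEDED_FILE_NAME);
      }
    }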

+ 4 - 5
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -124,7 +124,7 @@ public class RetriableFileCopyCommand extends RetriableCommand {
             tmpTargetPath, true, BUFFER_SIZE,
             getReplicationFactor(fileAttributes, sourceFileStatus, targetFS, tmpTargetPath),
             getBlockSize(fileAttributes, sourceFileStatus, targetFS, tmpTargetPath), context));
-    return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, true, context);
+    return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, context);
   }
 
   private void compareFileLengths(FileStatus sourceFileStatus, Path target,
@@ -170,8 +170,8 @@ public class RetriableFileCopyCommand extends RetriableCommand {
   }
 
   private long copyBytes(FileStatus sourceFileStatus, OutputStream outStream,
-                         int bufferSize, boolean mustCloseStream,
-                         Mapper.Context context) throws IOException {
+                         int bufferSize, Mapper.Context context)
+      throws IOException {
     Path source = sourceFileStatus.getPath();
     byte buf[] = new byte[bufferSize];
     ThrottledInputStream inStream = null;
@@ -187,8 +187,7 @@ public class RetriableFileCopyCommand extends RetriableCommand {
         bytesRead = inStream.read(buf);
       }
     } finally {
-      if (mustCloseStream)
-        IOUtils.cleanup(LOG, outStream, inStream);
+      IOUtils.cleanup(LOG, outStream, inStream);
     }
 
     return totalBytesRead;

+ 5 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java

@@ -52,6 +52,11 @@ public class ThrottledInputStream extends InputStream {
     this.maxBytesPerSec = maxBytesPerSec;
   }
 
+  @Override
+  public void close() throws IOException {
+    rawStream.close();
+  }
+
   /** @inheritDoc */
   @Override
   public int read() throws IOException {

+ 37 - 4
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
 import org.apache.hadoop.tools.util.TestDistCpUtils;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.security.Credentials;
@@ -79,7 +80,39 @@ public class TestCopyListing extends SimpleCopyListing {
     return 0;
   }
 
-  @Test
+  @Test(timeout=10000)
+  public void testSkipCopy() throws Exception {
+    SimpleCopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS) {
+      @Override
+      protected boolean shouldCopy(Path path, DistCpOptions options) {
+        return !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME);
+      }
+    };
+    FileSystem fs = FileSystem.get(getConf());
+    List<Path> srcPaths = new ArrayList<Path>();
+    srcPaths.add(new Path("/tmp/in4/1"));
+    srcPaths.add(new Path("/tmp/in4/2"));
+    Path target = new Path("/tmp/out4/1");
+    TestDistCpUtils.createFile(fs, "/tmp/in4/1/_SUCCESS");
+    TestDistCpUtils.createFile(fs, "/tmp/in4/1/file");
+    TestDistCpUtils.createFile(fs, "/tmp/in4/2");
+    fs.mkdirs(target);
+    DistCpOptions options = new DistCpOptions(srcPaths, target);
+    Path listingFile = new Path("/tmp/list4");
+    listing.buildListing(listingFile, options);
+    Assert.assertEquals(listing.getNumberOfPaths(), 2);
+    SequenceFile.Reader reader = new SequenceFile.Reader(getConf(),
+        SequenceFile.Reader.file(listingFile));
+    FileStatus fileStatus = new FileStatus();
+    Text relativePath = new Text();
+    Assert.assertTrue(reader.next(relativePath, fileStatus));
+    Assert.assertEquals(relativePath.toString(), "/1/file");
+    Assert.assertTrue(reader.next(relativePath, fileStatus));
+    Assert.assertEquals(relativePath.toString(), "/2");
+    Assert.assertFalse(reader.next(relativePath, fileStatus));
+  }
+
+  @Test(timeout=10000)
   public void testMultipleSrcToFile() {
     FileSystem fs = null;
     try {
@@ -124,7 +157,7 @@ public class TestCopyListing extends SimpleCopyListing {
     }
   }
 
-  @Test
+  @Test(timeout=10000)
   public void testDuplicates() {
     FileSystem fs = null;
     try {
@@ -150,7 +183,7 @@ public class TestCopyListing extends SimpleCopyListing {
     }
   }
 
-  @Test
+  @Test(timeout=10000)
   public void testBuildListing() {
     FileSystem fs = null;
     try {
@@ -206,7 +239,7 @@ public class TestCopyListing extends SimpleCopyListing {
     }
   }
 
-  @Test
+  @Test(timeout=10000)
   public void testBuildListingForSingleFile() {
     FileSystem fs = null;
     String testRootString = "/singleFileListing";

+ 71 - 20
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Cluster;
 import org.apache.hadoop.mapreduce.JobSubmissionFiles;
+import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.tools.util.TestDistCpUtils;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -34,6 +35,7 @@ import org.junit.Test;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 public class TestIntegration {
@@ -68,7 +70,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleFileMissingTarget() {
     caseSingleFileMissingTarget(false);
     caseSingleFileMissingTarget(true);
@@ -91,7 +93,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleFileTargetFile() {
     caseSingleFileTargetFile(false);
     caseSingleFileTargetFile(true);
@@ -101,7 +103,7 @@ public class TestIntegration {
 
     try {
       addEntries(listFile, "singlefile1/file1");
-      createFiles("singlefile1/file1", target.toString());
+      createFiles("singlefile1/file1", "target");
 
       runTest(listFile, target, sync);
 
@@ -114,7 +116,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleFileTargetDir() {
     caseSingleFileTargetDir(false);
     caseSingleFileTargetDir(true);
@@ -138,7 +140,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleDirTargetMissing() {
     caseSingleDirTargetMissing(false);
     caseSingleDirTargetMissing(true);
@@ -161,7 +163,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testSingleDirTargetPresent() {
 
     try {
@@ -180,7 +182,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateSingleDirTargetPresent() {
 
     try {
@@ -199,7 +201,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testMultiFileTargetPresent() {
     caseMultiFileTargetPresent(false);
     caseMultiFileTargetPresent(true);
@@ -223,7 +225,56 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
+  public void testCustomCopyListing() {
+
+    try {
+      addEntries(listFile, "multifile1/file3", "multifile1/file4", "multifile1/file5");
+      createFiles("multifile1/file3", "multifile1/file4", "multifile1/file5");
+      mkdirs(target.toString());
+
+      Configuration conf = getConf();
+      try {
+        conf.setClass(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS,
+            CustomCopyListing.class, CopyListing.class);
+        DistCpOptions options = new DistCpOptions(Arrays.
+            asList(new Path(root + "/" + "multifile1")), target);
+        options.setSyncFolder(true);
+        options.setDeleteMissing(false);
+        options.setOverwrite(false);
+        try {
+          new DistCp(conf, options).execute();
+        } catch (Exception e) {
+          LOG.error("Exception encountered ", e);
+          throw new IOException(e);
+        }
+      } finally {
+        conf.unset(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS);
+      }
+
+      checkResult(target, 2, "file4", "file5");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  private static class CustomCopyListing extends SimpleCopyListing {
+
+    public CustomCopyListing(Configuration configuration,
+                             Credentials credentials) {
+      super(configuration, credentials);
+    }
+
+    @Override
+    protected boolean shouldCopy(Path path, DistCpOptions options) {
+      return !path.getName().equals("file3");
+    }
+  }
+
+  @Test(timeout=100000)
   public void testMultiFileTargetMissing() {
     caseMultiFileTargetMissing(false);
     caseMultiFileTargetMissing(true);
@@ -246,7 +297,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testMultiDirTargetPresent() {
 
     try {
@@ -265,7 +316,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateMultiDirTargetPresent() {
 
     try {
@@ -284,7 +335,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testMultiDirTargetMissing() {
 
     try {
@@ -304,7 +355,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateMultiDirTargetMissing() {
 
     try {
@@ -323,7 +374,7 @@ public class TestIntegration {
     }
   }
   
-  @Test
+  @Test(timeout=100000)
   public void testDeleteMissingInDestination() {
     
     try {
@@ -343,7 +394,7 @@ public class TestIntegration {
     }
   }
   
-  @Test
+  @Test(timeout=100000)
   public void testOverwrite() {
     byte[] contents1 = "contents1".getBytes();
     byte[] contents2 = "contents2".getBytes();
@@ -375,7 +426,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testGlobTargetMissingSingleLevel() {
 
     try {
@@ -398,7 +449,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateGlobTargetMissingSingleLevel() {
 
     try {
@@ -420,7 +471,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testGlobTargetMissingMultiLevel() {
 
     try {
@@ -444,7 +495,7 @@ public class TestIntegration {
     }
   }
 
-  @Test
+  @Test(timeout=100000)
   public void testUpdateGlobTargetMissingMultiLevel() {
 
     try {
@@ -468,7 +519,7 @@ public class TestIntegration {
     }
   }
   
-  @Test
+  @Test(timeout=100000)
   public void testCleanup() {
     try {
       Path sourcePath = new Path("noscheme:///file");

+ 28 - 0
hadoop-yarn-project/CHANGES.txt

@@ -18,6 +18,13 @@ Trunk - Unreleased
 
   BUG FIXES
 
+    YARN-488. TestContainerManagerSecurity fails on Windows. (Chris Nauroth
+    via hitesh)
+
+    YARN-490. TestDistributedShell fails on Windows. (Chris Nauroth via hitesh)
+
+    YARN-491. TestContainerLogsPage fails on Windows. (Chris Nauroth via hitesh)
+
   BREAKDOWN OF HADOOP-8562 SUBTASKS
 
     YARN-158. Yarn creating package-info.java must not depend on sh.
@@ -52,6 +59,9 @@ Release 2.0.5-beta - UNRELEASED
 
   INCOMPATIBLE CHANGES
 
+    YARN-396. Rationalize AllocateResponse in RM Scheduler API. (Zhijie Shen
+    via hitesh)
+
   NEW FEATURES
 
   IMPROVEMENTS
@@ -72,6 +82,15 @@ Release 2.0.5-beta - UNRELEASED
     YARN-237. Refreshing the RM page forgets how many rows I had in my
     Datatables (jian he via bobby)
 
+    YARN-481. Add AM Host and RPC Port to ApplicationCLI Status Output 
+    (Chris Riccomini via bikas)
+
+    YARN-297. Improve hashCode implementations for PB records. (Xuan Gong via
+    hitesh)
+
+    YARN-417. Create AMRMClient wrapper that provides asynchronous callbacks.
+    (Sandy Ryza via bikas)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -100,6 +119,12 @@ Release 2.0.5-beta - UNRELEASED
     YARN-196. Nodemanager should be more robust in handling connection failure
     to ResourceManager when a cluster is started (Xuan Gong via hitesh)
 
+    YARN-485. TestProcfsProcessTree#testProcessTree() doesn't wait long enough 
+    for the process to die. (kkambatl via tucu)
+ 
+    YARN-470. Support a way to disable resource monitoring on the NodeManager.
+    (Siddharth Seth via hitesh)
+
 Release 2.0.4-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -408,6 +433,9 @@ Release 0.23.7 - UNRELEASED
     YARN-468. coverage fix for org.apache.hadoop.yarn.server.webproxy.amfilter
     (Aleksey Gorshkov via bobby)
 
+    YARN-200. yarn log does not output all needed information, and is in a
+    binary format (Ravi Prakash via jlowe)
+
   OPTIMIZATIONS
 
     YARN-357. App submission should not be synchronized (daryn)

+ 86 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java

@@ -18,19 +18,23 @@
 
 package org.apache.hadoop.yarn.api.protocolrecords;
 
+import java.util.List;
+
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.AMRMProtocol;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.api.records.Resource;
 
 /**
 * <p>The response sent by the <code>ResourceManager</code> to the
  * <code>ApplicationMaster</code> during resource negotiation.</p>
  *
- * <p>The response, via {@link AMResponse}, includes:
+ * <p>The response includes:
  *   <ul>
  *     <li>Response ID to track duplicate responses.</li>
  *     <li>
@@ -42,6 +46,8 @@ import org.apache.hadoop.yarn.api.records.Container;
  *       The available headroom for resources in the cluster for the
  *       application. 
  *     </li>
+ *     <li>A list of nodes whose status has been updated.</li>
+ *     <li>The number of available nodes in a cluster.</li>
  *   </ul>
  * </p>
  * 
@@ -51,18 +57,90 @@ import org.apache.hadoop.yarn.api.records.Container;
 @Stable
 public interface AllocateResponse {
   /**
-   * Get the {@link AMResponse} sent by the <code>ResourceManager</code>.
-   * @return <code>AMResponse</code> sent by the <code>ResourceManager</code>
+   * Should the <code>ApplicationMaster</code> reboot for being horribly
+   * out-of-sync with the <code>ResourceManager</code>, as indicated by
+   * {@link #getResponseId()}?
+   *
+   * @return <code>true</code> if the <code>ApplicationMaster</code> should
+   *         reboot, <code>false</code> otherwise
    */
   @Public
   @Stable
-  public abstract AMResponse getAMResponse();
+  public boolean getReboot();
 
   @Private
   @Unstable
-  public abstract void setAMResponse(AMResponse amResponse);
-  
-  
+  public void setReboot(boolean reboot);
+
+  /**
+   * Get the <em>last response id</em>.
+   * @return <em>last response id</em>
+   */
+  @Public
+  @Stable
+  public int getResponseId();
+
+  @Private
+  @Unstable
+  public void setResponseId(int responseId);
+
+  /**
+   * Get the list of <code>Container</code>s <em>newly allocated</em> by the
+   * <code>ResourceManager</code>.
+   * @return list of <em>newly allocated</em> <code>Container</code>s
+   */
+  @Public
+  @Stable
+  public List<Container> getAllocatedContainers();
+
+  /**
+   * Set the list of <code>Container</code>s <em>newly allocated</em> by the
+   * <code>ResourceManager</code>.
+   * @param containers list of <em>newly allocated</em> <code>Container</code>s
+   */
+  @Public
+  @Stable
+  public void setAllocatedContainers(List<Container> containers);
+
+  /**
+   * Get the <em>available headroom</em> for resources in the cluster for the
+   * application.
+   * @return limit of available headroom for resources in the cluster for the
+   * application
+   */
+  @Public
+  @Stable
+  public Resource getAvailableResources();
+
+  @Private
+  @Unstable
+  public void setAvailableResources(Resource limit);
+
+  /**
+   * Get the list of <em>completed containers' statuses</em>.
+   * @return the list of <em>completed containers' statuses</em>
+   */
+  @Public
+  @Stable
+  public List<ContainerStatus> getCompletedContainersStatuses();
+
+  @Private
+  @Unstable
+  public void setCompletedContainersStatuses(List<ContainerStatus> containers);
+
+  /**
+   * Get the list of <em>updated <code>NodeReport</code>s</em>. Updates could
+   * be changes in health, availability, etc. of the nodes.
+   * @return The delta of updated nodes since the last response
+   */
+  @Public
+  @Unstable
+  public List<NodeReport> getUpdatedNodes();
+
+  @Private
+  @Unstable
+  public void setUpdatedNodes(final List<NodeReport> updatedNodes);
+
   /**
    * Get the number of hosts available on the cluster.
    * @return the available host count.
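
For context, a minimal sketch of how an ApplicationMaster consumes the flattened
response after this change - the old response.getAMResponse() indirection is gone,
so the fields are read directly. This is a fragment, not a full class: only the
AllocateResponse accessors come from the interface above, and the handler method
itself is hypothetical.

    import java.util.List;
    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerStatus;
    import org.apache.hadoop.yarn.api.records.NodeReport;
    import org.apache.hadoop.yarn.api.records.Resource;

    // Hypothetical per-heartbeat handler; response comes from allocate().
    void handleHeartbeat(AllocateResponse response) {
      if (response.getReboot()) {
        // AM is badly out of sync with the RM and should resync/restart.
        return;
      }
      for (Container c : response.getAllocatedContainers()) {
        // launch work on the newly granted container
      }
      for (ContainerStatus s : response.getCompletedContainersStatuses()) {
        // update completed/failed counters from s.getExitStatus()
      }
      Resource headroom = response.getAvailableResources();
      List<NodeReport> changedNodes = response.getUpdatedNodes();
      // headroom and changedNodes can drive further container requests
    }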

+ 305 - 29
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java

@@ -19,11 +19,24 @@
 package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
 
 
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.ProtoBase;
-import org.apache.hadoop.yarn.api.records.impl.pb.AMResponsePBImpl;
-import org.apache.hadoop.yarn.proto.YarnProtos.AMResponseProto;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerStatusPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.NodeReportPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateResponseProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateResponseProtoOrBuilder;
 
@@ -35,7 +48,12 @@ public class AllocateResponsePBImpl extends ProtoBase<AllocateResponseProto>
   AllocateResponseProto.Builder builder = null;
   boolean viaProto = false;
   
-  private AMResponse amResponse;
+  Resource limit;
+
+  private List<Container> allocatedContainers = null;
+  private List<ContainerStatus> completedContainersStatuses = null;
+
+  private List<NodeReport> updatedNodes = null;
   
   
   public AllocateResponsePBImpl() {
@@ -47,20 +65,38 @@ public class AllocateResponsePBImpl extends ProtoBase<AllocateResponseProto>
     viaProto = true;
   }
   
-  public AllocateResponseProto getProto() {
+  public synchronized AllocateResponseProto getProto() {
       mergeLocalToProto();
     proto = viaProto ? proto : builder.build();
     viaProto = true;
     return proto;
   }
 
-  private void mergeLocalToBuilder() {
-    if (this.amResponse != null) {
-      builder.setAMResponse(convertToProtoFormat(this.amResponse));
+  private synchronized void mergeLocalToBuilder() {
+    if (this.allocatedContainers != null) {
+      builder.clearAllocatedContainers();
+      Iterable<ContainerProto> iterable =
+          getProtoIterable(this.allocatedContainers);
+      builder.addAllAllocatedContainers(iterable);
+    }
+    if (this.completedContainersStatuses != null) {
+      builder.clearCompletedContainerStatuses();
+      Iterable<ContainerStatusProto> iterable =
+          getContainerStatusProtoIterable(this.completedContainersStatuses);
+      builder.addAllCompletedContainerStatuses(iterable);
+    }
+    if (this.updatedNodes != null) {
+      builder.clearUpdatedNodes();
+      Iterable<NodeReportProto> iterable =
+          getNodeReportProtoIterable(this.updatedNodes);
+      builder.addAllUpdatedNodes(iterable);
+    }
+    if (this.limit != null) {
+      builder.setLimit(convertToProtoFormat(this.limit));
     }
   }
 
-  private void mergeLocalToProto() {
+  private synchronized void mergeLocalToProto() {
     if (viaProto) 
       maybeInitBuilder();
     mergeLocalToBuilder();
@@ -68,53 +104,293 @@ public class AllocateResponsePBImpl extends ProtoBase<AllocateResponseProto>
     viaProto = true;
   }
 
-  private void maybeInitBuilder() {
+  private synchronized void maybeInitBuilder() {
     if (viaProto || builder == null) {
       builder = AllocateResponseProto.newBuilder(proto);
     }
     viaProto = false;
   }
-    
   
   @Override
-  public AMResponse getAMResponse() {
+  public synchronized boolean getReboot() {
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    return (p.getReboot());
+  }
+
+  @Override
+  public synchronized void setReboot(boolean reboot) {
+    maybeInitBuilder();
+    builder.setReboot((reboot));
+  }
+
+  @Override
+  public synchronized int getResponseId() {
     AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
-    if (this.amResponse != null) {
-      return this.amResponse;
+    return (p.getResponseId());
+  }
+
+  @Override
+  public synchronized void setResponseId(int responseId) {
+    maybeInitBuilder();
+    builder.setResponseId((responseId));
+  }
+
+  @Override
+  public synchronized Resource getAvailableResources() {
+    if (this.limit != null) {
+      return this.limit;
     }
-    if (!p.hasAMResponse()) {
+
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    if (!p.hasLimit()) {
       return null;
     }
-    this.amResponse= convertFromProtoFormat(p.getAMResponse());
-    return this.amResponse;
+    this.limit = convertFromProtoFormat(p.getLimit());
+    return this.limit;
   }
 
   @Override
-  public void setAMResponse(AMResponse aMResponse) {
+  public synchronized void setAvailableResources(Resource limit) {
     maybeInitBuilder();
-    if (aMResponse == null) 
-      builder.clearAMResponse();
-    this.amResponse = aMResponse;
+    if (limit == null)
+      builder.clearLimit();
+    this.limit = limit;
   }
-  
+
   @Override
-  public int getNumClusterNodes() {
+  public synchronized List<NodeReport> getUpdatedNodes() {
+    initLocalNewNodeReportList();
+    return this.updatedNodes;
+  }
+  @Override
+  public synchronized void setUpdatedNodes(
+      final List<NodeReport> updatedNodes) {
+    if (updatedNodes == null) {
+      this.updatedNodes.clear();
+      return;
+    }
+    this.updatedNodes = new ArrayList<NodeReport>(updatedNodes.size());
+    this.updatedNodes.addAll(updatedNodes);
+  }
+
+  @Override
+  public synchronized List<Container> getAllocatedContainers() {
+    initLocalNewContainerList();
+    return this.allocatedContainers;
+  }
+
+  @Override
+  public synchronized void setAllocatedContainers(
+      final List<Container> containers) {
+    if (containers == null)
+      return;
+    // this looks like a bug because it results in append and not set
+    initLocalNewContainerList();
+    allocatedContainers.addAll(containers);
+  }
+
+  //// Finished containers
+  @Override
+  public synchronized List<ContainerStatus> getCompletedContainersStatuses() {
+    initLocalFinishedContainerList();
+    return this.completedContainersStatuses;
+  }
+
+  @Override
+  public synchronized void setCompletedContainersStatuses(
+      final List<ContainerStatus> containers) {
+    if (containers == null)
+      return;
+    initLocalFinishedContainerList();
+    completedContainersStatuses.addAll(containers);
+  }
+
+  @Override
+  public synchronized int getNumClusterNodes() {
     AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
     return p.getNumClusterNodes();
   }
-  
+
   @Override
-  public void setNumClusterNodes(int numNodes) {
+  public synchronized void setNumClusterNodes(int numNodes) {
     maybeInitBuilder();
     builder.setNumClusterNodes(numNodes);
   }
 
+  // Once this is called, updatedNodes will never be null - until getProto()
+  // is called.
+  private synchronized void initLocalNewNodeReportList() {
+    if (this.updatedNodes != null) {
+      return;
+    }
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<NodeReportProto> list = p.getUpdatedNodesList();
+    updatedNodes = new ArrayList<NodeReport>(list.size());
+
+    for (NodeReportProto n : list) {
+      updatedNodes.add(convertFromProtoFormat(n));
+    }
+  }
+
+  // Once this is called, allocatedContainers will never be null - until
+  // getProto() is called.
+  private synchronized void initLocalNewContainerList() {
+    if (this.allocatedContainers != null) {
+      return;
+    }
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<ContainerProto> list = p.getAllocatedContainersList();
+    allocatedContainers = new ArrayList<Container>();
+
+    for (ContainerProto c : list) {
+      allocatedContainers.add(convertFromProtoFormat(c));
+    }
+  }
+
+  private synchronized Iterable<ContainerProto> getProtoIterable(
+      final List<Container> newContainersList) {
+    maybeInitBuilder();
+    return new Iterable<ContainerProto>() {
+      @Override
+      public synchronized Iterator<ContainerProto> iterator() {
+        return new Iterator<ContainerProto>() {
+
+          Iterator<Container> iter = newContainersList.iterator();
+
+          @Override
+          public synchronized boolean hasNext() {
+            return iter.hasNext();
+          }
+
+          @Override
+          public synchronized ContainerProto next() {
+            return convertToProtoFormat(iter.next());
+          }
+
+          @Override
+          public synchronized void remove() {
+            throw new UnsupportedOperationException();
+
+          }
+        };
+
+      }
+    };
+  }
+
+  private synchronized Iterable<ContainerStatusProto>
+  getContainerStatusProtoIterable(
+      final List<ContainerStatus> newContainersList) {
+    maybeInitBuilder();
+    return new Iterable<ContainerStatusProto>() {
+      @Override
+      public synchronized Iterator<ContainerStatusProto> iterator() {
+        return new Iterator<ContainerStatusProto>() {
+
+          Iterator<ContainerStatus> iter = newContainersList.iterator();
+
+          @Override
+          public synchronized boolean hasNext() {
+            return iter.hasNext();
+          }
+
+          @Override
+          public synchronized ContainerStatusProto next() {
+            return convertToProtoFormat(iter.next());
+          }
+
+          @Override
+          public synchronized void remove() {
+            throw new UnsupportedOperationException();
+
+          }
+        };
+
+      }
+    };
+  }
   
-  private AMResponsePBImpl convertFromProtoFormat(AMResponseProto p) {
-    return new AMResponsePBImpl(p);
+  private synchronized Iterable<NodeReportProto>
+  getNodeReportProtoIterable(
+      final List<NodeReport> newNodeReportsList) {
+    maybeInitBuilder();
+    return new Iterable<NodeReportProto>() {
+      @Override
+      public synchronized Iterator<NodeReportProto> iterator() {
+        return new Iterator<NodeReportProto>() {
+
+          Iterator<NodeReport> iter = newNodeReportsList.iterator();
+
+          @Override
+          public synchronized boolean hasNext() {
+            return iter.hasNext();
+          }
+
+          @Override
+          public synchronized NodeReportProto next() {
+            return convertToProtoFormat(iter.next());
+          }
+
+          @Override
+          public synchronized void remove() {
+            throw new UnsupportedOperationException();
+
+          }
+        };
+
+      }
+    };
   }
 
-  private AMResponseProto convertToProtoFormat(AMResponse t) {
-    return ((AMResponsePBImpl)t).getProto();
+  // Once this is called, completedContainersStatuses will never be null -
+  // until getProto() is called.
+  private synchronized void initLocalFinishedContainerList() {
+    if (this.completedContainersStatuses != null) {
+      return;
+    }
+    AllocateResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<ContainerStatusProto> list = p.getCompletedContainerStatusesList();
+    completedContainersStatuses = new ArrayList<ContainerStatus>();
+
+    for (ContainerStatusProto c : list) {
+      completedContainersStatuses.add(convertFromProtoFormat(c));
+    }
+  }
+
+  private synchronized NodeReportPBImpl convertFromProtoFormat(
+      NodeReportProto p) {
+    return new NodeReportPBImpl(p);
   }
+
+  private synchronized NodeReportProto convertToProtoFormat(NodeReport t) {
+    return ((NodeReportPBImpl)t).getProto();
+  }
+
+  private synchronized ContainerPBImpl convertFromProtoFormat(
+      ContainerProto p) {
+    return new ContainerPBImpl(p);
+  }
+
+  private synchronized ContainerProto convertToProtoFormat(Container t) {
+    return ((ContainerPBImpl)t).getProto();
+  }
+
+  private synchronized ContainerStatusPBImpl convertFromProtoFormat(
+      ContainerStatusProto p) {
+    return new ContainerStatusPBImpl(p);
+  }
+
+  private synchronized ContainerStatusProto convertToProtoFormat(
+      ContainerStatus t) {
+    return ((ContainerStatusPBImpl)t).getProto();
+  }
+
+  private synchronized ResourcePBImpl convertFromProtoFormat(ResourceProto p) {
+    return new ResourcePBImpl(p);
+  }
+
+  private synchronized ResourceProto convertToProtoFormat(Resource r) {
+    return ((ResourcePBImpl) r).getProto();
+  }
+
 }  

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllApplicationsResponsePBImpl.java

@@ -93,7 +93,8 @@ GetAllApplicationsResponse {
     viaProto = false;
   }
 
-  //Once this is called. containerList will never be null - untill a getProto is called.
+  // Once this is called, applicationList will never be null - until
+  // getProto() is called.
   private void initLocalApplicationsList() {
     if (this.applicationList != null) {
       return;

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodesResponsePBImpl.java

@@ -92,7 +92,8 @@ public class GetClusterNodesResponsePBImpl extends
     viaProto = false;
   }
 
-  //Once this is called. containerList will never be null - untill a getProto is called.
+  // Once this is called, nodeManagerInfoList will never be null - until
+  // getProto() is called.
   private void initLocalNodeManagerInfosList() {
     if (this.nodeManagerInfoList != null) {
       return;

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetQueueUserAclsInfoResponsePBImpl.java

@@ -94,7 +94,8 @@ implements GetQueueUserAclsInfoResponse {
     viaProto = false;
   }
 
-  //Once this is called. containerList will never be null - untill a getProto is called.
+  // Once this is called, queueUserAclsInfoList will never be null - until
+  // getProto() is called.
   private void initLocalQueueUserAclsList() {
     if (this.queueUserAclsInfoList != null) {
       return;

+ 0 - 138
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMResponse.java

@@ -1,138 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.hadoop.yarn.api.records;
-
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceAudience.Public;
-import org.apache.hadoop.classification.InterfaceStability.Stable;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
-import org.apache.hadoop.yarn.api.AMRMProtocol;
-import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
-
-/**
- * <p>The response sent by the <code>ResourceManager</code> the  
- * <code>ApplicationMaster</code> during resource negotiation.</p>
- *
- * <p>The response includes:
- *   <ul>
- *     <li>Response ID to track duplicate responses.</li>
- *     <li>
- *       A reboot flag to let the <code>ApplicationMaster</code> know that its 
- *       horribly out of sync and needs to reboot.</li>
- *     <li>A list of newly allocated {@link Container}.</li>
- *     <li>A list of completed {@link Container}.</li>
- *     <li>
- *       The available headroom for resources in the cluster for the
- *       application. 
- *     </li>
- *   </ul>
- * </p>
- * 
- * @see AMRMProtocol#allocate(AllocateRequest)
- */
-@Public
-@Unstable
-public interface AMResponse {
-  /**
-   * Should the <code>ApplicationMaster</code> reboot for being horribly 
-   * out-of-sync with the <code>ResourceManager</code> as deigned by 
-   * {@link #getResponseId()}?
-   * 
-   * @return <code>true</code> if the <code>ApplicationMaster</code> should
-   *         reboot, <code>false</code> otherwise
-   */
-  @Public
-  @Stable
-  public boolean getReboot();
-  
-  @Private
-  @Unstable
-  public void setReboot(boolean reboot);
-
-  /**
-   * Get the <em>last response id</em>.
-   * @return <em>last response id</em>
-   */
-  @Public
-  @Stable
-  public int getResponseId();
-  
-  @Private
-  @Unstable
-  public void setResponseId(int responseId);
-
-  /**
-   * Get the list of <em>newly allocated</em> <code>Container</code> by the 
-   * <code>ResourceManager</code>.
-   * @return list of <em>newly allocated</em> <code>Container</code>
-   */
-  @Public
-  @Stable
-  public List<Container> getAllocatedContainers();
-
-  /**
-   * Set the list of <em>newly allocated</em> <code>Container</code> by the 
-   * <code>ResourceManager</code>.
-   * @param containers list of <em>newly allocated</em> <code>Container</code>
-   */
-  @Public
-  @Stable
-  public void setAllocatedContainers(List<Container> containers);
-
-  /**
-   * Get the <em>available headroom</em> for resources in the cluster for the 
-   * application.
-   * @return limit of available headroom for resources in the cluster for the 
-   * application
-   */
-  @Public
-  @Stable
-  public Resource getAvailableResources();
-
-  @Private
-  @Unstable
-  public void setAvailableResources(Resource limit);
-  
-  /**
-   * Get the list of <em>completed containers' statuses</em>.
-   * @return the list of <em>completed containers' statuses</em>
-   */
-  @Public
-  @Stable
-  public List<ContainerStatus> getCompletedContainersStatuses();
-
-  @Private
-  @Unstable
-  public void setCompletedContainersStatuses(List<ContainerStatus> containers);
-
-  /**
-   * Get the list of <em>updated <code>NodeReport</code>s</em>. Updates could be
-   * changes in health, availability etc of the nodes.
-   * @return The delta of updated nodes since the last response
-   */
-  @Public
-  @Unstable
-  public List<NodeReport> getUpdatedNodes();
-
-  @Private
-  @Unstable
-  public void setUpdatedNodes(final List<NodeReport> updatedNodes);
-}

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptId.java

@@ -76,8 +76,8 @@ public abstract class ApplicationAttemptId implements
   @Override
   public int hashCode() {
     // Generated by eclipse.
-    final int prime = 31;
-    int result = 1;
+    final int prime = 347671;
+    int result = 5501;
     ApplicationId appId = getApplicationId();
     result = prime * result +  appId.hashCode();
     result = prime * result + getAttemptId();

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationId.java

@@ -97,8 +97,8 @@ public abstract class ApplicationId implements Comparable<ApplicationId> {
   @Override
   public int hashCode() {
     // Generated by eclipse.
-    final int prime = 31;
-    int result = 1;
+    final int prime = 371237;
+    int result = 6521;
     long clusterTimestamp = getClusterTimestamp();
     result = prime * result
         + (int) (clusterTimestamp ^ (clusterTimestamp >>> 32));

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java

@@ -86,8 +86,8 @@ public abstract class ContainerId implements Comparable<ContainerId>{
   @Override
   public int hashCode() {
     // Generated by eclipse.
-    final int prime = 31;
-    int result = 1;
+    final int prime = 435569;
+    int result = 7507;
     result = prime * result + getId();
     result = prime * result + getApplicationAttemptId().hashCode();
     return result;

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeId.java

@@ -65,8 +65,8 @@ public abstract class NodeId implements Comparable<NodeId> {
 
   @Override
   public int hashCode() {
-    final int prime = 31;
-    int result = 1;
+    final int prime = 493217;
+    int result = 8501;
     result = prime * result + this.getHost().hashCode();
     result = prime * result + this.getPort();
     return result;

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Priority.java

@@ -39,8 +39,8 @@ public abstract class Priority implements Comparable<Priority> {
   
   @Override
   public int hashCode() {
-    final int prime = 31;
-    int result = 1;
+    final int prime = 517861;
+    int result = 9511;
     result = prime * result + getPriority();
     return result;
   }

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java

@@ -124,8 +124,8 @@ public abstract class ResourceRequest implements Comparable<ResourceRequest> {
 
   @Override
   public int hashCode() {
-    final int prime = 31;
-    int result = 1;
+    final int prime = 2153;
+    int result = 2459;
     Resource capability = getCapability();
     String hostName = getHostName();
     Priority priority = getPriority();
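
The hashCode changes above (YARN-297) all follow one pattern: keep the
Eclipse-generated shape, but give each record type its own large prime and
non-trivial seed instead of the shared prime = 31 / result = 1 defaults, so
that different record types holding equal field values are less likely to
produce colliding hash codes in a mixed hash table. A hedged illustration with
a hypothetical record (equals() omitted for brevity; the constants echo the
NodeId values above):

    class HostPort {
      private final String host;
      private final int port;

      HostPort(String host, int port) {
        this.host = host;
        this.port = port;
      }

      @Override
      public int hashCode() {
        final int prime = 493217;  // type-specific prime
        int result = 8501;         // type-specific seed
        result = prime * result + host.hashCode();
        result = prime * result + port;
        return result;
      }
    }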

+ 0 - 373
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMResponsePBImpl.java

@@ -1,373 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.api.records.impl.pb;
-
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.yarn.api.records.AMResponse;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerStatus;
-import org.apache.hadoop.yarn.api.records.NodeReport;
-import org.apache.hadoop.yarn.api.records.ProtoBase;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.proto.YarnProtos.AMResponseProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.AMResponseProtoOrBuilder;
-import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
-
-
-    
-public class AMResponsePBImpl extends ProtoBase<AMResponseProto> implements AMResponse {
-  AMResponseProto proto = AMResponseProto.getDefaultInstance();
-  AMResponseProto.Builder builder = null;
-  boolean viaProto = false;
-  
-  Resource limit;
-
-  private List<Container> allocatedContainers = null;
-  private List<ContainerStatus> completedContainersStatuses = null;
-//  private boolean hasLocalContainerList = false;
-  
-  private List<NodeReport> updatedNodes = null;
-  
-  public AMResponsePBImpl() {
-    builder = AMResponseProto.newBuilder();
-  }
-
-  public AMResponsePBImpl(AMResponseProto proto) {
-    this.proto = proto;
-    viaProto = true;
-  }
-  
-  public synchronized AMResponseProto getProto() {
-      mergeLocalToProto();
-    proto = viaProto ? proto : builder.build();
-    viaProto = true;
-    return proto;
-  }
-  
-  private synchronized void mergeLocalToBuilder() {
-    if (this.allocatedContainers != null) {
-      builder.clearAllocatedContainers();
-      Iterable<ContainerProto> iterable = 
-          getProtoIterable(this.allocatedContainers);
-      builder.addAllAllocatedContainers(iterable);
-    }
-    if (this.completedContainersStatuses != null) {
-      builder.clearCompletedContainerStatuses();
-      Iterable<ContainerStatusProto> iterable = 
-          getContainerStatusProtoIterable(this.completedContainersStatuses);
-      builder.addAllCompletedContainerStatuses(iterable);
-    }
-    if (this.updatedNodes != null) {
-      builder.clearUpdatedNodes();
-      Iterable<NodeReportProto> iterable = 
-          getNodeReportProtoIterable(this.updatedNodes);
-      builder.addAllUpdatedNodes(iterable);
-    }
-    if (this.limit != null) {
-      builder.setLimit(convertToProtoFormat(this.limit));
-    }
-  }
-  
-  private synchronized void mergeLocalToProto() {
-    if (viaProto) 
-      maybeInitBuilder();
-    mergeLocalToBuilder();
-    proto = builder.build();
-    viaProto = true;
-  }
-
-  private synchronized void maybeInitBuilder() {
-    if (viaProto || builder == null) {
-      builder = AMResponseProto.newBuilder(proto);
-    }
-    viaProto = false;
-  }
-    
-  
-  @Override
-  public synchronized boolean getReboot() {
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    return (p.getReboot());
-  }
-
-  @Override
-  public synchronized void setReboot(boolean reboot) {
-    maybeInitBuilder();
-    builder.setReboot((reboot));
-  }
-  @Override
-  public synchronized int getResponseId() {
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    return (p.getResponseId());
-  }
-
-  @Override
-  public synchronized void setResponseId(int responseId) {
-    maybeInitBuilder();
-    builder.setResponseId((responseId));
-  }
-  @Override
-  public synchronized Resource getAvailableResources() {
-    if (this.limit != null) {
-      return this.limit;
-    }
-
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    if (!p.hasLimit()) {
-      return null;
-    }
-    this.limit = convertFromProtoFormat(p.getLimit());
-    return this.limit;
-  }
-
-  @Override
-  public synchronized void setAvailableResources(Resource limit) {
-    maybeInitBuilder();
-    if (limit == null)
-      builder.clearLimit();
-    this.limit = limit;
-  }
-  
-  @Override
-  public synchronized List<NodeReport> getUpdatedNodes() {
-    initLocalNewNodeReportList();
-    return this.updatedNodes;
-  }
-  
-  //Once this is called. updatedNodes will never be null - until a getProto is called.
-  private synchronized void initLocalNewNodeReportList() {
-    if (this.updatedNodes != null) {
-      return;
-    }
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    List<NodeReportProto> list = p.getUpdatedNodesList();
-    updatedNodes = new ArrayList<NodeReport>(list.size());
-
-    for (NodeReportProto n : list) {
-      updatedNodes.add(convertFromProtoFormat(n));
-    }
-  }
-
-  @Override
-  public synchronized void setUpdatedNodes(final List<NodeReport> updatedNodes) {
-    if (updatedNodes == null) {
-      this.updatedNodes.clear();
-      return;
-    }
-    this.updatedNodes = new ArrayList<NodeReport>(updatedNodes.size());
-    this.updatedNodes.addAll(updatedNodes);
-  }
-
-  @Override
-  public synchronized List<Container> getAllocatedContainers() {
-    initLocalNewContainerList();
-    return this.allocatedContainers;
-  }
-  
-  //Once this is called. containerList will never be null - until a getProto is called.
-  private synchronized void initLocalNewContainerList() {
-    if (this.allocatedContainers != null) {
-      return;
-    }
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    List<ContainerProto> list = p.getAllocatedContainersList();
-    allocatedContainers = new ArrayList<Container>();
-
-    for (ContainerProto c : list) {
-      allocatedContainers.add(convertFromProtoFormat(c));
-    }
-  }
-
-  @Override
-  public synchronized void setAllocatedContainers(final List<Container> containers) {
-    if (containers == null) 
-      return;
-    // this looks like a bug because it results in append and not set
-    initLocalNewContainerList();
-    allocatedContainers.addAll(containers);
-  }
-
-  private synchronized Iterable<ContainerProto> getProtoIterable(
-      final List<Container> newContainersList) {
-    maybeInitBuilder();
-    return new Iterable<ContainerProto>() {
-      @Override
-      public synchronized Iterator<ContainerProto> iterator() {
-        return new Iterator<ContainerProto>() {
-
-          Iterator<Container> iter = newContainersList.iterator();
-
-          @Override
-          public synchronized boolean hasNext() {
-            return iter.hasNext();
-          }
-
-          @Override
-          public synchronized ContainerProto next() {
-            return convertToProtoFormat(iter.next());
-          }
-
-          @Override
-          public synchronized void remove() {
-            throw new UnsupportedOperationException();
-
-          }
-        };
-
-      }
-    };
-  }
-
-  private synchronized Iterable<ContainerStatusProto> 
-  getContainerStatusProtoIterable(
-      final List<ContainerStatus> newContainersList) {
-    maybeInitBuilder();
-    return new Iterable<ContainerStatusProto>() {
-      @Override
-      public synchronized Iterator<ContainerStatusProto> iterator() {
-        return new Iterator<ContainerStatusProto>() {
-
-          Iterator<ContainerStatus> iter = newContainersList.iterator();
-
-          @Override
-          public synchronized boolean hasNext() {
-            return iter.hasNext();
-          }
-
-          @Override
-          public synchronized ContainerStatusProto next() {
-            return convertToProtoFormat(iter.next());
-          }
-
-          @Override
-          public synchronized void remove() {
-            throw new UnsupportedOperationException();
-
-          }
-        };
-
-      }
-    };
-  }
-  
-  private synchronized Iterable<NodeReportProto> 
-  getNodeReportProtoIterable(
-      final List<NodeReport> newNodeReportsList) {
-    maybeInitBuilder();
-    return new Iterable<NodeReportProto>() {
-      @Override
-      public synchronized Iterator<NodeReportProto> iterator() {
-        return new Iterator<NodeReportProto>() {
-
-          Iterator<NodeReport> iter = newNodeReportsList.iterator();
-
-          @Override
-          public synchronized boolean hasNext() {
-            return iter.hasNext();
-          }
-
-          @Override
-          public synchronized NodeReportProto next() {
-            return convertToProtoFormat(iter.next());
-          }
-
-          @Override
-          public synchronized void remove() {
-            throw new UnsupportedOperationException();
-
-          }
-        };
-
-      }
-    };
-  }
-
-  //// Finished containers
-  @Override
-  public synchronized List<ContainerStatus> getCompletedContainersStatuses() {
-    initLocalFinishedContainerList();
-    return this.completedContainersStatuses;
-  }
-  
-  //Once this is called. containerList will never be null - untill a getProto is called.
-  private synchronized void initLocalFinishedContainerList() {
-    if (this.completedContainersStatuses != null) {
-      return;
-    }
-    AMResponseProtoOrBuilder p = viaProto ? proto : builder;
-    List<ContainerStatusProto> list = p.getCompletedContainerStatusesList();
-    completedContainersStatuses = new ArrayList<ContainerStatus>();
-
-    for (ContainerStatusProto c : list) {
-      completedContainersStatuses.add(convertFromProtoFormat(c));
-    }
-  }
-
-  @Override
-  public synchronized void setCompletedContainersStatuses(
-      final List<ContainerStatus> containers) {
-    if (containers == null) 
-      return;
-    initLocalFinishedContainerList();
-    completedContainersStatuses.addAll(containers);
-  }
-  
-  private synchronized NodeReportPBImpl convertFromProtoFormat(
-      NodeReportProto p) {
-    return new NodeReportPBImpl(p);
-  }
-
-  private synchronized NodeReportProto convertToProtoFormat(NodeReport t) {
-    return ((NodeReportPBImpl)t).getProto();
-  }
-  
-  private synchronized ContainerPBImpl convertFromProtoFormat(
-      ContainerProto p) {
-    return new ContainerPBImpl(p);
-  }
-
-  private synchronized ContainerProto convertToProtoFormat(Container t) {
-    return ((ContainerPBImpl)t).getProto();
-  }
-
-  private synchronized ContainerStatusPBImpl convertFromProtoFormat(
-      ContainerStatusProto p) {
-    return new ContainerStatusPBImpl(p);
-  }
-
-  private synchronized ContainerStatusProto convertToProtoFormat(ContainerStatus t) {
-    return ((ContainerStatusPBImpl)t).getProto();
-  }
-
-  private synchronized ResourcePBImpl convertFromProtoFormat(ResourceProto p) {
-    return new ResourcePBImpl(p);
-  }
-
-  private synchronized ResourceProto convertToProtoFormat(Resource r) {
-    return ((ResourcePBImpl) r).getProto();
-  }
-
-}  

+ 0 - 10
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto

@@ -207,16 +207,6 @@ message ResourceRequestProto {
   optional int32 num_containers = 4;
 }
 
-message AMResponseProto {
-  optional bool reboot = 1;
-  optional int32 response_id = 2;
-  repeated ContainerProto allocated_containers = 3;
-  repeated ContainerStatusProto completed_container_statuses = 4;
-  optional ResourceProto limit = 5;
-  repeated NodeReportProto updated_nodes = 6;
-}
-
-
 ////////////////////////////////////////////////////////////////////////
 ////// From client_RM_Protocol /////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////

+ 7 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto

@@ -59,8 +59,13 @@ message AllocateRequestProto {
 }
 
 message AllocateResponseProto {
-  optional AMResponseProto AM_response = 1;
-  optional int32 num_cluster_nodes = 2;
+  optional bool reboot = 1;
+  optional int32 response_id = 2;
+  repeated ContainerProto allocated_containers = 3;
+  repeated ContainerStatusProto completed_container_statuses = 4;
+  optional ResourceProto limit = 5;
+  repeated NodeReportProto updated_nodes = 6;
+  optional int32 num_cluster_nodes = 7;
 }
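
Note that this layout is wire-incompatible with the old one - tag 1 previously
held the length-delimited AM_response message and now holds a varint bool -
which is consistent with YARN-396 being filed under INCOMPATIBLE CHANGES in
the changelog above. A short sketch of populating the flattened message
through its generated builder (setter names follow the protobuf-java
conventions for the fields declared above, and match the calls used in
AllocateResponsePBImpl):

    AllocateResponseProto resp = AllocateResponseProto.newBuilder()
        .setReboot(false)        // field 1
        .setResponseId(42)       // field 2
        .setNumClusterNodes(3)   // field 7 (renumbered from 2)
        .build();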
 
 

+ 191 - 228
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java

@@ -53,7 +53,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
 
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -64,12 +63,12 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
-import org.apache.hadoop.yarn.client.AMRMClient;
 import org.apache.hadoop.yarn.client.AMRMClient.ContainerRequest;
-import org.apache.hadoop.yarn.client.AMRMClientImpl;
+import org.apache.hadoop.yarn.client.AMRMClientAsync;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
@@ -148,8 +147,8 @@ public class ApplicationMaster {
   private YarnRPC rpc;
 
   // Handle to communicate with the Resource Manager
-  private AMRMClient resourceManager;
-
+  private AMRMClientAsync resourceManager;
+  
   // Application Attempt Id ( combination of attemptId and fail count )
   private ApplicationAttemptId appAttemptID;
 
@@ -170,8 +169,6 @@ public class ApplicationMaster {
   // Priority of the request
   private int requestPriority;
 
-  // Simple flag to denote whether all works is done
-  private boolean appDone = false;
   // Counter for completed containers ( complete denotes successful or failed )
   private AtomicInteger numCompletedContainers = new AtomicInteger();
   // Allocated container count so that we know how many containers has the RM
@@ -202,6 +199,9 @@ public class ApplicationMaster {
   // Hardcoded path to shell script in launch container's local env
   private final String ExecShellStringPath = "ExecShellScript.sh";
 
+  private volatile boolean done;
+  private volatile boolean success;
+  
   // Launch threads
   private List<Thread> launchThreads = new ArrayList<Thread>();
 
@@ -394,6 +394,10 @@ public class ApplicationMaster {
         "container_memory", "10"));
     numTotalContainers = Integer.parseInt(cliParser.getOptionValue(
         "num_containers", "1"));
+    if (numTotalContainers == 0) {
+      throw new IllegalArgumentException(
+          "Cannot run distributed shell with no containers");
+    }
     requestPriority = Integer.parseInt(cliParser
         .getOptionValue("priority", "0"));
 
@@ -417,225 +421,202 @@ public class ApplicationMaster {
   public boolean run() throws YarnRemoteException {
     LOG.info("Starting ApplicationMaster");
 
-    // Connect to ResourceManager
-    resourceManager = new AMRMClientImpl(appAttemptID);
+    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
+    
+    resourceManager = new AMRMClientAsync(appAttemptID, 1000, allocListener);
     resourceManager.init(conf);
     resourceManager.start();
 
-    try {
-      // Setup local RPC Server to accept status requests directly from clients
-      // TODO need to setup a protocol for client to be able to communicate to
-      // the RPC server
-      // TODO use the rpc port info to register with the RM for the client to
-      // send requests to this app master
-
-      // Register self with ResourceManager
-      RegisterApplicationMasterResponse response = resourceManager
-          .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
-              appMasterTrackingUrl);
-      // Dump out information about cluster capability as seen by the
-      // resource manager
-      int minMem = response.getMinimumResourceCapability().getMemory();
-      int maxMem = response.getMaximumResourceCapability().getMemory();
-      LOG.info("Min mem capabililty of resources in this cluster " + minMem);
-      LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
-
-      // A resource ask has to be atleast the minimum of the capability of the
-      // cluster, the value has to be a multiple of the min value and cannot
-      // exceed the max.
-      // If it is not an exact multiple of min, the RM will allocate to the
-      // nearest multiple of min
-      if (containerMemory < minMem) {
-        LOG.info("Container memory specified below min threshold of cluster."
-            + " Using min value." + ", specified=" + containerMemory + ", min="
-            + minMem);
-        containerMemory = minMem;
-      } else if (containerMemory > maxMem) {
-        LOG.info("Container memory specified above max threshold of cluster."
-            + " Using max value." + ", specified=" + containerMemory + ", max="
-            + maxMem);
-        containerMemory = maxMem;
-      }
+    // Setup local RPC Server to accept status requests directly from clients
+    // TODO need to setup a protocol for client to be able to communicate to
+    // the RPC server
+    // TODO use the rpc port info to register with the RM for the client to
+    // send requests to this app master
+
+    // Register self with ResourceManager
+    // This will start heartbeating to the RM
+    RegisterApplicationMasterResponse response = resourceManager
+        .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
+            appMasterTrackingUrl);
+    // Dump out information about cluster capability as seen by the
+    // resource manager
+    int minMem = response.getMinimumResourceCapability().getMemory();
+    int maxMem = response.getMaximumResourceCapability().getMemory();
+    LOG.info("Min mem capabililty of resources in this cluster " + minMem);
+    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
+
+    // A resource ask has to be at least the minimum capability of the
+    // cluster; the value has to be a multiple of the min value and cannot
+    // exceed the max.
+    // If it is not an exact multiple of min, the RM will allocate to the
+    // nearest multiple of min.
+    if (containerMemory < minMem) {
+      LOG.info("Container memory specified below min threshold of cluster."
+          + " Using min value." + ", specified=" + containerMemory + ", min="
+          + minMem);
+      containerMemory = minMem;
+    } else if (containerMemory > maxMem) {
+      LOG.info("Container memory specified above max threshold of cluster."
+          + " Using max value." + ", specified=" + containerMemory + ", max="
+          + maxMem);
+      containerMemory = maxMem;
+    }
 
-      // Setup heartbeat emitter
-      // TODO poll RM every now and then with an empty request to let RM know
-      // that we are alive
-      // The heartbeat interval after which an AM is timed out by the RM is
-      // defined by a config setting:
-      // RM_AM_EXPIRY_INTERVAL_MS with default defined by
-      // DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
-      // The allocate calls to the RM count as heartbeats so, for now,
-      // this additional heartbeat emitter is not required.
-
-      // Setup ask for containers from RM
-      // Send request for containers to RM
-      // Until we get our fully allocated quota, we keep on polling RM for
-      // containers
-      // Keep looping until all the containers are launched and shell script
-      // executed on them ( regardless of success/failure).
-
-      int loopCounter = -1;
-
-      while (numCompletedContainers.get() < numTotalContainers && !appDone) {
-        loopCounter++;
-
-        // log current state
-        LOG.info("Current application state: loop=" + loopCounter
-            + ", appDone=" + appDone + ", total=" + numTotalContainers
-            + ", requested=" + numRequestedContainers + ", completed="
-            + numCompletedContainers + ", failed=" + numFailedContainers
-            + ", currentAllocated=" + numAllocatedContainers);
-
-        // Sleep before each loop when asking RM for containers
-        // to avoid flooding RM with spurious requests when it
-        // need not have any available containers
-        // Sleeping for 1000 ms.
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          LOG.info("Sleep interrupted " + e.getMessage());
-        }
 
-        // No. of containers to request
-        // For the first loop, askCount will be equal to total containers needed
-        // From that point on, askCount will always be 0 as current
-        // implementation does not change its ask on container failures.
-        int askCount = numTotalContainers - numRequestedContainers.get();
-        numRequestedContainers.addAndGet(askCount);
+    // Setup ask for containers from RM
+    // Send request for containers to RM
+    // Until we get our fully allocated quota, we keep on polling RM for
+    // containers
+    // Keep looping until all the containers are launched and shell script
+    // executed on them ( regardless of success/failure).
+    ContainerRequest containerAsk = setupContainerAskForRM(numTotalContainers);
+    resourceManager.addContainerRequest(containerAsk);
+    numRequestedContainers.set(numTotalContainers);
 
-        if (askCount > 0) {
-          ContainerRequest containerAsk = setupContainerAskForRM(askCount);
-          resourceManager.addContainerRequest(containerAsk);
-        }
+    while (!done) {
+      try {
+        Thread.sleep(200);
+      } catch (InterruptedException ex) {} // ignored; loop re-checks 'done'
+    }
+    finish();
+    
+    return success;
+  }
+  
+  private void finish() {
+    // Join all launched threads
+    // needed for when we time out
+    // and we need to release containers
+    for (Thread launchThread : launchThreads) {
+      try {
+        launchThread.join(10000);
+      } catch (InterruptedException e) {
+        LOG.info("Exception thrown in thread join: " + e.getMessage());
+        e.printStackTrace();
+      }
+    }
 
-        // Send the request to RM
-        LOG.info("Asking RM for containers" + ", askCount=" + askCount);
-        AMResponse amResp = sendContainerAskToRM();
-
-        // Retrieve list of allocated containers from the response
-        List<Container> allocatedContainers = amResp.getAllocatedContainers();
-        LOG.info("Got response from RM for container ask, allocatedCnt="
-            + allocatedContainers.size());
-        numAllocatedContainers.addAndGet(allocatedContainers.size());
-        for (Container allocatedContainer : allocatedContainers) {
-          LOG.info("Launching shell command on a new container."
-              + ", containerId=" + allocatedContainer.getId()
-              + ", containerNode=" + allocatedContainer.getNodeId().getHost()
-              + ":" + allocatedContainer.getNodeId().getPort()
-              + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress()
-              + ", containerState" + allocatedContainer.getState()
-              + ", containerResourceMemory"
-              + allocatedContainer.getResource().getMemory());
-          // + ", containerToken"
-          // +allocatedContainer.getContainerToken().getIdentifier().toString());
-
-          LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(
-              allocatedContainer);
-          Thread launchThread = new Thread(runnableLaunchContainer);
-
-          // launch and start the container on a separate thread to keep
-          // the main thread unblocked
-          // as all containers may not be allocated at one go.
-          launchThreads.add(launchThread);
-          launchThread.start();
-        }
+    // When the application completes, it should send a finish application
+    // signal to the RM
+    LOG.info("Application completed. Signalling finish to RM");
 
-        // Check what the current available resources in the cluster are
-        // TODO should we do anything if the available resources are not enough?
-        Resource availableResources = amResp.getAvailableResources();
-        LOG.info("Current available resources in the cluster "
-            + availableResources);
-
-        // Check the completed containers
-        List<ContainerStatus> completedContainers = amResp
-            .getCompletedContainersStatuses();
-        LOG.info("Got response from RM for container ask, completedCnt="
-            + completedContainers.size());
-        for (ContainerStatus containerStatus : completedContainers) {
-          LOG.info("Got container status for containerID="
-              + containerStatus.getContainerId() + ", state="
-              + containerStatus.getState() + ", exitStatus="
-              + containerStatus.getExitStatus() + ", diagnostics="
-              + containerStatus.getDiagnostics());
-
-          // non complete containers should not be here
-          assert (containerStatus.getState() == ContainerState.COMPLETE);
-
-          // increment counters for completed/failed containers
-          int exitStatus = containerStatus.getExitStatus();
-          if (0 != exitStatus) {
-            // container failed
-            if (-100 != exitStatus) {
-              // shell script failed
-              // counts as completed
-              numCompletedContainers.incrementAndGet();
-              numFailedContainers.incrementAndGet();
-            } else {
-              // something else bad happened
-              // app job did not complete for some reason
-              // we should re-try as the container was lost for some reason
-              numAllocatedContainers.decrementAndGet();
-              numRequestedContainers.decrementAndGet();
-              // we do not need to release the container as it would be done
-              // by the RM/CM.
-            }
-          } else {
-            // nothing to do
-            // container completed successfully
+    FinalApplicationStatus appStatus;
+    String appMessage = null;
+    success = true;
+    if (numFailedContainers.get() == 0) {
+      appStatus = FinalApplicationStatus.SUCCEEDED;
+    } else {
+      appStatus = FinalApplicationStatus.FAILED;
+      appMessage = "Diagnostics." + ", total=" + numTotalContainers
+          + ", completed=" + numCompletedContainers.get() + ", allocated="
+          + numAllocatedContainers.get() + ", failed="
+          + numFailedContainers.get();
+      success = false;
+    }
+    try {
+      resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
+    } catch (YarnRemoteException ex) {
+      LOG.error("Failed to unregister application", ex);
+    }
+    
+    done = true;
+    resourceManager.stop();
+  }
+  
+  private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {
+    @Override
+    public void onContainersCompleted(List<ContainerStatus> completedContainers) {
+      LOG.info("Got response from RM for container ask, completedCnt="
+          + completedContainers.size());
+      for (ContainerStatus containerStatus : completedContainers) {
+        LOG.info("Got container status for containerID="
+            + containerStatus.getContainerId() + ", state="
+            + containerStatus.getState() + ", exitStatus="
+            + containerStatus.getExitStatus() + ", diagnostics="
+            + containerStatus.getDiagnostics());
+
+        // non complete containers should not be here
+        assert (containerStatus.getState() == ContainerState.COMPLETE);
+
+        // increment counters for completed/failed containers
+        int exitStatus = containerStatus.getExitStatus();
+        if (0 != exitStatus) {
+          // container failed
+          if (YarnConfiguration.ABORTED_CONTAINER_EXIT_STATUS != exitStatus) {
+            // shell script failed
+            // counts as completed
             numCompletedContainers.incrementAndGet();
-            LOG.info("Container completed successfully." + ", containerId="
-                + containerStatus.getContainerId());
+            numFailedContainers.incrementAndGet();
+          } else {
+            // container was killed by framework, possibly preempted
+            // we should re-try as the container was lost for some reason
+            numAllocatedContainers.decrementAndGet();
+            numRequestedContainers.decrementAndGet();
+            // we do not need to release the container as it would be done
+            // by the RM
           }
+        } else {
+          // nothing to do
+          // container completed successfully
+          numCompletedContainers.incrementAndGet();
+          LOG.info("Container completed successfully." + ", containerId="
+              + containerStatus.getContainerId());
         }
-        if (numCompletedContainers.get() == numTotalContainers) {
-          appDone = true;
-        }
-
-        LOG.info("Current application state: loop=" + loopCounter
-            + ", appDone=" + appDone + ", total=" + numTotalContainers
-            + ", requested=" + numRequestedContainers + ", completed="
-            + numCompletedContainers + ", failed=" + numFailedContainers
-            + ", currentAllocated=" + numAllocatedContainers);
-
-        // TODO
-        // Add a timeout handling layer
-        // for misbehaving shell commands
       }
-
-      // Join all launched threads
-      // needed for when we time out
-      // and we need to release containers
-      for (Thread launchThread : launchThreads) {
-        try {
-          launchThread.join(10000);
-        } catch (InterruptedException e) {
-          LOG.info("Exception thrown in thread join: " + e.getMessage());
-          e.printStackTrace();
-        }
+      
+      // ask for more containers if any failed
+      int askCount = numTotalContainers - numRequestedContainers.get();
+      numRequestedContainers.addAndGet(askCount);
+
+      if (askCount > 0) {
+        ContainerRequest containerAsk = setupContainerAskForRM(askCount);
+        resourceManager.addContainerRequest(containerAsk);
       }
+      
+      // set progress to deliver to RM on next heartbeat
+      float progress = (float) numCompletedContainers.get()
+          / numTotalContainers;
+      resourceManager.setProgress(progress);
+      
+      if (numCompletedContainers.get() == numTotalContainers) {
+        done = true;
+      }
+    }
 
-      // When the application completes, it should send a finish application
-      // signal to the RM
-      LOG.info("Application completed. Signalling finish to RM");
-
-      FinalApplicationStatus appStatus;
-      String appMessage = null;
-      boolean isSuccess = true;
-      if (numFailedContainers.get() == 0) {
-        appStatus = FinalApplicationStatus.SUCCEEDED;
-      } else {
-        appStatus = FinalApplicationStatus.FAILED;
-        appMessage = "Diagnostics." + ", total=" + numTotalContainers
-            + ", completed=" + numCompletedContainers.get() + ", allocated="
-            + numAllocatedContainers.get() + ", failed="
-            + numFailedContainers.get();
-        isSuccess = false;
+    @Override
+    public void onContainersAllocated(List<Container> allocatedContainers) {
+      LOG.info("Got response from RM for container ask, allocatedCnt="
+          + allocatedContainers.size());
+      numAllocatedContainers.addAndGet(allocatedContainers.size());
+      for (Container allocatedContainer : allocatedContainers) {
+        LOG.info("Launching shell command on a new container."
+            + ", containerId=" + allocatedContainer.getId()
+            + ", containerNode=" + allocatedContainer.getNodeId().getHost()
+            + ":" + allocatedContainer.getNodeId().getPort()
+            + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress()
+            + ", containerState" + allocatedContainer.getState()
+            + ", containerResourceMemory"
+            + allocatedContainer.getResource().getMemory());
+        // + ", containerToken"
+        // +allocatedContainer.getContainerToken().getIdentifier().toString());
+
+        LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(
+            allocatedContainer);
+        Thread launchThread = new Thread(runnableLaunchContainer);
+
+        // launch and start the container on a separate thread to keep
+        // the main thread unblocked
+        // as all containers may not be allocated at one go.
+        launchThreads.add(launchThread);
+        launchThread.start();
       }
-      resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
-      return isSuccess;
-    } finally {
-      resourceManager.stop();
     }
+
+    @Override
+    public void onRebootRequest() {}
+
+    @Override
+    public void onNodesUpdated(List<NodeReport> updatedNodes) {}
   }
 
   /**
@@ -811,22 +792,4 @@ public class ApplicationMaster {
     LOG.info("Requested container ask: " + request.toString());
     return request;
   }
-
-  /**
-   * Ask RM to allocate given no. of containers to this Application Master
-   *
-   * @param requestedContainers Containers to ask for from RM
-   * @return Response from RM to AM with allocated containers
-   * @throws YarnRemoteException
-   */
-  private AMResponse sendContainerAskToRM() throws YarnRemoteException {
-    float progressIndicator = (float) numCompletedContainers.get()
-        / numTotalContainers;
-
-    LOG.info("Sending request to RM for containers" + ", progress="
-        + progressIndicator);
-
-    AllocateResponse resp = resourceManager.allocate(progressIndicator);
-    return resp.getAMResponse();
-  }
 }

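The ApplicationMaster rewrite above drops the synchronous allocate loop (the removed sendContainerAskToRM) in favor of the callback-driven AMRMClientAsync introduced later in this commit. A minimal sketch of the resulting wiring, under assumed names (AsyncAmSketch, and a caller that supplies the attempt id and configuration); this is an illustration, not part of the patch:

import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.client.AMRMClientAsync;

class AsyncAmSketch {
  static void run(ApplicationAttemptId attemptId, Configuration conf)
      throws Exception {
    final AtomicBoolean done = new AtomicBoolean(false);
    AMRMClientAsync.CallbackHandler handler =
        new AMRMClientAsync.CallbackHandler() {
          public void onContainersCompleted(List<ContainerStatus> statuses) {
            // A real AM counts completions/failures as in the diff above.
            done.set(true);
          }
          public void onContainersAllocated(List<Container> containers) {
            // Launch work on each allocated container.
          }
          public void onRebootRequest() {}
          public void onNodesUpdated(List<NodeReport> updatedNodes) {}
        };
    AMRMClientAsync client = new AMRMClientAsync(attemptId, 1000, handler);
    client.init(conf);
    client.start();
    client.registerApplicationMaster("localhost", 0, "");
    while (!done.get()) {          // heartbeats run on background threads
      Thread.sleep(100);
    }
    client.unregisterApplicationMaster(
        FinalApplicationStatus.SUCCEEDED, null, null);
    client.stop();
  }
}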
+ 7 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java

@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.applications.distributedshell;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -39,6 +40,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
 import org.apache.hadoop.yarn.api.ClientRMProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
@@ -481,14 +483,15 @@ public class Client extends YarnClientImpl {
     // It should be provided out of the box. 
     // For now setting all required classpaths including
     // the classpath to "." for the application jar
-    StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
+    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$())
+      .append(File.pathSeparatorChar).append("./*");
     for (String c : conf.getStrings(
         YarnConfiguration.YARN_APPLICATION_CLASSPATH,
         YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
-      classPathEnv.append(':');
+      classPathEnv.append(File.pathSeparatorChar);
       classPathEnv.append(c.trim());
     }
-    classPathEnv.append(":./log4j.properties");
+    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");
 
     // add the runtime classpath needed for tests to work
     if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
@@ -505,7 +508,7 @@ public class Client extends YarnClientImpl {
 
     // Set java executable command 
     LOG.info("Setting up app master command");
-    vargs.add("${JAVA_HOME}" + "/bin/java");
+    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
     // Set Xmx based on am memory size
     vargs.add("-Xmx" + amMemory + "m");
     // Set class name 

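The point of replacing the hard-coded ':' with File.pathSeparatorChar above is portability: the same classpath-building code now yields ':'-separated entries on Unix and ';'-separated entries on Windows. A standalone illustration of the joining idiom (class and entry names are made up):

import java.io.File;

class ClasspathJoinSketch {
  // Joins classpath entries with the platform separator:
  // ':' on Unix-like systems, ';' on Windows.
  static String join(String... entries) {
    StringBuilder cp = new StringBuilder();
    for (String entry : entries) {
      if (cp.length() > 0) {
        cp.append(File.pathSeparatorChar);
      }
      cp.append(entry);
    }
    return cp.toString();
  }

  public static void main(String[] args) {
    // Prints "./*:./log4j.properties" on Unix and
    // "./*;./log4j.properties" on Windows.
    System.out.println(join("./*", "./log4j.properties"));
  }
}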
+ 6 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java

@@ -29,6 +29,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.JarFinder;
+import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.MiniYARNCluster;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
@@ -54,8 +55,8 @@ public class TestDistributedShell {
     conf.setClass(YarnConfiguration.RM_SCHEDULER, 
         FifoScheduler.class, ResourceScheduler.class);
     if (yarnCluster == null) {
-      yarnCluster = new MiniYARNCluster(TestDistributedShell.class.getName(),
-          1, 1, 1);
+      yarnCluster = new MiniYARNCluster(
+        TestDistributedShell.class.getSimpleName(), 1, 1, 1);
       yarnCluster.init(conf);
       yarnCluster.start();
       URL url = Thread.currentThread().getContextClassLoader().getResource("yarn-site.xml");
@@ -82,7 +83,7 @@ public class TestDistributedShell {
     }
   }
 
-  @Test
+  @Test(timeout=30000)
   public void testDSShell() throws Exception {
 
     String[] args = {
@@ -91,7 +92,7 @@ public class TestDistributedShell {
         "--num_containers",
         "2",
         "--shell_command",
-        "ls",
+        Shell.WINDOWS ? "dir" : "ls",
         "--master_memory",
         "512",
         "--container_memory",
@@ -110,7 +111,7 @@ public class TestDistributedShell {
 
   }
 
-  @Test
+  @Test(timeout=30000)
   public void testDSShellWithNoArgs() throws Exception {
 
     String[] args = {};

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java

@@ -91,7 +91,7 @@ public class TestUnmanagedAMLauncher {
     return envClassPath;
   }
 
-  @Test
+  @Test(timeout=10000)
   public void testDSShell() throws Exception {
     String classpath = getTestRuntimeClasspath();
     String javaHome = System.getenv("JAVA_HOME");
@@ -110,7 +110,7 @@ public class TestUnmanagedAMLauncher {
         javaHome
             + "/bin/java -Xmx512m "
             + "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster "
-            + "--container_memory 128 --num_containers 0 --priority 0 --shell_command ls" };
+            + "--container_memory 128 --num_containers 1 --priority 0 --shell_command ls" };
 
     LOG.info("Initializing Launcher");
     UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration(

+ 354 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java

@@ -0,0 +1,354 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.client;
+
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Evolving;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.YarnException;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.service.AbstractService;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * <code>AMRMClientAsync</code> handles communication with the ResourceManager
+ * and provides asynchronous updates on events such as container allocations and
+ * completions.  It contains a thread that sends periodic heartbeats to the
+ * ResourceManager.
+ * 
+ * It should be used by implementing a CallbackHandler:
+ * <pre>
+ * {@code
+ * class MyCallbackHandler implements AMRMClientAsync.CallbackHandler {
+ *   public void onContainersAllocated(List<Container> containers) {
+ *     [run tasks on the containers]
+ *   }
+ *   
+ *   public void onContainersCompleted(List<ContainerStatus> statuses) {
+ *     [update progress, check whether app is done]
+ *   }
+ *   
+ *   public void onNodesUpdated(List<NodeReport> updated) {}
+ *   
+ *   public void onRebootRequest() {}
+ * }
+ * }
+ * </pre>
+ * 
+ * The client's lifecycle should be managed similarly to the following:
+ * 
+ * <pre>
+ * {@code
+ * AMRMClientAsync asyncClient = new AMRMClientAsync(appAttId, 1000, new MyCallbackHandler());
+ * asyncClient.init(conf);
+ * asyncClient.start();
+ * RegisterApplicationMasterResponse response = asyncClient
+ *    .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
+ *       appMasterTrackingUrl);
+ * asyncClient.addContainerRequest(containerRequest);
+ * [... wait for application to complete]
+ * asyncClient.unregisterApplicationMaster(status, appMsg, trackingUrl);
+ * asyncClient.stop();
+ * }
+ * </pre>
+ */
+@Unstable
+@Evolving
+public class AMRMClientAsync extends AbstractService {
+  
+  private static final Log LOG = LogFactory.getLog(AMRMClientAsync.class);
+  
+  private final AMRMClient client;
+  private final int intervalMs;
+  private final HeartbeatThread heartbeatThread;
+  private final CallbackHandlerThread handlerThread;
+  private final CallbackHandler handler;
+
+  private final BlockingQueue<AllocateResponse> responseQueue;
+  
+  private volatile boolean keepRunning;
+  private volatile float progress;
+  
+  public AMRMClientAsync(ApplicationAttemptId id, int intervalMs,
+      CallbackHandler callbackHandler) {
+    this(new AMRMClientImpl(id), intervalMs, callbackHandler);
+  }
+  
+  @Private
+  @VisibleForTesting
+  AMRMClientAsync(AMRMClient client, int intervalMs,
+      CallbackHandler callbackHandler) {
+    super(AMRMClientAsync.class.getName());
+    this.client = client;
+    this.intervalMs = intervalMs;
+    handler = callbackHandler;
+    heartbeatThread = new HeartbeatThread();
+    handlerThread = new CallbackHandlerThread();
+    responseQueue = new LinkedBlockingQueue<AllocateResponse>();
+    keepRunning = true;
+  }
+  
+  /**
+   * Sets the application's current progress. It will be transmitted to the
+   * resource manager on the next heartbeat.
+   * @param progress
+   *    the application's progress so far
+   */
+  public void setProgress(float progress) {
+    this.progress = progress;
+  }
+  
+  @Override
+  public void init(Configuration conf) {
+    super.init(conf);
+    client.init(conf);
+  }
+  
+  @Override
+  public void start() {
+    handlerThread.start();
+    client.start();
+    super.start();
+  }
+  
+  /**
+   * Tells the heartbeat and handler threads to stop and waits for them to
+   * terminate.  Calling this method from the callback handler thread would cause
+   * deadlock, and thus should be avoided.
+   */
+  @Override
+  public void stop() {
+    if (Thread.currentThread() == handlerThread) {
+      throw new YarnException("Cannot call stop from callback handler thread!");
+    }
+    keepRunning = false;
+    try {
+      heartbeatThread.join();
+    } catch (InterruptedException ex) {
+      LOG.error("Error joining with heartbeat thread", ex);
+    }
+    client.stop();
+    try {
+      handlerThread.interrupt();
+      handlerThread.join();
+    } catch (InterruptedException ex) {
+      LOG.error("Error joining with hander thread", ex);
+    }
+    super.stop();
+  }
+  
+  /**
+   * Registers this application master with the resource manager. On successful
+   * registration, starts the heartbeating thread.
+   */
+  public RegisterApplicationMasterResponse registerApplicationMaster(
+      String appHostName, int appHostPort, String appTrackingUrl)
+      throws YarnRemoteException {
+    RegisterApplicationMasterResponse response =
+        client.registerApplicationMaster(appHostName, appHostPort, appTrackingUrl);
+    heartbeatThread.start();
+    return response;
+  }
+
+  /**
+   * Unregister the application master. This must be called at the end.
+   * @param appStatus Success/Failure status of the master
+   * @param appMessage Diagnostics message on failure
+   * @param appTrackingUrl New URL to get master info
+   * @throws YarnRemoteException
+   */
+  public void unregisterApplicationMaster(FinalApplicationStatus appStatus,
+      String appMessage, String appTrackingUrl) throws YarnRemoteException {
+    synchronized (client) {
+      keepRunning = false;
+      client.unregisterApplicationMaster(appStatus, appMessage, appTrackingUrl);
+    }
+  }
+
+  /**
+   * Request containers for resources before calling <code>allocate</code>
+   * @param req Resource request
+   */
+  public void addContainerRequest(AMRMClient.ContainerRequest req) {
+    client.addContainerRequest(req);
+  }
+
+  /**
+   * Remove previous container request. The previous container request may have 
+   * already been sent to the ResourceManager. So the app must be prepared to
+   * receive an allocation for the previous request even after it has been
+   * removed.
+   * @param req Resource request
+   */
+  public void removeContainerRequest(AMRMClient.ContainerRequest req) {
+    client.removeContainerRequest(req);
+  }
+
+  /**
+   * Release containers assigned by the Resource Manager. If the app cannot use
+   * the container or wants to give up the container then it can release them.
+   * The app needs to make new requests for the released resource capability if
+   * it still needs it, e.g. if it released non-local resources.
+   * @param containerId
+   */
+  public void releaseAssignedContainer(ContainerId containerId) {
+    client.releaseAssignedContainer(containerId);
+  }
+
+  /**
+   * Get the currently available resources in the cluster.
+   * A valid value is available after a call to allocate has been made
+   * @return Currently available resources
+   */
+  public Resource getClusterAvailableResources() {
+    return client.getClusterAvailableResources();
+  }
+
+  /**
+   * Get the current number of nodes in the cluster.
+   * A valid value is available after a call to allocate has been made
+   * @return Current number of nodes in the cluster
+   */
+  public int getClusterNodeCount() {
+    return client.getClusterNodeCount();
+  }
+  
+  private class HeartbeatThread extends Thread {
+    public HeartbeatThread() {
+      super("AMRM Heartbeater thread");
+    }
+    
+    public void run() {
+      while (true) {
+        AllocateResponse response = null;
+        // synchronization ensures we don't send heartbeats after unregistering
+        synchronized (client) {
+          if (!keepRunning) {
+            break;
+          }
+            
+          try {
+            response = client.allocate(progress);
+          } catch (YarnRemoteException ex) {
+            LOG.error("Failed to heartbeat", ex);
+          }
+        }
+        if (response != null) {
+          while (true) {
+            try {
+              responseQueue.put(response);
+              break;
+            } catch (InterruptedException ex) {
+              LOG.warn("Interrupted while waiting to put on response queue", ex);
+            }
+          }
+        }
+        
+        try {
+          Thread.sleep(intervalMs);
+        } catch (InterruptedException ex) {
+          LOG.warn("Heartbeater interrupted", ex);
+        }
+      }
+    }
+  }
+  
+  private class CallbackHandlerThread extends Thread {
+    public CallbackHandlerThread() {
+      super("AMRM Callback Handler Thread");
+    }
+    
+    public void run() {
+      while (keepRunning) {
+        AllocateResponse response;
+        try {
+          response = responseQueue.take();
+        } catch (InterruptedException ex) {
+          LOG.info("Interrupted while waiting for queue");
+          continue;
+        }
+
+        if (response.getReboot()) {
+          handler.onRebootRequest();
+        }
+        List<NodeReport> updatedNodes = response.getUpdatedNodes();
+        if (!updatedNodes.isEmpty()) {
+          handler.onNodesUpdated(updatedNodes);
+        }
+        
+        List<ContainerStatus> completed =
+            response.getCompletedContainersStatuses();
+        if (!completed.isEmpty()) {
+          handler.onContainersCompleted(completed);
+        }
+
+        List<Container> allocated = response.getAllocatedContainers();
+        if (!allocated.isEmpty()) {
+          handler.onContainersAllocated(allocated);
+        }
+      }
+    }
+  }
+  
+  public interface CallbackHandler {
+    
+    /**
+     * Called when the ResourceManager responds to a heartbeat with completed
+     * containers. If the response contains both completed containers and
+     * allocated containers, this will be called before onContainersAllocated.
+     */
+    public void onContainersCompleted(List<ContainerStatus> statuses);
+    
+    /**
+     * Called when the ResourceManager responds to a heartbeat with allocated
+     * containers. If the response contains both completed containers and
+     * allocated containers, this will be called after onContainersCompleted.
+     */
+    public void onContainersAllocated(List<Container> containers);
+    
+    /**
+     * Called when the ResourceManager wants the ApplicationMaster to reboot
+     * for being out of sync.
+     */
+    public void onRebootRequest();
+    
+    /**
+     * Called when nodes tracked by the ResourceManager have changed in health,
+     * availability, etc.
+     */
+    public void onNodesUpdated(List<NodeReport> updatedNodes);
+  }
+}

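As the stop() javadoc above notes, stop() joins the callback handler thread, so calling it from inside a callback would deadlock (the method throws YarnException to guard against this). One hedged way for a callback to trigger shutdown is to hand the call to a separate thread; ShutdownHelper is a hypothetical name, not part of this patch:

import org.apache.hadoop.yarn.client.AMRMClientAsync;

class ShutdownHelper {
  // Stops the async client on a dedicated thread so that a CallbackHandler
  // method can request shutdown without the handler thread joining on itself.
  static void stopAsync(final AMRMClientAsync client) {
    new Thread(new Runnable() {
      @Override
      public void run() {
        client.stop();
      }
    }, "amrm-shutdown").start();
  }
}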
+ 2 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientImpl.java

@@ -45,7 +45,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
@@ -194,13 +193,12 @@ public class AMRMClientImpl extends AbstractService implements AMRMClient {
       }
 
       allocateResponse = rmClient.allocate(allocateRequest);
-      AMResponse response = allocateResponse.getAMResponse();
 
       synchronized (this) {
         // update these on successful RPC
         clusterNodeCount = allocateResponse.getNumClusterNodes();
-        lastResponseId = response.getResponseId();
-        clusterAvailableResources = response.getAvailableResources();
+        lastResponseId = allocateResponse.getResponseId();
+        clusterAvailableResources = allocateResponse.getAvailableResources();
       }
     } finally {
       // TODO how to differentiate remote yarn exception vs error in rpc

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java

@@ -153,6 +153,10 @@ public class ApplicationCLI extends YarnCLI {
       appReportStr.println(appReport.getFinalApplicationStatus());
       appReportStr.print("\tTracking-URL : ");
       appReportStr.println(appReport.getOriginalTrackingUrl());
+      appReportStr.print("\tRPC Port : ");
+      appReportStr.println(appReport.getRpcPort());
+      appReportStr.print("\tAM Host : ");
+      appReportStr.println(appReport.getHost());
       appReportStr.print("\tDiagnostics : ");
       appReportStr.print(appReport.getDiagnostics());
     } else {

+ 6 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClient.java

@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
-import org.apache.hadoop.yarn.api.records.AMResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
@@ -202,9 +201,8 @@ public class TestAMRMClient {
       assertTrue(amClient.release.size() == 0);
       
       assertTrue(nodeCount == amClient.getClusterNodeCount());
-      AMResponse amResponse = allocResponse.getAMResponse();
-      allocatedContainerCount += amResponse.getAllocatedContainers().size();
-      for(Container container : amResponse.getAllocatedContainers()) {
+      allocatedContainerCount += allocResponse.getAllocatedContainers().size();
+      for(Container container : allocResponse.getAllocatedContainers()) {
         ContainerId rejectContainerId = container.getId();
         releases.add(rejectContainerId);
         amClient.releaseAssignedContainer(rejectContainerId);
@@ -264,11 +262,11 @@ public class TestAMRMClient {
     while(!releases.isEmpty() || iterationsLeft-- > 0) {
       // inform RM of rejection
       AllocateResponse allocResponse = amClient.allocate(0.1f);
-      AMResponse amResponse = allocResponse.getAMResponse();
       // RM did not send new containers because AM does not need any
-      assertTrue(amResponse.getAllocatedContainers().size() == 0);
-      if(amResponse.getCompletedContainersStatuses().size() > 0) {
-        for(ContainerStatus cStatus : amResponse.getCompletedContainersStatuses()) {
+      assertTrue(allocResponse.getAllocatedContainers().size() == 0);
+      if(allocResponse.getCompletedContainersStatuses().size() > 0) {
+      for(ContainerStatus cStatus : allocResponse
+            .getCompletedContainersStatuses()) {
           if(releases.contains(cStatus.getContainerId())) {
             assertTrue(cStatus.getState() == ContainerState.COMPLETE);
             assertTrue(cStatus.getExitStatus() == -100);

+ 184 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java

@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.client;
+
+import static org.mockito.Mockito.anyFloat;
+import static org.mockito.Mockito.anyInt;
+import static org.mockito.Mockito.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.util.BuilderUtils;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+public class TestAMRMClientAsync {
+
+  private static final Log LOG = LogFactory.getLog(TestAMRMClientAsync.class);
+  
+  @Test(timeout=10000)
+  public void testAMRMClientAsync() throws Exception {
+    Configuration conf = new Configuration();
+    List<ContainerStatus> completed1 = Arrays.asList(
+        BuilderUtils.newContainerStatus(
+            BuilderUtils.newContainerId(0, 0, 0, 0),
+            ContainerState.COMPLETE, "", 0));
+    List<Container> allocated1 = Arrays.asList(
+        BuilderUtils.newContainer(null, null, null, null, null, null));
+    final AllocateResponse response1 = createAllocateResponse(
+        new ArrayList<ContainerStatus>(), allocated1);
+    final AllocateResponse response2 = createAllocateResponse(completed1,
+        new ArrayList<Container>());
+    final AllocateResponse emptyResponse = createAllocateResponse(
+        new ArrayList<ContainerStatus>(), new ArrayList<Container>());
+
+    TestCallbackHandler callbackHandler = new TestCallbackHandler();
+    AMRMClient client = mock(AMRMClient.class);
+    final AtomicBoolean secondHeartbeatReceived = new AtomicBoolean(false);
+    when(client.allocate(anyFloat())).thenReturn(response1).thenAnswer(new Answer<AllocateResponse>() {
+      @Override
+      public AllocateResponse answer(InvocationOnMock invocation)
+          throws Throwable {
+        secondHeartbeatReceived.set(true);
+        return response2;
+      }
+    }).thenReturn(emptyResponse);
+    when(client.registerApplicationMaster(anyString(), anyInt(), anyString()))
+      .thenReturn(null);
+    
+    AMRMClientAsync asyncClient = new AMRMClientAsync(client, 20, callbackHandler);
+    asyncClient.init(conf);
+    asyncClient.start();
+    asyncClient.registerApplicationMaster("localhost", 1234, null);
+    
+    // Even while the CallbackHandler is still processing the first response,
+    // the heartbeater thread should keep sending heartbeats.
+    // To test this, wait for the second heartbeat to be received.
+    while (!secondHeartbeatReceived.get()) {
+      Thread.sleep(10);
+    }
+    
+    // allocated containers should come before completed containers
+    Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
+    
+    // wait for the allocated containers from the first heartbeat's response
+    while (callbackHandler.takeAllocatedContainers() == null) {
+      Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
+      Thread.sleep(10);
+    }
+    
+    // wait for the completed containers from the second heartbeat's response
+    while (callbackHandler.takeCompletedContainers() == null) {
+      Thread.sleep(10);
+    }
+    
+    asyncClient.stop();
+    
+    Assert.assertEquals(null, callbackHandler.takeAllocatedContainers());
+    Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
+  }
+  
+  private AllocateResponse createAllocateResponse(
+      List<ContainerStatus> completed, List<Container> allocated) {
+    AllocateResponse response = BuilderUtils.newAllocateResponse(0, completed, allocated,
+        new ArrayList<NodeReport>(), null, false, 1);
+    return response;
+  }
+  
+  private class TestCallbackHandler implements AMRMClientAsync.CallbackHandler {
+    private volatile List<ContainerStatus> completedContainers;
+    private volatile List<Container> allocatedContainers;
+    
+    public List<ContainerStatus> takeCompletedContainers() {
+      List<ContainerStatus> ret = completedContainers;
+      if (ret == null) {
+        return null;
+      }
+      completedContainers = null;
+      synchronized (ret) {
+        ret.notify();
+      }
+      return ret;
+    }
+    
+    public List<Container> takeAllocatedContainers() {
+      List<Container> ret = allocatedContainers;
+      if (ret == null) {
+        return null;
+      }
+      allocatedContainers = null;
+      synchronized (ret) {
+        ret.notify();
+      }
+      return ret;
+    }
+    
+    @Override
+    public void onContainersCompleted(List<ContainerStatus> statuses) {
+      completedContainers = statuses;
+      // wait for containers to be taken before returning
+      synchronized (completedContainers) {
+        while (completedContainers != null) {
+          try {
+            completedContainers.wait();
+          } catch (InterruptedException ex) {
+            LOG.error("Interrupted during wait", ex);
+          }
+        }
+      }
+    }
+
+    @Override
+    public void onContainersAllocated(List<Container> containers) {
+      allocatedContainers = containers;
+      // wait for containers to be taken before returning
+      synchronized (allocatedContainers) {
+        while (allocatedContainers != null) {
+          try {
+            allocatedContainers.wait();
+          } catch (InterruptedException ex) {
+            LOG.error("Interrupted during wait", ex);
+          }
+        }
+      }
+    }
+
+    @Override
+    public void onRebootRequest() {}
+
+    @Override
+    public void onNodesUpdated(List<NodeReport> updatedNodes) {}
+  }
+}

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java

@@ -94,6 +94,8 @@ public class TestYarnCLI {
     pw.println("\tState : FINISHED");
     pw.println("\tFinal-State : SUCCEEDED");
     pw.println("\tTracking-URL : N/A");
+    pw.println("\tRPC Port : 124");
+    pw.println("\tAM Host : host");
     pw.println("\tDiagnostics : diagnostics");
     pw.close();
     String appReportStr = baos.toString("UTF-8");

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -437,6 +437,16 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_PMEM_MB = NM_PREFIX + "resource.memory-mb";
   public static final int DEFAULT_NM_PMEM_MB = 8 * 1024;
 
+  /** Specifies whether physical memory check is enabled. */
+  public static final String NM_PMEM_CHECK_ENABLED = NM_PREFIX
+      + "pmem-check-enabled";
+  public static final boolean DEFAULT_NM_PMEM_CHECK_ENABLED = true;
+
+  /** Specifies whether virtual memory check is enabled. */
+  public static final String NM_VMEM_CHECK_ENABLED = NM_PREFIX
+      + "vmem-check-enabled";
+  public static final boolean DEFAULT_NM_VMEM_CHECK_ENABLED = true;
+
   /** Conversion ratio for physical memory to virtual memory. */
   public static final String NM_VMEM_PMEM_RATIO =
     NM_PREFIX + "vmem-pmem-ratio";

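The two new flags make the enforcement switchable independently of the configured memory sizes. A small sketch of flipping them programmatically with the constants just added (values are illustrative; clusters would normally set the corresponding properties in yarn-site.xml):

import org.apache.hadoop.yarn.conf.YarnConfiguration;

class MemoryCheckConfigSketch {
  static YarnConfiguration disableVmemCheckOnly() {
    YarnConfiguration conf = new YarnConfiguration();
    // Keep physical-memory enforcement on, turn virtual-memory enforcement off.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, true);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
    return conf;
  }
}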
+ 8 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java

@@ -27,6 +27,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.io.Writer;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
@@ -505,7 +506,7 @@ public class AggregatedLogFormat {
      * @throws IOException
      */
     public static void readAContainerLogsForALogType(
-        DataInputStream valueStream, DataOutputStream out)
+        DataInputStream valueStream, PrintStream out)
           throws IOException {
 
       byte[] buf = new byte[65535];
@@ -513,11 +514,11 @@ public class AggregatedLogFormat {
       String fileType = valueStream.readUTF();
       String fileLengthStr = valueStream.readUTF();
       long fileLength = Long.parseLong(fileLengthStr);
-      out.writeUTF("\nLogType:");
-      out.writeUTF(fileType);
-      out.writeUTF("\nLogLength:");
-      out.writeUTF(fileLengthStr);
-      out.writeUTF("\nLog Contents:\n");
+      out.print("LogType: ");
+      out.println(fileType);
+      out.print("LogLength: ");
+      out.println(fileLengthStr);
+      out.println("Log Contents:");
 
       int curRead = 0;
       long pendingRead = fileLength - curRead;
@@ -533,6 +534,7 @@ public class AggregatedLogFormat {
                   pendingRead > buf.length ? buf.length : (int) pendingRead;
         len = valueStream.read(buf, 0, toRead);
       }
+      out.println("");
     }
 
     public void close() throws IOException {

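With writeUTF replaced by plain print calls, the aggregated-log dump becomes ordinary text. For a hypothetical 11-byte stdout log, the block emitted by readAContainerLogsForALogType would look roughly like:

LogType: stdout
LogLength: 11
Log Contents:
hello world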
+ 17 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java

@@ -19,10 +19,10 @@
 package org.apache.hadoop.yarn.logaggregation;
 
 import java.io.DataInputStream;
-import java.io.DataOutputStream;
 import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.PrintStream;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
@@ -30,6 +30,7 @@ import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileContext;
@@ -57,10 +58,13 @@ public class LogDumper extends Configured implements Tool {
   public int run(String[] args) throws Exception {
 
     Options opts = new Options();
-    opts.addOption(APPLICATION_ID_OPTION, true, "ApplicationId");
-    opts.addOption(CONTAINER_ID_OPTION, true, "ContainerId");
-    opts.addOption(NODE_ADDRESS_OPTION, true, "NodeAddress");
-    opts.addOption(APP_OWNER_OPTION, true, "AppOwner");
+    opts.addOption(APPLICATION_ID_OPTION, true, "ApplicationId (required)");
+    opts.addOption(CONTAINER_ID_OPTION, true,
+      "ContainerId (must be specified if node address is specified)");
+    opts.addOption(NODE_ADDRESS_OPTION, true, "NodeAddress in the format "
+      + "nodename:port (must be specified if container id is specified)");
+    opts.addOption(APP_OWNER_OPTION, true,
+      "AppOwner (assumed to be current user if not specified)");
 
     if (args.length < 1) {
       HelpFormatter formatter = new HelpFormatter();
@@ -99,14 +103,12 @@ public class LogDumper extends Configured implements Tool {
     ApplicationId appId =
         ConverterUtils.toApplicationId(recordFactory, appIdStr);
 
-    DataOutputStream out = new DataOutputStream(System.out);
-
     if (appOwner == null || appOwner.isEmpty()) {
       appOwner = UserGroupInformation.getCurrentUser().getShortUserName();
     }
     int resultCode = 0;
     if (containerIdStr == null && nodeAddress == null) {
-      resultCode = dumpAllContainersLogs(appId, appOwner, out);
+      resultCode = dumpAllContainersLogs(appId, appOwner, System.out);
     } else if ((containerIdStr == null && nodeAddress != null)
         || (containerIdStr != null && nodeAddress == null)) {
       System.out.println("ContainerId or NodeAddress cannot be null!");
@@ -125,7 +127,7 @@ public class LogDumper extends Configured implements Tool {
                   appOwner,
                   ConverterUtils.toNodeId(nodeAddress),
                   LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf())));
-      resultCode = dumpAContainerLogs(containerIdStr, reader, out);
+      resultCode = dumpAContainerLogs(containerIdStr, reader, System.out);
     }
 
     return resultCode;
@@ -149,12 +151,11 @@ public class LogDumper extends Configured implements Tool {
           "Log aggregation has not completed or is not enabled.");
       return -1;
     }
-    DataOutputStream out = new DataOutputStream(System.out);
-    return dumpAContainerLogs(containerId, reader, out);
+    return dumpAContainerLogs(containerId, reader, System.out);
   }
 
   private int dumpAContainerLogs(String containerIdStr,
-      AggregatedLogFormat.LogReader reader, DataOutputStream out)
+      AggregatedLogFormat.LogReader reader, PrintStream out)
       throws IOException {
     DataInputStream valueStream;
     LogKey key = new LogKey();
@@ -183,7 +184,7 @@ public class LogDumper extends Configured implements Tool {
   }
 
   private int dumpAllContainersLogs(ApplicationId appId, String appOwner,
-      DataOutputStream out) throws IOException {
+      PrintStream out) throws IOException {
     Path remoteRootLogDir =
         new Path(getConf().get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
             YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
@@ -216,6 +217,9 @@ public class LogDumper extends Configured implements Tool {
         valueStream = reader.next(key);
 
         while (valueStream != null) {
+          String containerString = "\n\nContainer: " + key + " on " + thisNodeFile.getPath().getName();
+          out.println(containerString);
+          out.println(StringUtils.repeat("=", containerString.length()));
           while (true) {
             try {
               LogReader.readAContainerLogsForALogType(valueStream, out);

+ 18 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java

@@ -28,6 +28,7 @@ import java.util.Map;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -404,4 +405,21 @@ public class BuilderUtils {
     allocateRequest.addAllReleases(containersToBeReleased);
     return allocateRequest;
   }
+  
+  public static AllocateResponse newAllocateResponse(int responseId,
+      List<ContainerStatus> completedContainers,
+      List<Container> allocatedContainers, List<NodeReport> updatedNodes,
+      Resource availResources, boolean reboot, int numClusterNodes) {
+    AllocateResponse response = recordFactory
+        .newRecordInstance(AllocateResponse.class);
+    response.setNumClusterNodes(numClusterNodes);
+    response.setResponseId(responseId);
+    response.setCompletedContainersStatuses(completedContainers);
+    response.setAllocatedContainers(allocatedContainers);
+    response.setUpdatedNodes(updatedNodes);
+    response.setAvailableResources(availResources);
+    response.setReboot(reboot);
+
+    return response;
+  }
 }

+ 14 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -447,6 +447,20 @@
     <value>8192</value>
   </property>
 
+  <property>
+    <description>Whether physical memory limits will be enforced for
+    containers.</description>
+    <name>yarn.nodemanager.pmem-check-enabled</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <description>Whether virtual memory limits will be enforced for
+    containers.</description>
+    <name>yarn.nodemanager.vmem-check-enabled</name>
+    <value>true</value>
+  </property>
+
   <property>
     <description>Ratio between virtual memory to physical memory when
     setting memory limits for containers. Container allocations are

+ 7 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRecordFactory.java

@@ -23,9 +23,9 @@ import junit.framework.Assert;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factories.impl.pb.RecordFactoryPBImpl;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl;
-import org.apache.hadoop.yarn.api.records.AMResponse;
-import org.apache.hadoop.yarn.api.records.impl.pb.AMResponsePBImpl;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl;
 import org.junit.Test;
 
 public class TestRecordFactory {
@@ -35,15 +35,17 @@ public class TestRecordFactory {
     RecordFactory pbRecordFactory = RecordFactoryPBImpl.get();
     
     try {
-      AMResponse response = pbRecordFactory.newRecordInstance(AMResponse.class);
-      Assert.assertEquals(AMResponsePBImpl.class, response.getClass());
+      AllocateResponse response =
+          pbRecordFactory.newRecordInstance(AllocateResponse.class);
+      Assert.assertEquals(AllocateResponsePBImpl.class, response.getClass());
     } catch (YarnException e) {
       e.printStackTrace();
       Assert.fail("Failed to crete record");
     }
     
     try {
-      AllocateRequest response = pbRecordFactory.newRecordInstance(AllocateRequest.class);
+      AllocateRequest response =
+          pbRecordFactory.newRecordInstance(AllocateRequest.class);
       Assert.assertEquals(AllocateRequestPBImpl.class, response.getClass());
     } catch (YarnException e) {
       e.printStackTrace();

+ 16 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java

@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.yarn.util;
 
+import static org.junit.Assert.fail;
+
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
@@ -188,11 +190,20 @@ public class TestProcfsBasedProcessTree {
     // destroy the process and all its subprocesses
     destroyProcessTree(pid);
 
-    if (isSetsidAvailable()) { // whole processtree should be gone
-      Assert.assertFalse("Proceesses in process group live",
-          isAnyProcessInTreeAlive(p));
-    } else {// process should be gone
-      Assert.assertFalse("ProcessTree must have been gone", isAlive(pid));
+    boolean isAlive = true;
+    for (int tries = 100; tries > 0; tries--) {
+      if (isSetsidAvailable()) {// whole processtree
+        isAlive = isAnyProcessInTreeAlive(p);
+      } else {// process
+        isAlive = isAlive(pid);
+      }
+      if (!isAlive) {
+        break;
+      }
+      Thread.sleep(100);
+    }
+    if (isAlive) {
+      fail("ProcessTree shouldn't be alive");
     }
 
     LOG.info("Process-tree dump follows: \n" + processTreeDump);

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java

@@ -63,6 +63,10 @@ public class TestRackResolver {
       return returnList;
     }
 
+    @Override
+    public void reloadCachedMappings() {
+      // nothing to do here, since RawScriptBasedMapping has no cache.
+    }
   }
 
   @Test

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ResourceView.java

@@ -22,5 +22,9 @@ public interface ResourceView {
 
   long getVmemAllocatedForContainers();
 
+  boolean isVmemCheckEnabled();
+
   long getPmemAllocatedForContainers();
+
+  boolean isPmemCheckEnabled();
 }

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java

@@ -570,8 +570,10 @@ public class ContainerLaunch implements Callable<Integer> {
     // additional testing.  See YARN-358.
     if (Shell.WINDOWS) {
       String inputClassPath = environment.get(Environment.CLASSPATH.name());
-      environment.put(Environment.CLASSPATH.name(),
-          FileUtil.createJarWithClassPath(inputClassPath, pwd));
+      if (inputClassPath != null && !inputClassPath.isEmpty()) {
+        environment.put(Environment.CLASSPATH.name(),
+            FileUtil.createJarWithClassPath(inputClassPath, pwd));
+      }
     }
 
     /**

+ 69 - 58
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java

@@ -63,14 +63,13 @@ public class ContainersMonitorImpl extends AbstractService implements
   private Configuration conf;
   private Class<? extends ResourceCalculatorProcessTree> processTreeClass;
 
-  private long maxVmemAllottedForContainers = DISABLED_MEMORY_LIMIT;
-  private long maxPmemAllottedForContainers = DISABLED_MEMORY_LIMIT;
+  private long maxVmemAllottedForContainers = UNKNOWN_MEMORY_LIMIT;
+  private long maxPmemAllottedForContainers = UNKNOWN_MEMORY_LIMIT;
 
-  /**
-   * A value which if set for memory related configuration options, indicates
-   * that the options are turned off.
-   */
-  public static final long DISABLED_MEMORY_LIMIT = -1L;
+  private boolean pmemCheckEnabled;
+  private boolean vmemCheckEnabled;
+
+  private static final long UNKNOWN_MEMORY_LIMIT = -1L;
 
   public ContainersMonitorImpl(ContainerExecutor exec,
       AsyncDispatcher dispatcher, Context context) {
@@ -104,65 +103,57 @@ public class ContainersMonitorImpl extends AbstractService implements
     LOG.info(" Using ResourceCalculatorProcessTree : "
         + this.processTreeClass);
 
-    long totalPhysicalMemoryOnNM = DISABLED_MEMORY_LIMIT;
-    if (this.resourceCalculatorPlugin != null) {
-      totalPhysicalMemoryOnNM =
-          this.resourceCalculatorPlugin.getPhysicalMemorySize();
-      if (totalPhysicalMemoryOnNM <= 0) {
-        LOG.warn("NodeManager's totalPmem could not be calculated. "
-            + "Setting it to " + DISABLED_MEMORY_LIMIT);
-        totalPhysicalMemoryOnNM = DISABLED_MEMORY_LIMIT;
-      }
-    }
+    long configuredPMemForContainers = conf.getLong(
+        YarnConfiguration.NM_PMEM_MB,
+        YarnConfiguration.DEFAULT_NM_PMEM_MB) * 1024 * 1024l;
 
+    // Setting these irrespective of whether checks are enabled. Required in
+    // the UI.
     // ///////// Physical memory configuration //////
-    this.maxPmemAllottedForContainers =
-        conf.getLong(YarnConfiguration.NM_PMEM_MB, YarnConfiguration.DEFAULT_NM_PMEM_MB);
-    this.maxPmemAllottedForContainers =
-        this.maxPmemAllottedForContainers * 1024 * 1024L; //Normalize to bytes
-
-    if (totalPhysicalMemoryOnNM != DISABLED_MEMORY_LIMIT &&
-        this.maxPmemAllottedForContainers >
-        totalPhysicalMemoryOnNM * 0.80f) {
-      LOG.warn("NodeManager configured with " +
-          TraditionalBinaryPrefix.long2String(maxPmemAllottedForContainers, "", 1) +
-          " physical memory allocated to containers, which is more than " +
-          "80% of the total physical memory available (" +
-          TraditionalBinaryPrefix.long2String(totalPhysicalMemoryOnNM, "", 1) +
-          "). Thrashing might happen.");
-    }
+    this.maxPmemAllottedForContainers = configuredPMemForContainers;
 
     // ///////// Virtual memory configuration //////
-    float vmemRatio = conf.getFloat(
-        YarnConfiguration.NM_VMEM_PMEM_RATIO,
+    float vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
         YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
     Preconditions.checkArgument(vmemRatio > 0.99f,
-        YarnConfiguration.NM_VMEM_PMEM_RATIO +
-        " should be at least 1.0");
+        YarnConfiguration.NM_VMEM_PMEM_RATIO + " should be at least 1.0");
     this.maxVmemAllottedForContainers =
-      (long)(vmemRatio * maxPmemAllottedForContainers);
+        (long) (vmemRatio * configuredPMemForContainers);
+
+    pmemCheckEnabled = conf.getBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED,
+        YarnConfiguration.DEFAULT_NM_PMEM_CHECK_ENABLED);
+    vmemCheckEnabled = conf.getBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED,
+        YarnConfiguration.DEFAULT_NM_VMEM_CHECK_ENABLED);
+    LOG.info("Physical memory check enabled: " + pmemCheckEnabled);
+    LOG.info("Virtual memory check enabled: " + vmemCheckEnabled);
+
+    if (pmemCheckEnabled) {
+      // Logging if actual pmem cannot be determined.
+      long totalPhysicalMemoryOnNM = UNKNOWN_MEMORY_LIMIT;
+      if (this.resourceCalculatorPlugin != null) {
+        totalPhysicalMemoryOnNM = this.resourceCalculatorPlugin
+            .getPhysicalMemorySize();
+        if (totalPhysicalMemoryOnNM <= 0) {
+          LOG.warn("NodeManager's totalPmem could not be calculated. "
+              + "Setting it to " + UNKNOWN_MEMORY_LIMIT);
+          totalPhysicalMemoryOnNM = UNKNOWN_MEMORY_LIMIT;
+        }
+      }
 
+      if (totalPhysicalMemoryOnNM != UNKNOWN_MEMORY_LIMIT &&
+          this.maxPmemAllottedForContainers > totalPhysicalMemoryOnNM * 0.80f) {
+        LOG.warn("NodeManager configured with "
+            + TraditionalBinaryPrefix.long2String(maxPmemAllottedForContainers,
+                "", 1)
+            + " physical memory allocated to containers, which is more than "
+            + "80% of the total physical memory available ("
+            + TraditionalBinaryPrefix.long2String(totalPhysicalMemoryOnNM, "",
+                1) + "). Thrashing might happen.");
+      }
+    }
     super.init(conf);
   }
 
-  /**
-   * Is the total physical memory check enabled?
-   *
-   * @return true if total physical memory check is enabled.
-   */
-  boolean isPhysicalMemoryCheckEnabled() {
-    return !(this.maxPmemAllottedForContainers == DISABLED_MEMORY_LIMIT);
-  }
-
-  /**
-   * Is the total virtual memory check enabled?
-   *
-   * @return true if total virtual memory check is enabled.
-   */
-  boolean isVirtualMemoryCheckEnabled() {
-    return !(this.maxVmemAllottedForContainers == DISABLED_MEMORY_LIMIT);
-  }
-
   private boolean isEnabled() {
     if (resourceCalculatorPlugin == null) {
             LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
@@ -174,7 +165,7 @@ public class ContainersMonitorImpl extends AbstractService implements
                 + this.getClass().getName() + " is disabled.");
             return false;
     }
-    if (!(isPhysicalMemoryCheckEnabled() || isVirtualMemoryCheckEnabled())) {
+    if (!(isPmemCheckEnabled() || isVmemCheckEnabled())) {
       LOG.info("Neither virutal-memory nor physical-memory monitoring is " +
           "needed. Not running the monitor-thread");
       return false;
@@ -412,7 +403,7 @@ public class ContainersMonitorImpl extends AbstractService implements
 
             boolean isMemoryOverLimit = false;
             String msg = "";
-            if (isVirtualMemoryCheckEnabled()
+            if (isVmemCheckEnabled()
                 && isProcessTreeOverLimit(containerId.toString(),
                     currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
               // Container (the root process) is still alive and overflowing
@@ -423,7 +414,7 @@ public class ContainersMonitorImpl extends AbstractService implements
                   currentPmemUsage, pmemLimit,
                   pId, containerId, pTree);
               isMemoryOverLimit = true;
-            } else if (isPhysicalMemoryCheckEnabled()
+            } else if (isPmemCheckEnabled()
                 && isProcessTreeOverLimit(containerId.toString(),
                     currentPmemUsage, curRssMemUsageOfAgedProcesses,
                     pmemLimit)) {
@@ -507,11 +498,31 @@ public class ContainersMonitorImpl extends AbstractService implements
     return this.maxVmemAllottedForContainers;
   }
 
+  /**
+   * Is the total physical memory check enabled?
+   *
+   * @return true if total physical memory check is enabled.
+   */
+  @Override
+  public boolean isPmemCheckEnabled() {
+    return this.pmemCheckEnabled;
+  }
+
   @Override
   public long getPmemAllocatedForContainers() {
     return this.maxPmemAllottedForContainers;
   }
 
+  /**
+   * Is the total virtual memory check enabled?
+   *
+   * @return true if total virtual memory check is enabled.
+   */
+  @Override
+  public boolean isVmemCheckEnabled() {
+    return this.vmemCheckEnabled;
+  }
+
   @Override
   public void handle(ContainersMonitorEvent monitoringEvent) {
 

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/NodePage.java

@@ -67,8 +67,12 @@ public class NodePage extends NMView {
       info("NodeManager information")
           ._("Total Vmem allocated for Containers",
               StringUtils.byteDesc(info.getTotalVmemAllocated() * BYTES_IN_MB))
+          ._("Vmem enforcement enabled",
+              info.isVmemCheckEnabled())
           ._("Total Pmem allocated for Container",
               StringUtils.byteDesc(info.getTotalPmemAllocated() * BYTES_IN_MB))
+          ._("Pmem enforcement enabled",
+              info.isPmemCheckEnabled())
           ._("NodeHealthyStatus",
               info.getHealthStatus())
           ._("LastNodeHealthTime", new Date(

+ 12 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NodeInfo.java

@@ -36,6 +36,8 @@ public class NodeInfo {
   protected String healthReport;
   protected long totalVmemAllocatedContainersMB;
   protected long totalPmemAllocatedContainersMB;
+  protected boolean vmemCheckEnabled;
+  protected boolean pmemCheckEnabled;
   protected long lastNodeUpdateTime;
   protected boolean nodeHealthy;
   protected String nodeManagerVersion;
@@ -56,8 +58,10 @@ public class NodeInfo {
     this.nodeHostName = context.getNodeId().getHost();
     this.totalVmemAllocatedContainersMB = resourceView
         .getVmemAllocatedForContainers() / BYTES_IN_MB;
+    this.vmemCheckEnabled = resourceView.isVmemCheckEnabled();
     this.totalPmemAllocatedContainersMB = resourceView
         .getPmemAllocatedForContainers() / BYTES_IN_MB;
+    this.pmemCheckEnabled = resourceView.isPmemCheckEnabled();
     this.nodeHealthy = context.getNodeHealthStatus().getIsNodeHealthy();
     this.lastNodeUpdateTime = context.getNodeHealthStatus()
         .getLastHealthReportTime();
@@ -120,8 +124,16 @@ public class NodeInfo {
     return this.totalVmemAllocatedContainersMB;
   }
 
+  public boolean isVmemCheckEnabled() {
+    return this.vmemCheckEnabled;
+  }
+
   public long getTotalPmemAllocated() {
     return this.totalPmemAllocatedContainersMB;
   }
 
+  public boolean isPmemCheckEnabled() {
+    return this.pmemCheckEnabled;
+  }
+
 }

+ 18 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java

@@ -22,11 +22,13 @@ import static org.mockito.Mockito.*;
 import static junit.framework.Assert.assertEquals;
 import static junit.framework.Assert.assertTrue;
 
+import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.EOFException;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.io.Writer;
 import java.lang.reflect.Method;
@@ -40,6 +42,7 @@ import java.util.Set;
 
 import junit.framework.Assert;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnsupportedFileSystemException;
@@ -531,24 +534,26 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
 
         while (true) {
           try {
-            DataOutputBuffer dob = new DataOutputBuffer();
-            LogReader.readAContainerLogsForALogType(valueStream, dob);
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            PrintStream ps = new PrintStream(baos);
+            LogReader.readAContainerLogsForALogType(valueStream, ps);
 
-            DataInputBuffer dib = new DataInputBuffer();
-            dib.reset(dob.getData(), dob.getLength());
+            String writtenLines[] = baos.toString().split(
+              System.getProperty("line.separator"));
 
-            Assert.assertEquals("\nLogType:", dib.readUTF());
-            String fileType = dib.readUTF();
+            Assert.assertEquals("LogType:", writtenLines[0].substring(0, 8));
+            String fileType = writtenLines[0].substring(9);
 
-            Assert.assertEquals("\nLogLength:", dib.readUTF());
-            String fileLengthStr = dib.readUTF();
+            Assert.assertEquals("LogLength:", writtenLines[1].substring(0, 10));
+            String fileLengthStr = writtenLines[1].substring(11);
             long fileLength = Long.parseLong(fileLengthStr);
 
-            Assert.assertEquals("\nLog Contents:\n", dib.readUTF());
-            byte[] buf = new byte[(int) fileLength]; // cast is okay in this
-                                                     // test.
-            dib.read(buf, 0, (int) fileLength);
-            perContainerMap.put(fileType, new String(buf));
+            Assert.assertEquals("Log Contents:",
+              writtenLines[2].substring(0, 13));
+
+            String logContents = StringUtils.join(
+              Arrays.copyOfRange(writtenLines, 3, writtenLines.length), "\n");
+            perContainerMap.put(fileType, logContents);
 
             LOG.info("LogType:" + fileType);
             LOG.info("LogType:" + fileLength);

Not all modified files are shown because too many files changed