瀏覽代碼

YARN-2354. DistributedShell may allocate more containers than client specified after AM restarts. Contributed by Li Lu

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1614538 13f79535-47bb-0310-9956-ffa450edef68
Jian He 11 年之前
父節點
當前提交
7e54b1c6d9

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -111,6 +111,9 @@ Release 2.6.0 - UNRELEASED
 
     YARN-1796. container-executor shouldn't require o-r permissions (atm)
 
+    YARN-2354. DistributedShell may allocate more containers than client
+    specified after AM restarts. (Li Lu via jianhe)
+
 Release 2.5.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 6 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java

@@ -208,7 +208,8 @@ public class ApplicationMaster {
 
   // App Master configuration
   // No. of containers to run shell command on
-  private int numTotalContainers = 1;
+  @VisibleForTesting
+  protected int numTotalContainers = 1;
   // Memory to request for the container on which the shell command will run
   private int containerMemory = 10;
   // VirtualCores to request for the container on which the shell command will run
@@ -594,8 +595,8 @@ public class ApplicationMaster {
 
     List<Container> previousAMRunningContainers =
         response.getContainersFromPreviousAttempts();
-    LOG.info("Received " + previousAMRunningContainers.size()
-        + " previous AM's running containers on AM registration.");
+    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
+      + " previous attempts' running containers on AM registration.");
     numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
 
     int numTotalContainersToRequest =
@@ -610,7 +611,7 @@ public class ApplicationMaster {
       ContainerRequest containerAsk = setupContainerAskForRM();
       amRMClient.addContainerRequest(containerAsk);
     }
-    numRequestedContainers.set(numTotalContainersToRequest);
+    numRequestedContainers.set(numTotalContainers);
     try {
       publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(),
           DSEvent.DS_APP_ATTEMPT_END);
@@ -689,7 +690,7 @@ public class ApplicationMaster {
       LOG.info("Got response from RM for container ask, completedCnt="
           + completedContainers.size());
       for (ContainerStatus containerStatus : completedContainers) {
-        LOG.info("Got container status for containerID="
+        LOG.info(appAttemptID + " got container status for containerID="
             + containerStatus.getContainerId() + ", state="
             + containerStatus.getState() + ", exitStatus="
             + containerStatus.getExitStatus() + ", diagnostics="

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java

@@ -36,9 +36,11 @@ public class TestDSFailedAppMaster extends ApplicationMaster {
     if (appAttemptID.getAttemptId() == 2) {
       // should reuse the earlier running container, so numAllocatedContainers
       // should be set to 1. And should ask no more containers, so
-      // numRequestedContainers should be set to 0.
+      // numRequestedContainers should be the same as numTotalContainers.
+      // The only container is the container requested by the AM in the first
+      // attempt.
       if (numAllocatedContainers.get() != 1
-          || numRequestedContainers.get() != 0) {
+          || numRequestedContainers.get() != numTotalContainers) {
         LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get()
             + " and NumRequestedContainers is " + numAllocatedContainers.get()
             + ".Application Master failed. exiting");