Переглянути джерело

YARN-5024. TestContainerResourceUsage#testUsageAfterAMRestartWithMultipleContainers random failure. Contributed by Bibin A Chundatt

Rohith Sharma K S 9 років тому
батько
коміт
28bd63e92b

+ 20 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java

@@ -309,7 +309,7 @@ public class TestContainerResourceUsage {
     nm.nodeHeartbeat(am0.getApplicationAttemptId(),
                       amContainerId.getContainerId(), ContainerState.COMPLETE);
     rm.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
-
+    rm.drainEvents();
     long memorySeconds = 0;
     long vcoreSeconds = 0;
 
@@ -331,7 +331,8 @@ public class TestContainerResourceUsage {
     } else {
       // If keepRunningContainers is false, all live containers should now
       // be completed. Calculate the resource usage metrics for all of them.
-      for (RMContainer c : rmContainers) { 
+      for (RMContainer c : rmContainers) {
+        waitforContainerCompletion(rm, nm, amContainerId, c);
         AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
         memorySeconds += ru.getMemorySeconds();
         vcoreSeconds += ru.getVcoreSeconds();
@@ -346,13 +347,11 @@ public class TestContainerResourceUsage {
     Assert.assertFalse(attempt2.getAppAttemptId()
                                .equals(am0.getApplicationAttemptId()));
 
-    // launch the new AM
-    //TODO explore a better way than sleeping for a while (YARN-4929)
-    Thread.sleep(1000);
+    rm.waitForState(attempt2.getAppAttemptId(), RMAppAttemptState.SCHEDULED);
     nm.nodeHeartbeat(true);
     MockAM am1 = rm.sendAMLaunched(attempt2.getAppAttemptId());
     am1.registerAppAttempt();
-    
+    rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
     // allocate NUM_CONTAINERS containers
     am1.allocate("127.0.0.1", 1024, NUM_CONTAINERS,
       new ArrayList<ContainerId>());
@@ -385,6 +384,7 @@ public class TestContainerResourceUsage {
 
     // Calculate container usage metrics for second attempt.
     for (RMContainer c : rmContainers) {
+      waitforContainerCompletion(rm, nm, amContainerId, c);
       AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
       memorySeconds += ru.getMemorySeconds();
       vcoreSeconds += ru.getVcoreSeconds();
@@ -401,6 +401,20 @@ public class TestContainerResourceUsage {
     return;
   }
 
+  private void waitforContainerCompletion(MockRM rm, MockNM nm,
+      ContainerId amContainerId, RMContainer container) throws Exception { // called right before each container's usage metrics are calculated
+    ContainerId containerId = container.getContainerId(); // id used for the scheduler lookup and the waits below
+    if (null != rm.scheduler.getRMContainer(containerId)) { // scheduler still returns this container
+      if (containerId.equals(amContainerId)) {
+        rm.waitForState(nm, containerId, RMContainerState.COMPLETED); // AM container: wait for COMPLETED
+      } else {
+        rm.waitForState(nm, containerId, RMContainerState.KILLED); // any other container: wait for KILLED (presumably killed when the attempt fails - confirm)
+      }
+    } else {
+      rm.drainEvents(); // scheduler no longer returns it: nothing to wait on, call drainEvents() instead
+    }
+  }
+
   private AggregateAppResourceUsage calculateContainerResourceMetrics(
       RMContainer rmContainer) {
     Resource resource = rmContainer.getContainer().getResource();