Browse Source

YARN-4615. Fix random test failure in TestAbstractYarnScheduler#testResourceRequestRecoveryToTheRightAppAttempt. (Sunil G via rohithsharmaks)

Rohith Sharma K S 9 years ago
parent
commit
2673cbaf55

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -178,6 +178,9 @@ Release 2.9.0 - UNRELEASED
     YARN-4543. Fix random test failure in TestNodeStatusUpdater.testStopReentrant
     (Akihiro Suda via rohithsharmaks)
 
+    YARN-4615. Fix random test failure in TestAbstractYarnScheduler#
+    testResourceRequestRecoveryToTheRightAppAttempt. (Sunil G via rohithsharmaks)
+
 Release 2.8.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 25 - 7
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java

@@ -202,15 +202,33 @@ public class MockRM extends ResourceManager {
 
   public void waitForContainerState(ContainerId containerId,
       RMContainerState state) throws Exception {
-    int timeoutSecs = 0;
+    // Fails the test if the expected state is not reached before the timeout.
+    Assert.assertTrue(waitForContainerState(containerId, state, 8 * 1000));
+  }
+
+  public boolean waitForContainerState(ContainerId containerId,
+      RMContainerState containerState, int timeoutMillisecs) throws Exception {
     RMContainer container = getResourceScheduler().getRMContainer(containerId);
-    while ((container == null || container.getState() != state)
-        && timeoutSecs++ < 40) {
-      System.out.println(
-          "Waiting for" + containerId + " state to be:" + state.name());
-      Thread.sleep(200);
+    int timeoutSecs = 0;
+    while (((container == null) || !containerState.equals(container.getState()))
+        && timeoutSecs++ < timeoutMillisecs / 100) {
+      if(container == null){
+        container = getResourceScheduler().getRMContainer(containerId);
+      }
+      System.out.println("Container : " + containerId +
+          " Waiting for state : " + containerState);
+
+      Thread.sleep(100);
+
+      if (timeoutMillisecs <= timeoutSecs * 100) {
+        return false;
+      }
     }
-    Assert.assertTrue(container.getState() == state);
+
+    System.out.println("Container State is : " + container.getState());
+    Assert.assertEquals("Container state is not correct (timedout)",
+        containerState, container.getState());
+    return true;
   }
 
   public void waitForContainerAllocated(MockNM nm, ContainerId containerId)

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java

@@ -52,10 +52,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerPreemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.junit.Assert;
 import org.junit.Test;
@@ -568,7 +566,8 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase {
 
       // AM crashes, and a new app-attempt gets created
       node.nodeHeartbeat(applicationAttemptOneID, 1, ContainerState.COMPLETE);
-      rm.waitForState(node, am1ContainerID, RMContainerState.COMPLETED);
+      rm.waitForContainerState(am1ContainerID, RMContainerState.COMPLETED,
+          30 * 1000);
       RMAppAttempt rmAppAttempt2 = MockRM.waitForAttemptScheduled(rmApp, rm);
       ApplicationAttemptId applicationAttemptTwoID =
           rmAppAttempt2.getAppAttemptId();