浏览代码

YARN-6218. Fix TestAMRMClient when using FairScheduler. (Miklos Szegedi via rchiang)

(cherry picked from commit 30b91ff9540e35195af834d1bd5983114a556c6d)
Ray Chiang 8 年之前
父节点
当前提交
303ee13e3c

+ 85 - 54
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java

@@ -86,47 +86,71 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.server.MiniYARNCluster;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.Records;
 import org.junit.After;
-import org.junit.AfterClass;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
-import org.junit.BeforeClass;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 import org.mortbay.log.Log;
 
 import com.google.common.base.Supplier;
 
+/**
+ * Test application master client class to resource manager.
+ */
+@RunWith(value = Parameterized.class)
 public class TestAMRMClient {
-  static Configuration conf = null;
-  static MiniYARNCluster yarnCluster = null;
-  static YarnClient yarnClient = null;
-  static List<NodeReport> nodeReports = null;
-  static ApplicationAttemptId attemptId = null;
-  static int nodeCount = 3;
+  private String schedulerName = null;
+  private Configuration conf = null;
+  private MiniYARNCluster yarnCluster = null;
+  private YarnClient yarnClient = null;
+  private List<NodeReport> nodeReports = null;
+  private ApplicationAttemptId attemptId = null;
+  private int nodeCount = 3;
   
   static final int rolling_interval_sec = 13;
   static final long am_expire_ms = 4000;
 
-  static Resource capability;
-  static Priority priority;
-  static Priority priority2;
-  static String node;
-  static String rack;
-  static String[] nodes;
-  static String[] racks;
+  private Resource capability;
+  private Priority priority;
+  private Priority priority2;
+  private String node;
+  private String rack;
+  private String[] nodes;
+  private String[] racks;
   private final static int DEFAULT_ITERATION = 3;
 
-  @BeforeClass
-  public static void setup() throws Exception {
+  public TestAMRMClient(String schedulerName) {
+    this.schedulerName = schedulerName;
+  }
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    List<Object[]> list = new ArrayList<Object[]>(2);
+    list.add(new Object[] {CapacityScheduler.class.getName()});
+    list.add(new Object[] {FairScheduler.class.getName()});
+    return list;
+  }
+
+  @Before
+  public void setup() throws Exception {
     // start minicluster
     conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.RM_SCHEDULER, schedulerName);
     conf.setLong(
       YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS,
       rolling_interval_sec);
@@ -160,10 +184,7 @@ public class TestAMRMClient {
     rack = nodeReports.get(0).getRackName();
     nodes = new String[]{ node };
     racks = new String[]{ rack };
-  }
-  
-  @Before
-  public void startApp() throws Exception {
+
     // submit new app
     ApplicationSubmissionContext appContext = 
         yarnClient.createApplication().getApplicationSubmissionContext();
@@ -221,13 +242,10 @@ public class TestAMRMClient {
   }
   
   @After
-  public void cancelApp() throws YarnException, IOException {
+  public void teardown() throws YarnException, IOException {
     yarnClient.killApplication(attemptId.getApplicationId());
     attemptId = null;
-  }
-  
-  @AfterClass
-  public static void tearDown() {
+
     if (yarnClient != null && yarnClient.getServiceState() == STATE.STARTED) {
       yarnClient.stop();
     }
@@ -663,8 +681,8 @@ public class TestAMRMClient {
           amClient.releaseAssignedContainer(container.getId());
         }
         if(allocatedContainerCount < containersRequestedAny) {
-          // sleep to let NM's heartbeat to RM and trigger allocations
-          sleep(100);
+          // let NM heartbeat to RM and trigger allocations
+          triggerSchedulingWithNMHeartBeat();
         }
       }
       
@@ -685,7 +703,27 @@ public class TestAMRMClient {
       }
     }
   }
-  
+
+  /**
+   * Make sure we get allocations regardless of timing issues.
+   */
+  private void triggerSchedulingWithNMHeartBeat() {
+    // Simulate fair scheduler update thread
+    RMContext context = yarnCluster.getResourceManager().getRMContext();
+    if (context.getScheduler() instanceof FairScheduler) {
+      FairScheduler scheduler = (FairScheduler)context.getScheduler();
+      scheduler.update();
+    }
+    // Trigger NM's heartbeat to RM and trigger allocations
+    for (RMNode rmNode : context.getRMNodes().values()) {
+      context.getScheduler().handle(new NodeUpdateSchedulerEvent(rmNode));
+    }
+    if (context.getScheduler() instanceof FairScheduler) {
+      FairScheduler scheduler = (FairScheduler)context.getScheduler();
+      scheduler.update();
+    }
+  }
+
   @Test (timeout=60000)
   public void testAllocationWithBlacklist() throws YarnException, IOException {
     AMRMClientImpl<ContainerRequest> amClient = null;
@@ -817,8 +855,8 @@ public class TestAMRMClient {
       allocatedContainerCount += allocResponse.getAllocatedContainers().size();
         
       if(allocatedContainerCount == 0) {
-        // sleep to let NM's heartbeat to RM and trigger allocations
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
       }
     }
     return allocatedContainerCount;
@@ -940,6 +978,8 @@ public class TestAMRMClient {
   @Test(timeout=60000)
   public void testAMRMClientWithContainerResourceChange()
       throws YarnException, IOException {
+    // Fair scheduler does not support resource change
+    Assume.assumeTrue(schedulerName.equals(CapacityScheduler.class.getName()));
     AMRMClient<ContainerRequest> amClient = null;
     try {
       // start am rm client
@@ -987,8 +1027,8 @@ public class TestAMRMClient {
     }
     // send allocation requests
     amClient.allocate(0.1f);
-    // sleep to let NM's heartbeat to RM and trigger allocations
-    sleep(150);
+    // let NM heartbeat to RM and trigger allocations
+    triggerSchedulingWithNMHeartBeat();
     // get allocations
     AllocateResponse allocResponse = amClient.allocate(0.1f);
     List<Container> containers = allocResponse.getAllocatedContainers();
@@ -1018,14 +1058,14 @@ public class TestAMRMClient {
           if (status.getState() == ContainerState.RUNNING) {
             break;
           }
-          sleep(100);
+          sleep(10);
         }
       }
     } catch (YarnException e) {
       throw new AssertionError("Exception is not expected: " + e);
     }
-    // sleep to let NM's heartbeat to RM to confirm container launch
-    sleep(200);
+    // let NM's heartbeat to RM to confirm container launch
+    triggerSchedulingWithNMHeartBeat();
     return containers;
   }
 
@@ -1075,7 +1115,7 @@ public class TestAMRMClient {
         allocResponse.getUpdatedContainers();
     Assert.assertEquals(1, updatedContainers.size());
     // we should get increase allocation after the next NM's heartbeat to RM
-    sleep(150);
+    triggerSchedulingWithNMHeartBeat();
     // get allocations
     allocResponse = amClient.allocate(0.1f);
     updatedContainers =
@@ -1138,8 +1178,8 @@ public class TestAMRMClient {
       }
       
       if(allocatedContainerCount < containersRequestedAny) {
-        // sleep to let NM's heartbeat to RM and trigger allocations
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
       }
     }
     
@@ -1221,8 +1261,8 @@ public class TestAMRMClient {
         }
       }
       if(numIterations > 0) {
-        // sleep to make sure NM's heartbeat
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
       }
     }
     assertEquals(0, amClient.ask.size());
@@ -1280,8 +1320,8 @@ public class TestAMRMClient {
       }
 
       if(allocatedContainers.size() < containersRequestedAny) {
-        // sleep to let NM's heartbeat to RM and trigger allocations
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
       }
     }
 
@@ -1392,12 +1432,7 @@ public class TestAMRMClient {
       while (System.currentTimeMillis() - startTime <
           rolling_interval_sec * 1000) {
         amClient.allocate(0.1f);
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          // TODO Auto-generated catch block
-          e.printStackTrace();
-        }
+        sleep(1000);
       }
       amClient.allocate(0.1f);
 
@@ -1457,11 +1492,7 @@ public class TestAMRMClient {
           }
         }
         amClient.allocate(0.1f);
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          // DO NOTHING
-        }
+        sleep(1000);
       }
 
       try {

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java

@@ -353,7 +353,8 @@ public class FairScheduler extends
    * fair shares, deficits, minimum slot allocations, and amount of used and
    * required resources per job.
    */
-  protected void update() {
+  @VisibleForTesting
+  public void update() {
     try {
       writeLock.lock();