Ver Fonte

YARN-4477. FairScheduler: Handle condition which can result in an infinite loop in attemptScheduling. (Tao Jie via asuresh)

Arun Suresh há 9 anos atrás
pai
commit
e88422df45

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -1173,6 +1173,9 @@ Release 2.8.0 - UNRELEASED
     YARN-4454. NM to nodelabel mapping going wrong after RM restart. 
     (Bibin A Chundatt via wangda)
 
+    YARN-4477. FairScheduler: Handle condition which can result in an
+    infinite loop in attemptScheduling. (Tao Jie via asuresh)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 7 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java

@@ -458,8 +458,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
    * the container is {@code alreadyReserved} on the node, simply
    * update relevant bookeeping. This dispatches ro relevant handlers
    * in {@link FSSchedulerNode}..
+   * return whether reservation was possible with the current threshold limits
    */
-  private void reserve(Priority priority, FSSchedulerNode node,
+  private boolean reserve(Priority priority, FSSchedulerNode node,
       Container container, NodeType type, boolean alreadyReserved) {
 
     if (!reservationExceedsThreshold(node, type)) {
@@ -477,7 +478,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
         node.reserveResource(this, priority, rmContainer);
         setReservation(node);
       }
+      return true;
     }
+    return false;
   }
 
   private boolean reservationExceedsThreshold(FSSchedulerNode node,
@@ -627,10 +630,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
       return container.getResource();
     }
 
-    if (isReservable(container)) {
-      // The desired container won't fit here, so reserve
-      reserve(request.getPriority(), node, container, type, reserved);
-
+    // The desired container won't fit here, so reserve
+    if (isReservable(container) &&
+        reserve(request.getPriority(), node, container, type, reserved)) {
       return FairScheduler.CONTAINER_RESERVED;
     } else {
       if (LOG.isDebugEnabled()) {

+ 38 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java

@@ -981,6 +981,43 @@ public class TestFairScheduler extends FairSchedulerTestBase {
             scheduler.getSchedulerApp(attId).getNumReservations(null, true));
   }
 
+  @Test (timeout = 5000)
+  public void testReservationThresholdWithAssignMultiple() throws Exception {
+    // set reservable-nodes to 0 which make reservation exceed
+    conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 0f);
+    conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true);
+    scheduler.init(conf);
+    scheduler.start();
+    scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+    // Add two node
+    RMNode node1 =
+        MockNodes
+                .newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
+    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
+    scheduler.handle(nodeEvent1);
+    RMNode node2 =
+        MockNodes
+                .newNodeInfo(2, Resources.createResource(4096, 4), 1, "127.0.0.2");
+    NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
+    scheduler.handle(nodeEvent2);
+
+    //create one request and assign containers
+    ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 10);
+    scheduler.update();
+    scheduler.handle(new NodeUpdateSchedulerEvent(node1));
+    scheduler.update();
+    scheduler.handle(new NodeUpdateSchedulerEvent(node2));
+
+    // Verify capacity allocation
+    assertEquals(8192, scheduler.getQueueManager().getQueue("queue1").
+            getResourceUsage().getMemory());
+
+    // Verify number of reservations have decremented
+    assertEquals(0,
+            scheduler.getSchedulerApp(attId).getNumReservations(null, true));
+  }
+
   @Test (timeout = 500000)
   public void testContainerReservationAttemptExceedingQueueMax()
       throws Exception {
@@ -4152,6 +4189,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
   @Test
   public void testQueueMaxAMShareWithContainerReservation() throws Exception {
     conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+    conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 1f);
     PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
     out.println("<?xml version=\"1.0\"?>");
     out.println("<allocations>");