Browse Source

YARN-4866. FairScheduler: AMs can consume all vcores leading to a livelock when using FAIR policy. (Yufei Gu via kasha)

Karthik Kambatla 9 năm trước cách đây
mục cha
commit
4f513a4a8e

+ 19 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java

@@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
 @Private
@@ -481,8 +482,8 @@ public class FSLeafQueue extends FSQueue {
 
   /**
    * Check whether this queue can run this application master under the
-   * maxAMShare limit
-   *
+   * maxAMShare limit. For FIFO and FAIR policies, check if the VCore usage
+   * takes up the entire cluster or maxResources for the queue.
    * @param amResource
    * @return true if this queue can run
    */
@@ -494,8 +495,22 @@ public class FSLeafQueue extends FSQueue {
     }
     Resource maxAMResource = Resources.multiply(getFairShare(), maxAMShare);
     Resource ifRunAMResource = Resources.add(amResourceUsage, amResource);
-    return !policy
-        .checkIfAMResourceUsageOverLimit(ifRunAMResource, maxAMResource);
+
+    boolean overMaxAMShareLimit = policy
+            .checkIfAMResourceUsageOverLimit(ifRunAMResource, maxAMResource);
+
+    // For fair policy and fifo policy, which don't check VCore usage,
+    // additionally check whether the AM would take all available VCores or
+    // go over maxResources, to avoid deadlock.
+    if (!overMaxAMShareLimit && !policy.equals(
+        SchedulingPolicy.getInstance(DominantResourceFairnessPolicy.class))) {
+      overMaxAMShareLimit =
+         isVCoresOverMaxResource(ifRunAMResource.getVirtualCores()) ||
+         ifRunAMResource.getVirtualCores() >=
+         scheduler.getRootQueueMetrics().getAvailableVirtualCores();
+    }
+
+    return !overMaxAMShareLimit;
   }
 
   public void addAMResourceUsage(Resource amResource) {

+ 19 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java

@@ -310,6 +310,25 @@ public abstract class FSQueue implements Queue, Schedulable {
     return true;
   }
 
+  /**
+   * Checks requested VCores against maxResources of this queue and its parents.
+   * @param requestedVCores the number of VCores requested
+   * @return true if requestedVCores is greater than or equal to the maxResources
+   *         VCores of this queue or of any ancestor queue; false otherwise
+   */
+  protected boolean isVCoresOverMaxResource(int requestedVCores) {
+    if (requestedVCores >= scheduler.getAllocationConfiguration().
+        getMaxResources(getName()).getVirtualCores()) {
+      return true;
+    }
+    // This queue's limit is not reached; stop if there is no parent to check.
+    if (getParent() == null) {
+      return false;
+    }
+    // A parent queue may have a lower maxResources, so apply the same check.
+    return getParent().isVCoresOverMaxResource(requestedVCores);
+  }
+
   /**
    * Returns true if queue has at least one app running.
    */

+ 43 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java

@@ -3287,6 +3287,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
   @Test
   public void testQueueMaxAMShareDefault() throws Exception {
     conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+    conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 6);
 
     PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
     out.println("<?xml version=\"1.0\"?>");
@@ -3297,11 +3298,14 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     out.println("<maxAMShare>0.4</maxAMShare>");
     out.println("</queue>");
     out.println("<queue name=\"queue3\">");
+    out.println("<maxResources>10240 mb 4 vcores</maxResources>");
     out.println("</queue>");
     out.println("<queue name=\"queue4\">");
     out.println("</queue>");
     out.println("<queue name=\"queue5\">");
     out.println("</queue>");
+    out.println(
+        "<defaultQueueSchedulingPolicy>fair</defaultQueueSchedulingPolicy>");
     out.println("</allocations>");
     out.close();
 
@@ -3310,7 +3314,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     scheduler.reinitialize(conf, resourceManager.getRMContext());
 
     RMNode node =
-        MockNodes.newNodeInfo(1, Resources.createResource(8192, 20),
+        MockNodes.newNodeInfo(1, Resources.createResource(8192, 10),
             0, "127.0.0.1");
     NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node);
     NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);
@@ -3378,6 +3382,44 @@ public class TestFairScheduler extends FairSchedulerTestBase {
         0, app2.getLiveContainers().size());
     assertEquals("Queue2's AM resource usage should be 0 MB memory",
         0, queue2.getAmResourceUsage().getMemory());
+
+    // Remove the app2
+    AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
+        new AppAttemptRemovedSchedulerEvent(attId2,
+                RMAppAttemptState.FINISHED, false);
+    scheduler.handle(appRemovedEvent2);
+    scheduler.update();
+
+    // AM3 passes the fair share check, but it would take all available VCores,
+    // so AM3 is not accepted.
+    ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
+    createApplicationWithAMResource(attId3, "queue3", "test1", amResource1);
+    createSchedulingRequestExistingApplication(1024, 6, amPriority, attId3);
+    FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
+    scheduler.update();
+    scheduler.handle(updateEvent);
+    assertEquals("Application3's AM resource shouldn't be updated",
+            0, app3.getAMResource().getMemory());
+    assertEquals("Application3's AM should not be running",
+            0, app3.getLiveContainers().size());
+    assertEquals("Queue3's AM resource usage should be 0 MB memory",
+            0, queue3.getAmResourceUsage().getMemory());
+
+    // AM4 passes the fair share check and doesn't take all available
+    // VCores, but it needs 5 VCores, which is more than queue3's
+    // maxResources (4 VCores), so AM4 is not accepted.
+    ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
+    createApplicationWithAMResource(attId4, "queue3", "test1", amResource1);
+    createSchedulingRequestExistingApplication(1024, 5, amPriority, attId4);
+    FSAppAttempt app4 = scheduler.getSchedulerApp(attId4);
+    scheduler.update();
+    scheduler.handle(updateEvent);
+    assertEquals("Application4's AM resource shouldn't be updated",
+            0, app4.getAMResource().getMemory());
+    assertEquals("Application4's AM should not be running",
+            0, app4.getLiveContainers().size());
+    assertEquals("Queue3's AM resource usage should be 0 MB memory",
+            0, queue3.getAmResourceUsage().getMemory());
   }
 
   /**