Browse Source

Merge -c 1406834 from trunk to branch-2 to fix YARN-201. Fix CapacityScheduler to be less conservative for starved off-switch requests. Contributed by Jason Lowe.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1406835 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 12 years ago
parent
commit
b71f09c5f4

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -185,6 +185,9 @@ Release 0.23.5 - UNRELEASED
     YARN-202. Log Aggregation generates a storm of fsync() for namenode 
     (Kihwal Lee via bobby)
 
+    YARN-201. Fix CapacityScheduler to be less conservative for starved 
+    off-switch requests. (jlowe via acmurthy) 
+
 Release 0.23.4 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 6 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -833,8 +833,12 @@ public class LeafQueue implements CSQueue {
             // Note: Update headroom to account for current allocation too...
             allocateResource(clusterResource, application, assigned);
             
-            // Reset scheduling opportunities
-            application.resetSchedulingOpportunities(priority);
+            // Don't reset scheduling opportunities for non-local assignments
+            // otherwise the app will be delayed for each non-local assignment.
+            // This helps apps with many off-cluster requests schedule faster.
+            if (assignment.getType() != NodeType.OFF_SWITCH) {
+              application.resetSchedulingOpportunities(priority);
+            }
             
             // Done
             return assignment;

+ 4 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java

@@ -1259,7 +1259,7 @@ public class TestLeafQueue {
     assignment = a.assignContainers(clusterResource, node_2);
     verify(app_0).allocate(eq(NodeType.OFF_SWITCH), eq(node_2), 
         any(Priority.class), any(ResourceRequest.class), any(Container.class));
-    assertEquals(0, app_0.getSchedulingOpportunities(priority)); // should reset
+    assertEquals(4, app_0.getSchedulingOpportunities(priority)); // should NOT reset
     assertEquals(2, app_0.getTotalRequiredResources(priority));
     assertEquals(NodeType.OFF_SWITCH, assignment.getType());
     
@@ -1408,11 +1408,11 @@ public class TestLeafQueue {
     assertEquals(0, app_0.getSchedulingOpportunities(priority_2));
     assertEquals(1, app_0.getTotalRequiredResources(priority_2));
 
-    // Another off-switch, shouldn allocate OFF_SWITCH P1
+    // Another off-switch, shouldn't allocate OFF_SWITCH P1
     a.assignContainers(clusterResource, node_2);
     verify(app_0).allocate(eq(NodeType.OFF_SWITCH), eq(node_2), 
         eq(priority_1), any(ResourceRequest.class), any(Container.class));
-    assertEquals(0, app_0.getSchedulingOpportunities(priority_1));
+    assertEquals(3, app_0.getSchedulingOpportunities(priority_1));
     assertEquals(1, app_0.getTotalRequiredResources(priority_1));
     verify(app_0, never()).allocate(any(NodeType.class), eq(node_2), 
         eq(priority_2), any(ResourceRequest.class), any(Container.class));
@@ -1438,7 +1438,7 @@ public class TestLeafQueue {
     assertEquals(0, app_0.getTotalRequiredResources(priority_1));
     verify(app_0).allocate(eq(NodeType.OFF_SWITCH), eq(node_1), 
         eq(priority_2), any(ResourceRequest.class), any(Container.class));
-    assertEquals(0, app_0.getSchedulingOpportunities(priority_2));
+    assertEquals(1, app_0.getSchedulingOpportunities(priority_2));
     assertEquals(0, app_0.getTotalRequiredResources(priority_2));
 
   }