Przeglądaj źródła

YARN-2534. FairScheduler: Potential integer overflow calculating totalMaxShare. (Zhihai Xu via kasha)

(cherry picked from commit c11ada5ea6d17321626e5a9a4152ff857d03aee2)
Karthik Kambatla 10 lat temu
rodzic
commit
6f8b2a8e2e

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -297,6 +297,9 @@ Release 2.6.0 - UNRELEASED
     YARN-2158. Fixed TestRMWebServicesAppsModification#testSingleAppKill test
     failure. (Varun Vasudev via jianhe)
 
+    YARN-2534. FairScheduler: Potential integer overflow calculating totalMaxShare. 
+    (Zhihai Xu via kasha)
+
 Release 2.5.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 3 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java

@@ -124,11 +124,10 @@ public class ComputeFairShares {
     int totalMaxShare = 0;
     for (Schedulable sched : schedulables) {
       int maxShare = getResourceValue(sched.getMaxShare(), type);
-      if (maxShare == Integer.MAX_VALUE) {
-        totalMaxShare = Integer.MAX_VALUE;
+      totalMaxShare = (int) Math.min((long)maxShare + (long)totalMaxShare,
+          Integer.MAX_VALUE);
+      if (totalMaxShare == Integer.MAX_VALUE) {
         break;
-      } else {
-        totalMaxShare += maxShare;
       }
     }
 

+ 48 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java

@@ -308,6 +308,54 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     }
   }
 
+  @Test
+  public void testFairShareWithMaxResources() throws IOException {
+    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+    // Give queueA and queueB a maxResources of 1073741824 (2^30) mb each,
+    // so that the sum of the two maxResources (2^31) is more than
+    // Integer.MAX_VALUE (2^31 - 1) and cannot be held in an int.
+    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
+    out.println("<?xml version=\"1.0\"?>");
+    out.println("<allocations>");
+    out.println("<queue name=\"queueA\">");
+    out.println("<maxResources>1073741824 mb 1000 vcores</maxResources>");
+    out.println("<weight>.25</weight>");
+    out.println("</queue>");
+    out.println("<queue name=\"queueB\">");
+    out.println("<maxResources>1073741824 mb 1000 vcores</maxResources>");
+    out.println("<weight>.75</weight>");
+    out.println("</queue>");
+    out.println("</allocations>");
+    out.close();
+
+    scheduler.init(conf);
+    scheduler.start();
+    scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+    // Add one big node (only care about aggregate capacity)
+    RMNode node1 =
+        MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1,
+            "127.0.0.1");
+    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
+    scheduler.handle(nodeEvent1);
+
+    // Queue A wants 1 * 1024.
+    createSchedulingRequest(1 * 1024, "queueA", "user1");
+    // Queue B wants 6 * 1024.
+    createSchedulingRequest(6 * 1024, "queueB", "user1");
+
+    scheduler.update();
+
+    FSLeafQueue queue = scheduler.getQueueManager().getLeafQueue(
+        "queueA", false);
+    // queueA's weight is 0.25, so its fair share of 8 * 1024 is 2 * 1024.
+    assertEquals(2 * 1024, queue.getFairShare().getMemory());
+    // queueB's weight is 0.75, so its fair share of 8 * 1024 is 6 * 1024.
+    queue = scheduler.getQueueManager().getLeafQueue(
+        "queueB", false);
+    assertEquals(6 * 1024, queue.getFairShare().getMemory());
+  }
+
   @Test
   public void testFairShareWithZeroWeight() throws IOException {
     conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);