浏览代码

YARN-7327: Enable asynchronous scheduling by default for capacity scheduler (#7138)

Syed Shameerur Rahman 6 月之前
父节点
当前提交
7f49190802
共有 9 个文件被更改,包括 311 次插入和 14 次删除
  1. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
  2. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSYarnSiteConverter.java
  3. 5 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java
  4. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/TestFSYarnSiteConverter.java
  5. 5 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/weightconversion/TestWeightToPercentageConverter.java
  6. 4 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/weightconversion/TestWeightToWeightConverter.java
  7. 1 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java
  8. 255 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/capacity-scheduler.xml
  9. 37 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java

@@ -299,7 +299,7 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
       DEFAULT_SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS = 100;
 
   @Private
-  public static final boolean DEFAULT_SCHEDULE_ASYNCHRONOUSLY_ENABLE = false;
+  public static final boolean DEFAULT_SCHEDULE_ASYNCHRONOUSLY_ENABLE = true;
 
   @Private
   public static final String QUEUE_MAPPING = PREFIX + "queue-mappings";

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSYarnSiteConverter.java

@@ -49,7 +49,7 @@ public class FSYarnSiteConverter {
         FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_ENABLED,
         FairSchedulerConfiguration.DEFAULT_CONTINUOUS_SCHEDULING_ENABLED)) {
       yarnSiteConfig.setBoolean(
-          CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, true);
+          CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, enableAsyncScheduler);
       int interval = conf.getInt(
           FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_SLEEP_MS,
           FairSchedulerConfiguration.DEFAULT_CONTINUOUS_SCHEDULING_SLEEP_MS);

+ 5 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java

@@ -929,7 +929,11 @@ public class TestCapacitySchedulerAsyncScheduling {
      * First proposal should be accepted, second proposal should be rejected
      * because it try to release an outdated reserved container
      */
-    MockRM rm1 = new MockRM();
+    // disable async-scheduling for simulating complex scene
+    Configuration disableAsyncConf = new Configuration(conf);
+    disableAsyncConf.setBoolean(
+        CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, false);
+    MockRM rm1 = new MockRM(disableAsyncConf);
     rm1.getRMContext().setNodeLabelManager(mgr);
     rm1.start();
     MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/TestFSYarnSiteConverter.java

@@ -60,7 +60,7 @@ public class TestFSYarnSiteConverter {
         FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_SLEEP_MS, 666);
 
     converter.convertSiteProperties(yarnConfig, yarnConvertedConfig, false,
-        false, false, null);
+        true, false, null);
 
     assertTrue("Cont. scheduling", yarnConvertedConfig.getBoolean(
         CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, false));
@@ -224,7 +224,7 @@ public class TestFSYarnSiteConverter {
 
     assertFalse("Asynchronous scheduling", yarnConvertedConfig.getBoolean(
             CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE,
-            CapacitySchedulerConfiguration.DEFAULT_SCHEDULE_ASYNCHRONOUSLY_ENABLE));
+            false));
   }
 
   @Test

+ 5 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/weightconversion/TestWeightToPercentageConverter.java

@@ -66,7 +66,7 @@ public class TestWeightToPercentageConverter
     FSQueue root = createFSQueues();
     converter.convertWeightsForChildQueues(root, csConfig);
 
-    assertEquals("Converted items", 19,
+    assertEquals("Converted items", 20,
         csConfig.getPropsWithPrefix(PREFIX).size());
   }
 
@@ -76,7 +76,7 @@ public class TestWeightToPercentageConverter
 
     converter.convertWeightsForChildQueues(root, csConfig);
 
-    assertEquals("Number of properties", 22,
+    assertEquals("Number of properties", 23,
         csConfig.getPropsWithPrefix(PREFIX).size());
     // this is no fixing - it's the result of BigDecimal rounding
     assertEquals("root.a capacity", 16.667f,
@@ -95,7 +95,7 @@ public class TestWeightToPercentageConverter
 
     assertFalse("Capacity zerosum allowed",
         csConfig.getAllowZeroCapacitySum(ROOT));
-    assertEquals("Number of properties", 22,
+    assertEquals("Number of properties", 23,
         csConfig.getPropsWithPrefix(PREFIX).size());
     assertEquals("root.a capacity", 0.000f,
         csConfig.getNonLabeledQueueCapacity(ROOT_A), 0.0f);
@@ -111,7 +111,7 @@ public class TestWeightToPercentageConverter
 
     converter.convertWeightsForChildQueues(root, csConfig);
 
-    assertEquals("Number of properties", 23,
+    assertEquals("Number of properties", 24,
         csConfig.getPropsWithPrefix(PREFIX).size());
     assertTrue("Capacity zerosum allowed",
         csConfig.getAllowZeroCapacitySum(ROOT));
@@ -129,7 +129,7 @@ public class TestWeightToPercentageConverter
 
     converter.convertWeightsForChildQueues(root, csConfig);
 
-    assertEquals("Number of properties", 22,
+    assertEquals("Number of properties", 23,
         csConfig.getPropsWithPrefix(PREFIX).size());
     assertEquals("root.a capacity", 33.334f,
         csConfig.getNonLabeledQueueCapacity(ROOT_A), 0.0f);

+ 4 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/weightconversion/TestWeightToWeightConverter.java

@@ -54,7 +54,7 @@ public class TestWeightToWeightConverter extends WeightConverterTestBase {
 
     assertEquals("root weight", 1.0f,
         csConfig.getNonLabeledQueueWeight(ROOT), 0.0f);
-    assertEquals("Converted items", 21,
+    assertEquals("Converted items", 22,
         csConfig.getPropsWithPrefix(PREFIX).size());
   }
 
@@ -67,7 +67,7 @@ public class TestWeightToWeightConverter extends WeightConverterTestBase {
         csConfig.getNonLabeledQueueWeight(ROOT), 0.0f);
     assertEquals("root.a weight", 1.0f,
         csConfig.getNonLabeledQueueWeight(ROOT_A), 0.0f);
-    assertEquals("Number of properties", 22,
+    assertEquals("Number of properties", 23,
         csConfig.getPropsWithPrefix(PREFIX).size());
   }
 
@@ -77,7 +77,7 @@ public class TestWeightToWeightConverter extends WeightConverterTestBase {
 
     converter.convertWeightsForChildQueues(root, csConfig);
 
-    assertEquals("Number of properties", 24,
+    assertEquals("Number of properties", 25,
         csConfig.getPropsWithPrefix(PREFIX).size());
     assertEquals("root weight", 1.0f,
         csConfig.getNonLabeledQueueWeight(ROOT), 0.0f);
@@ -103,7 +103,7 @@ public class TestWeightToWeightConverter extends WeightConverterTestBase {
     FSQueue root = createParent(new ArrayList<>());
     converter.convertWeightsForChildQueues(root, csConfig);
 
-    assertEquals("Number of properties", 21,
+    assertEquals("Number of properties", 22,
         csConfig.getPropsWithPrefix(PREFIX).size());
     assertTrue("root autocreate v2 enabled",
         csConfig.isAutoQueueCreationV2Enabled(ROOT));

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java

@@ -167,6 +167,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTestBase {
     conf.set("yarn.scheduler.capacity.root.a.max-parallel-app", "42");
     conf.set("yarn.scheduler.capacity.root.b.capacity", "50");
     conf.set("yarn.scheduler.capacity.root.c.capacity", "37.5");
+    conf.set("yarn.scheduler.capacity.schedule-asynchronously.enable", "false");
     return conf;
   }
 }

+ 255 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/capacity-scheduler.xml

@@ -0,0 +1,255 @@
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+    <property>
+        <name>yarn.scheduler.capacity.maximum-applications</name>
+        <value>10000</value>
+        <description>
+            Maximum number of applications that can be pending and running.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+        <value>0.1</value>
+        <description>
+            Maximum percent of resources in the cluster which can be used to run
+            application masters i.e. controls number of concurrent running
+            applications.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.resource-calculator</name>
+        <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+        <description>
+            The ResourceCalculator implementation to be used to compare
+            Resources in the scheduler.
+            The default i.e. DefaultResourceCalculator only uses Memory while
+            DominantResourceCalculator uses dominant-resource to compare
+            multi-dimensional resources such as Memory, CPU etc.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.queues</name>
+        <value>default</value>
+        <description>
+            The queues at this level (root is the root queue).
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.capacity</name>
+        <value>100</value>
+        <description>Default queue target capacity.</description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+        <value>1</value>
+        <description>
+            Default queue user limit a percentage from 0.0 to 1.0.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+        <value>100</value>
+        <description>
+            The maximum capacity of the default queue.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.state</name>
+        <value>RUNNING</value>
+        <description>
+            The state of the default queue. State can be one of RUNNING or STOPPED.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+        <value>*</value>
+        <description>
+            The ACL of who can submit jobs to the default queue.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+        <value>*</value>
+        <description>
+            The ACL of who can administer jobs on the default queue.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>
+        <value>*</value>
+        <description>
+            The ACL of who can submit applications with configured priority.
+            For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.maximum-application-lifetime
+        </name>
+        <value>-1</value>
+        <description>
+            Maximum lifetime of an application which is submitted to a queue
+            in seconds. Any value less than or equal to zero will be considered as
+            disabled.
+            This will be a hard time limit for all applications in this
+            queue. If positive value is configured then any application submitted
+            to this queue will be killed after exceeds the configured lifetime.
+            User can also specify lifetime per application basis in
+            application submission context. But user lifetime will be
+            overridden if it exceeds queue maximum lifetime. It is point-in-time
+            configuration.
+            Note : Configuring too low value will result in killing application
+            sooner. This feature is applicable only for leaf queue.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.root.default.default-application-lifetime
+        </name>
+        <value>-1</value>
+        <description>
+            Default lifetime of an application which is submitted to a queue
+            in seconds. Any value less than or equal to zero will be considered as
+            disabled.
+            If the user has not submitted application with lifetime value then this
+            value will be taken. It is point-in-time configuration.
+            Note : Default lifetime can't exceed maximum lifetime. This feature is
+            applicable only for leaf queue.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.node-locality-delay</name>
+        <value>40</value>
+        <description>
+            Number of missed scheduling opportunities after which the CapacityScheduler
+            attempts to schedule rack-local containers.
+            When setting this parameter, the size of the cluster should be taken into account.
+            We use 40 as the default value, which is approximately the number of nodes in one rack.
+            Note, if this value is -1, the locality constraint in the container request
+            will be ignored, which disables the delay scheduling.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
+        <value>-1</value>
+        <description>
+            Number of additional missed scheduling opportunities over the node-locality-delay
+            ones, after which the CapacityScheduler attempts to schedule off-switch containers,
+            instead of rack-local ones.
+            Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
+            attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
+            after 40+20=60 missed opportunities.
+            When setting this parameter, the size of the cluster should be taken into account.
+            We use -1 as the default value, which disables this feature. In this case, the number
+            of missed opportunities for assigning off-switch containers is calculated based on
+            the number of containers and unique locations specified in the resource request,
+            as well as the size of the cluster.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.queue-mappings</name>
+        <value></value>
+        <description>
+            A list of mappings that will be used to assign jobs to queues
+            The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
+            Typically this list will be used to map users to queues,
+            for example, u:%user:%user maps all users to queues with the same name
+            as the user.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
+        <value>false</value>
+        <description>
+            If a queue mapping is present, will it override the value specified
+            by the user? This can be used by administrators to place jobs in queues
+            that are different than the one specified by the user.
+            The default is false.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
+        <value>1</value>
+        <description>
+            Controls the number of OFF_SWITCH assignments allowed
+            during a node's heartbeat. Increasing this value can improve
+            scheduling rate for OFF_SWITCH containers. Lower values reduce
+            "clumping" of applications on particular nodes. The default is 1.
+            Legal values are 1-MAX_INT. This config is refreshable.
+        </description>
+    </property>
+
+
+    <property>
+        <name>yarn.scheduler.capacity.application.fail-fast</name>
+        <value>false</value>
+        <description>
+            Whether RM should fail during recovery if previous applications'
+            queue is no longer valid.
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.workflow-priority-mappings</name>
+        <value></value>
+        <description>
+            A list of mappings that will be used to override application priority.
+            The syntax for this list is
+            [workflowId]:[full_queue_name]:[priority][,next mapping]*
+            where an application submitted (or mapped to) queue "full_queue_name"
+            and workflowId "workflowId" (as specified in application submission
+            context) will be given priority "priority".
+        </description>
+    </property>
+
+    <property>
+        <name>yarn.scheduler.capacity.workflow-priority-mappings-override.enable</name>
+        <value>false</value>
+        <description>
+            If a priority mapping is present, will it override the value specified
+            by the user? This can be used by administrators to give applications a
+            priority that is different than the one specified by the user.
+            The default is false.
+        </description>
+    </property>
+
+<!--    Although asynchronous scheduling is enabled by default, it is disabled-->
+<!--    here for unit testing to give more control over container scheduling-->
+<!--    while simulating complex tests.-->
+    <property>
+        <name>yarn.scheduler.capacity.schedule-asynchronously.enable</name>
+        <value>false</value>
+        <description>
+            Whether to enable asynchronous scheduling.
+        </description>
+    </property>
+
+</configuration>

+ 37 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md

@@ -37,6 +37,32 @@ The primary abstraction provided by the `CapacityScheduler` is the concept of *q
 
 To provide further control and predictability on sharing of resources, the `CapacityScheduler` supports *hierarchical queues* to ensure resources are shared among the sub-queues of an organization before other queues are allowed to use free resources, thereby providing *affinity* for sharing free resources among applications of a given organization.
 
+The container allocation in `CapacityScheduler` can be triggered by using one of the following ways:
+
+* **Node heartbeat** : The container scheduling is triggered by a
+  heartbeat signal from a NodeManager to the ResourceManager. The scheduler then
+  selects an application that can be scheduled for the node.
+  This type of scheduling is effectively random: whichever node's heartbeat
+  arrives next gets the chance. If the NodeManager heartbeat interval is set to higher values,
+  it can negatively affect the container scheduling performance.
+
+* **Asynchronous scheduling** : The container scheduling is triggered by
+  single or multiple parallel threads running in the background.
+  The scheduler first selects a random node from the node list for scheduling,
+  and then it loops through the node list (circularly) to ensure that all the nodes get
+  a fair chance. This approach improves the scheduling performance because
+  it is proactive and does not need to wait for any events.
+
+* **Global scheduling** : It is similar to asynchronous scheduling,
+  but instead of randomly picking nodes, the nodes are selected for
+  scheduling based on factors such as the resource size, scheduling requirements,
+  and resource distribution of the applications. This approach allows the scheduler
+  to make optimal scheduling decisions. The node selection policy is pluggable,
+  meaning it can be customized to fit the specific needs of the application.
+
+Note: Asynchronous scheduling is the default scheduling mechanism for the capacity scheduler.
+
+
 Features
 --------
 
@@ -85,6 +111,17 @@ Configuration
 |:---- |:---- |
 | `yarn.resourcemanager.scheduler.class` | `org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler` |
 
+###Setting up scheduling strategy
+
+  To configure a different scheduling strategy in the capacity scheduler, set the following property in the **conf/capacity-scheduler.xml**:
+
+
+| Property                                                                                                         | Description                                                                                                                                                                                          |
+|:-----------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `yarn.scheduler.capacity.schedule-asynchronously.enable`                                                         | Specifies whether to enable asynchronous scheduling. The default value is true.                                                                                                                      |
+| `yarn.scheduler.capacity.multi-node-placement-enabled`                                                           | Specifies whether to enable global scheduling. The default value is false. In addition to this, node sorting policy needs to be set using `yarn.scheduler.capacity.multi-node-sorting.policy.names`. |
+
+
 ###Setting up queues
 
   `etc/hadoop/capacity-scheduler.xml` is the configuration file for the `CapacityScheduler`.