Selaa lähdekoodia

YARN-11815: Fix NodeQueueLoadMonitor scheduler running on standby RMs (#7665)

* `NodeQueueLoadMonitor` runs as an active service
  via `OpportunisticContainerAllocatorAMService` in
  the YARN ResourceManager (RM).
* However, its scheduler thread is started in the
  constructor itself.
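* This causes the scheduler to run on standby RMs
  too, which should not happen because the monitor
  is an active service.
* The fix moves scheduling out of the constructor
  into a new `NodeQueueLoadMonitor.start()` method,
  which `OpportunisticContainerAllocatorAMService`
  calls from `serviceStart()`.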
nihal292 1 kuukausi sitten
vanhempi
commit
e442a32a8b

+ 8 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java

@@ -373,6 +373,14 @@ public class OpportunisticContainerAllocatorAMService
     }
   }
 
+  @Override
+  protected void serviceStart() throws Exception {
+    if (this.nodeMonitor != null) {
+      this.nodeMonitor.start();
+    }
+    super.serviceStart();
+  }
+
   @Override
   protected void serviceStop() throws Exception {
     if (nodeMonitor != null) {

+ 7 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/NodeQueueLoadMonitor.java

@@ -245,6 +245,7 @@ public class NodeQueueLoadMonitor implements ClusterMonitor {
   protected ReentrantReadWriteLock sortedNodesLock = new ReentrantReadWriteLock();
   protected ReentrantReadWriteLock clusterNodesLock =
       new ReentrantReadWriteLock();
+  private long nodeComputationInterval;
 
   Runnable computeTask = new Runnable() {
     @Override
@@ -278,12 +279,15 @@ public class NodeQueueLoadMonitor implements ClusterMonitor {
     this.sortedNodes = new ArrayList<>();
     this.scheduledExecutor = Executors.newScheduledThreadPool(1);
     this.comparator = comparator;
-    this.scheduledExecutor.scheduleAtFixedRate(computeTask,
-        nodeComputationInterval, nodeComputationInterval,
-        TimeUnit.MILLISECONDS);
+    this.nodeComputationInterval = nodeComputationInterval;
     numNodesForAnyAllocation = numNodes;
   }
 
+  public void start() {
+    this.scheduledExecutor.scheduleAtFixedRate(computeTask, nodeComputationInterval,
+        nodeComputationInterval, TimeUnit.MILLISECONDS);
+  }
+
   protected void updateSortedNodes() {
     List<NodeId> nodeIds = sortNodes(true).stream()
         .map(n -> n.nodeId)

+ 1 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java

@@ -750,17 +750,7 @@ public class MockRM extends ResourceManager {
         YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED,
         YarnConfiguration.DEFAULT_OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED)) {
       return new OpportunisticContainerAllocatorAMService(getRMContext(),
-          scheduler) {
-        @Override
-        protected void serviceStart() {
-          // override to not start rpc handler
-        }
-
-        @Override
-        protected void serviceStop() {
-          // don't do anything
-        }
-      };
+          scheduler);
     }
     return new ApplicationMasterService(getRMContext(), scheduler) {
       @Override