Browse Source

YARN-11100. Fix StackOverflowError in SLS scheduler event handling. Contributed by Szilard Nemeth.

9uapaw 3 năm trước
mục cha
commit
adbaf48082

+ 3 - 2
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java

@@ -175,11 +175,12 @@ public class SLSRunner extends Configured implements Tool {
   }
 
   private void init(Configuration tempConf) throws ClassNotFoundException {
+    // runner configuration
+    setConf(tempConf);
+
     nmMap = new ConcurrentHashMap<>();
     queueAppNumMap = new HashMap<>();
     amRunner = new AMRunner(runner, this);
-    // runner configuration
-    setConf(tempConf);
 
     // runner
     poolSize = tempConf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,

+ 15 - 1
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java

@@ -39,7 +39,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEv
 @Private
 @Unstable
 public class SLSCapacityScheduler extends CapacityScheduler implements
-        SchedulerWrapper,Configurable {
+        SchedulerWrapper, Configurable {
 
   private final SLSSchedulerCommons schedulerCommons;
   private Configuration conf;
@@ -65,6 +65,15 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
         containerIds, blacklistAdditions, blacklistRemovals, updateRequests);
   }
 
+  @Override
+  public Allocation allocatePropagated(ApplicationAttemptId attemptId,
+      List<ResourceRequest> resourceRequests,
+      List<SchedulingRequest> schedulingRequests,
+      List<ContainerId> containerIds, List<String> blacklistAdditions,
+      List<String> blacklistRemovals, ContainerUpdates updateRequests) {
+    return super.allocate(attemptId, resourceRequests, schedulingRequests,
+        containerIds, blacklistAdditions, blacklistRemovals, updateRequests);
+  }
 
   @Override
   public boolean tryCommit(Resource cluster, ResourceCommitRequest r,
@@ -97,6 +106,11 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
     schedulerCommons.handle(schedulerEvent);
   }
 
+  @Override
+  public void propagatedHandle(SchedulerEvent schedulerEvent) {
+    super.handle(schedulerEvent);
+  }
+
   @Override
   public void serviceStop() throws Exception {
     schedulerCommons.stopMetrics();

+ 15 - 0
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSFairScheduler.java

@@ -63,6 +63,21 @@ public class SLSFairScheduler extends FairScheduler
     schedulerCommons.handle(schedulerEvent);
   }
 
+  @Override
+  public void propagatedHandle(SchedulerEvent schedulerEvent) {
+    super.handle(schedulerEvent);
+  }
+
+  @Override
+  public Allocation allocatePropagated(ApplicationAttemptId attemptId,
+      List<ResourceRequest> resourceRequests,
+      List<SchedulingRequest> schedulingRequests,
+      List<ContainerId> containerIds, List<String> blacklistAdditions,
+      List<String> blacklistRemovals, ContainerUpdates updateRequests) {
+    return super.allocate(attemptId, resourceRequests, schedulingRequests,
+        containerIds, blacklistAdditions, blacklistRemovals, updateRequests);
+  }
+
   @Override
   public void serviceStop() throws Exception {
     schedulerCommons.stopMetrics();

+ 6 - 4
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSSchedulerCommons.java

@@ -100,7 +100,8 @@ public class SLSSchedulerCommons {
           .time();
       Allocation allocation = null;
       try {
-        allocation = scheduler.allocate(attemptId, resourceRequests,
+        allocation = ((SchedulerWrapper)scheduler).allocatePropagated(
+            attemptId, resourceRequests,
             schedulingRequests, containerIds,
             blacklistAdditions, blacklistRemovals, updateRequests);
         return allocation;
@@ -118,7 +119,8 @@ public class SLSSchedulerCommons {
         }
       }
     } else {
-      return scheduler.allocate(attemptId, resourceRequests, schedulingRequests,
+      return ((SchedulerWrapper)scheduler).allocatePropagated(
+          attemptId, resourceRequests, schedulingRequests,
           containerIds,
           blacklistAdditions, blacklistRemovals, updateRequests);
     }
@@ -204,7 +206,7 @@ public class SLSSchedulerCommons {
 
   public void handle(SchedulerEvent schedulerEvent) {
     if (!metricsON) {
-      scheduler.handle(schedulerEvent);
+      ((SchedulerWrapper)scheduler).propagatedHandle(schedulerEvent);
       return;
     }
 
@@ -245,7 +247,7 @@ public class SLSSchedulerCommons {
       operationTimer = schedulerMetrics.getSchedulerHandleTimer(
           schedulerEvent.getType()).time();
 
-      scheduler.handle(schedulerEvent);
+      ((SchedulerWrapper)scheduler).propagatedHandle(schedulerEvent);
     } finally {
       if (handlerTimer != null) {
         handlerTimer.stop();

+ 19 - 0
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerWrapper.java

@@ -19,7 +19,16 @@ package org.apache.hadoop.yarn.sls.scheduler;
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.api.records.SchedulingRequest;
 import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerUpdates;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
+
+import java.util.List;
 
 @Private
 @Unstable
@@ -29,4 +38,14 @@ public interface SchedulerWrapper {
   Tracker getTracker();
 
   String getRealQueueName(String queue) throws YarnException;
+
+  void propagatedHandle(SchedulerEvent schedulerEvent);
+
+  Allocation allocatePropagated(ApplicationAttemptId attemptId,
+      List<ResourceRequest> resourceRequests,
+      List<SchedulingRequest> schedulingRequests,
+      List<ContainerId> containerIds,
+      List<String> blacklistAdditions,
+      List<String> blacklistRemovals,
+      ContainerUpdates updateRequests);
 }