Browse Source

YARN-7591. NPE in async-scheduling mode of CapacityScheduler. (Tao Yang via wangda)

Change-Id: I46689e530550ee0a6ac7a29786aab2cc1bdf314f
Wangda Tan 7 năm trước cách đây
mục cha
commit
adca1a72e4

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java

@@ -1301,6 +1301,10 @@ public class CapacityScheduler extends
     if (reservedContainer != null) {
       FiCaSchedulerApp reservedApplication = getCurrentAttemptForContainer(
           reservedContainer.getContainerId());
+      if (reservedApplication == null) {
+        LOG.error("Trying to schedule for a finished app, please double check.");
+        return null;
+      }
 
       // Try to fulfill the reservation
       LOG.info(

+ 20 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -1201,7 +1201,14 @@ public class LeafQueue extends AbstractCSQueue {
             allocation.getSchedulingMode(), null);
 
         // Deduct resources that we can release
-        Resource usedResource = Resources.clone(getUser(username).getUsed(p));
+        User user = getUser(username);
+        if (user == null) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("User " + username + " has been removed!");
+          }
+          return false;
+        }
+        Resource usedResource = Resources.clone(user.getUsed(p));
         Resources.subtractFrom(usedResource,
             request.getTotalReleasedResource());
 
@@ -1406,6 +1413,12 @@ public class LeafQueue extends AbstractCSQueue {
       SchedulingMode schedulingMode, Resource userLimit) {
     String user = application.getUser();
     User queueUser = getUser(user);
+    if (queueUser == null) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("User " + user + " has been removed!");
+      }
+      return Resources.none();
+    }
 
     // Compute user limit respect requested labels,
     // TODO, need consider headroom respect labels also
@@ -1500,6 +1513,12 @@ public class LeafQueue extends AbstractCSQueue {
     try {
       readLock.lock();
       User user = getUser(userName);
+      if (user == null) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("User " + userName + " has been removed!");
+        }
+        return false;
+      }
 
       currentResourceLimits.setAmountNeededUnreserve(Resources.none());
 

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java

@@ -322,6 +322,11 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
     RMContainer reservedContainerOnNode =
         schedulerContainer.getSchedulerNode().getReservedContainer();
     if (reservedContainerOnNode != null) {
+      // adding NP check as this proposal could not be allocated from reserved
+      // container in async-scheduling mode
+      if (allocation.getAllocateFromReservedContainer() == null) {
+        return false;
+      }
       RMContainer fromReservedContainer =
           allocation.getAllocateFromReservedContainer().getRmContainer();