Browse Source

YARN-845. RM crash with NPE on NODE_UPDATE (Mayank Bansal via bikas)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1499886 13f79535-47bb-0310-9956-ffa450edef68
Bikas Saha 12 năm trước
mục cha
commit
eff5d9b17e

+ 2 - 0
hadoop-yarn-project/CHANGES.txt

@@ -663,6 +663,8 @@ Release 2.1.0-beta - 2013-07-02
     mechanisms are enabled and thus fix YARN/MR test failures after HADOOP-9421.
     (Daryn Sharp and Vinod Kumar Vavilapalli via vinodkv)
 
+    YARN-845. RM crash with NPE on NODE_UPDATE (Mayank Bansal via bikas)
+
   BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS
 
     YARN-158. Yarn creating package-info.java must not depend on sh.

+ 4 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -801,9 +801,10 @@ public class LeafQueue implements CSQueue {
     if (reservedContainer != null) {
       FiCaSchedulerApp application = 
           getApplication(reservedContainer.getApplicationAttemptId());
-      return 
-          assignReservedContainer(application, node, reservedContainer, 
-              clusterResource); 
+      synchronized (application) {
+        return assignReservedContainer(application, node, reservedContainer,
+          clusterResource);
+      }
     }
     
     // Try to assign containers to applications in order

+ 11 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
@@ -426,6 +427,16 @@ public class FiCaSchedulerApp extends SchedulerApplication {
       this.reservedContainers.remove(priority);
     }
     
+    // reservedContainer should not be null here
+    if (reservedContainer == null) {
+      String errorMesssage =
+          "Application " + getApplicationId() + " is trying to unreserve "
+              + " on node " + node + ", currently has "
+              + reservedContainers.size() + " at priority " + priority
+              + "; currentReservation " + currentReservation;
+      LOG.warn(errorMesssage);
+      throw new YarnRuntimeException(errorMesssage);
+    }
     // Reset the re-reservation count
     resetReReservations(priority);