Browse Source

YARN-1265. Fair Scheduler chokes on unhealthy node reconnect (Sandy Ryza)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1531146 13f79535-47bb-0310-9956-ffa450edef68
Sanford Ryza 11 years ago
parent
commit
f218527fff

+ 2 - 0
hadoop-yarn-project/CHANGES.txt

@@ -98,6 +98,8 @@ Release 2.2.1 - UNRELEASED
     YARN-879. Fixed tests w.r.t o.a.h.y.server.resourcemanager.Application.
     (Junping Du via devaraj)
 
+    YARN-1265. Fair Scheduler chokes on unhealthy node reconnect (Sandy Ryza)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java

@@ -788,6 +788,10 @@ public class FairScheduler implements ResourceScheduler {
 
   private synchronized void removeNode(RMNode rmNode) {
     FSSchedulerNode node = nodes.get(rmNode.getNodeID());
+    // node is null (not tracked here) when an UNHEALTHY node reconnects
+    if (node == null) {
+      return;
+    }
     Resources.subtractFrom(clusterCapacity, rmNode.getTotalCapability());
     updateRootQueueMetrics();