瀏覽代碼

YARN-876. Node resource is added twice when node comes back from unhealthy. (Peng Zhang via Sandy Ryza)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2.1-beta@1528662 13f79535-47bb-0310-9956-ffa450edef68
Sanford Ryza 11 年之前
父節點
當前提交
611bb5d543

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -67,6 +67,9 @@ Release 2.1.2 - UNRELEASED
     install with https enabled doesn't have broken link on NM UI. (Omkar Vinit
     Joshi via vinodkv)
 
+    YARN-876. Node resource is added twice when node comes back from unhealthy
+    to healthy. (Peng Zhang via Sandy Ryza)
+
 Release 2.1.1-beta - 2013-09-23
 
   INCOMPATIBLE CHANGES

+ 5 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java

@@ -460,8 +460,11 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
           && rmNode.getHttpPort() == newNode.getHttpPort()) {
         // Reset heartbeat ID since node just restarted.
         rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
-        rmNode.context.getDispatcher().getEventHandler().handle(
-            new NodeAddedSchedulerEvent(rmNode));
+        if (rmNode.getState() != NodeState.UNHEALTHY) {
+          // Only add new node if old state is not UNHEALTHY
+          rmNode.context.getDispatcher().getEventHandler().handle(
+              new NodeAddedSchedulerEvent(rmNode));
+         }
       } else {
         // Reconnected node differs, so replace old node and start new node
         switch (rmNode.getState()) {

+ 8 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java

@@ -452,6 +452,14 @@ public class TestResourceTrackerService {
     dispatcher.await();
     Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
     checkUnealthyNMCount(rm, nm2, true, 1);
+    
+    // unhealthy node changed back to healthy
+    nm2 = rm.registerNode("host2:5678", 5120);
+    dispatcher.await();
+    response = nm2.nodeHeartbeat(true);
+    response = nm2.nodeHeartbeat(true);
+    dispatcher.await();
+    Assert.assertEquals(5120 + 5120, metrics.getAvailableMB());
 
     // reconnect of node with changed capability
     nm1 = rm.registerNode("host2:5678", 10240);