Explorar el Código

YARN-502. Fixed a state machine issue with RMNode inside ResourceManager which was crashing scheduler. Contributed by Mayank Bansal.
svn merge --ignore-ancestry -c 1509060 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1509061 13f79535-47bb-0310-9956-ffa450edef68

Vinod Kumar Vavilapalli hace 12 años
padre
commit
b1cec77b5f

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -30,6 +30,9 @@ Release 2.1.1-beta - UNRELEASED
     YARN-966. Fixed ContainerLaunch to not fail quietly when there are no
     localized resources due to some other failure. (Zhijie Shen via vinodkv)
 
+    YARN-502. Fixed a state machine issue with RMNode inside ResourceManager
+    which was crashing scheduler. (Mayank Bansal via vinodkv)
+
 Release 2.1.0-beta - 2013-08-06
 
   INCOMPATIBLE CHANGES

+ 7 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java

@@ -501,8 +501,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
     public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
       // Inform the scheduler
       rmNode.nodeUpdateQueue.clear();
-      rmNode.context.getDispatcher().getEventHandler().handle(
-          new NodeRemovedSchedulerEvent(rmNode));
+      // If the current state is NodeState.UNHEALTHY
+      // Then node is already been removed from the
+      // Scheduler
+      if (!rmNode.getState().equals(NodeState.UNHEALTHY)) {
+        rmNode.context.getDispatcher().getEventHandler()
+          .handle(new NodeRemovedSchedulerEvent(rmNode));
+      }
       rmNode.context.getDispatcher().getEventHandler().handle(
           new NodesListManagerEvent(
               NodesListManagerEventType.NODE_UNUSABLE, rmNode));

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java

@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@@ -269,6 +270,15 @@ public class TestRMNodeTransitions {
     node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
     Assert.assertEquals(NodeState.LOST, node.getState());
   }
+  
+  @Test
+  public void testUnhealthyExpireForSchedulerRemove() {
+    RMNodeImpl node = getUnhealthyNode();
+    verify(scheduler,times(2)).handle(any(NodeRemovedSchedulerEvent.class));
+    node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
+    verify(scheduler,times(2)).handle(any(NodeRemovedSchedulerEvent.class));
+    Assert.assertEquals(NodeState.LOST, node.getState());
+  }
 
   @Test
   public void testRunningDecommission() {