Browse Source

YARN-63. RMNodeImpl is missing valid transitions from the UNHEALTHY state (Jason Lowe via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1379501 13f79535-47bb-0310-9956-ffa450edef68
Robert Joseph Evans 12 years ago
parent
commit
b9a00fbd32

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -60,3 +60,6 @@ Release 0.23.3 - Unreleased
 
     YARN-31. Fix TestDelegationTokenRenewer to not depend on test order so as to
     pass tests on jdk7. (Thomas Graves via vinodkv)
+
+    YARN-63. RMNodeImpl is missing valid transitions from the UNHEALTHY state
+    (Jason Lowe via bobby)

+ 13 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java

@@ -141,8 +141,21 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
      .addTransition(RMNodeState.UNHEALTHY, 
          EnumSet.of(RMNodeState.UNHEALTHY, RMNodeState.RUNNING),
          RMNodeEventType.STATUS_UPDATE, new StatusUpdateWhenUnHealthyTransition())
+     .addTransition(RMNodeState.UNHEALTHY, RMNodeState.DECOMMISSIONED,
+         RMNodeEventType.DECOMMISSION,
+         new DeactivateNodeTransition(RMNodeState.DECOMMISSIONED))
+     .addTransition(RMNodeState.UNHEALTHY, RMNodeState.LOST,
+         RMNodeEventType.EXPIRE,
+         new DeactivateNodeTransition(RMNodeState.LOST))
+     .addTransition(RMNodeState.UNHEALTHY, RMNodeState.REBOOTED,
+         RMNodeEventType.REBOOTING,
+         new DeactivateNodeTransition(RMNodeState.REBOOTED))
      .addTransition(RMNodeState.UNHEALTHY, RMNodeState.UNHEALTHY,
          RMNodeEventType.RECONNECTED, new ReconnectNodeTransition())
+     .addTransition(RMNodeState.UNHEALTHY, RMNodeState.UNHEALTHY,
+         RMNodeEventType.CLEANUP_APP, new CleanUpAppTransition())
+     .addTransition(RMNodeState.UNHEALTHY, RMNodeState.UNHEALTHY,
+         RMNodeEventType.CLEANUP_CONTAINER, new CleanUpContainerTransition())
          
      // create the topology tables
      .installTopology(); 

+ 67 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java

@@ -23,6 +23,7 @@ import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.verify;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
@@ -40,6 +41,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainer
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
@@ -148,4 +150,69 @@ public class TestRMNodeTransitions {
     Assert.assertEquals(0, completedContainers.size());
   }
 
+  @Test
+  public void testRunningExpire() { // EXPIRE while RUNNING must end in LOST
+    RMNodeImpl node = getRunningNode();
+    node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
+    Assert.assertEquals(RMNodeState.LOST, node.getState());
+  }
+
+  @Test
+  public void testUnhealthyExpire() { // EXPIRE while UNHEALTHY must end in LOST
+    RMNodeImpl node = getUnhealthyNode();
+    node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
+    Assert.assertEquals(RMNodeState.LOST, node.getState());
+  }
+
+  @Test
+  public void testRunningDecommission() { // RUNNING + DECOMMISSION
+    RMNodeImpl node = getRunningNode();
+    node.handle(new RMNodeEvent(node.getNodeID(),
+        RMNodeEventType.DECOMMISSION));
+    Assert.assertEquals(RMNodeState.DECOMMISSIONED, node.getState());
+  }
+
+  @Test
+  public void testUnhealthyDecommission() { // UNHEALTHY + DECOMMISSION
+    RMNodeImpl node = getUnhealthyNode();
+    node.handle(new RMNodeEvent(node.getNodeID(),
+        RMNodeEventType.DECOMMISSION));
+    Assert.assertEquals(RMNodeState.DECOMMISSIONED, node.getState());
+  }
+
+  @Test
+  public void testRunningRebooting() { // RUNNING + REBOOTING -> REBOOTED
+    RMNodeImpl node = getRunningNode();
+    node.handle(new RMNodeEvent(node.getNodeID(),
+        RMNodeEventType.REBOOTING));
+    Assert.assertEquals(RMNodeState.REBOOTED, node.getState());
+  }
+
+  @Test
+  public void testUnhealthyRebooting() { // UNHEALTHY + REBOOTING -> REBOOTED
+    RMNodeImpl node = getUnhealthyNode();
+    node.handle(new RMNodeEvent(node.getNodeID(),
+        RMNodeEventType.REBOOTING));
+    Assert.assertEquals(RMNodeState.REBOOTED, node.getState());
+  }
+
+  private RMNodeImpl getRunningNode() { // helper: new node driven to RUNNING
+    NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
+    RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0,
+        null, null, null); // all other constructor arguments are 0 or null
+    node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.STARTED));
+    Assert.assertEquals(RMNodeState.RUNNING, node.getState()); // check setup
+    return node;
+  }
+
+  private RMNodeImpl getUnhealthyNode() { // helper: node driven to UNHEALTHY
+    RMNodeImpl node = getRunningNode(); // start from a RUNNING node
+    NodeHealthStatus status = node.getNodeHealthStatus();
+    status.setHealthReport("sick");
+    status.setIsNodeHealthy(false); // mark the health status as not healthy
+    node.handle(new RMNodeStatusEvent(node.getNodeID(), status,
+        new ArrayList<ContainerStatus>(), null, null, null)); // empty list
+    Assert.assertEquals(RMNodeState.UNHEALTHY, node.getState()); // check setup
+    return node;
+  }
 }