|
@@ -18,12 +18,14 @@
|
|
|
|
|
|
package org.apache.hadoop.yarn.server.resourcemanager;
|
|
|
|
|
|
+import org.apache.hadoop.test.GenericTestUtils;
|
|
|
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap;
|
|
|
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet;
|
|
|
import org.apache.hadoop.net.ServerSocketUtil;
|
|
|
import org.apache.hadoop.security.Credentials;
|
|
|
import org.apache.hadoop.security.token.Token;
|
|
|
import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier;
|
|
|
+import org.apache.hadoop.util.Sets;
|
|
|
import org.apache.hadoop.yarn.nodelabels.NodeAttributeStore;
|
|
|
import org.apache.hadoop.yarn.nodelabels.NodeLabelUtil;
|
|
|
import org.apache.hadoop.yarn.server.api.ResourceTracker;
|
|
@@ -122,6 +124,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
|
|
@@ -3063,4 +3066,86 @@ public class TestResourceTrackerService extends NodeLabelTestBase {
|
|
|
|
|
|
resourceTrackerService.close();
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Decommissioning without pre-configured include hosts file.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testDecommissionWithoutIncludeFile() throws Exception {
|
|
|
+ // clear exclude hosts
|
|
|
+ writeToHostsFile(excludeHostFile, "");
|
|
|
+ // init conf:
|
|
|
+ // (1) set untracked removal timeout to 500ms
|
|
|
+ // (2) set exclude path (no include path)
|
|
|
+ // (3) enable node untracked without pre-configured include path
|
|
|
+ Configuration conf = new Configuration();
|
|
|
+ conf.setInt(YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC,
|
|
|
+ 500);
|
|
|
+ conf.setBoolean(
|
|
|
+ YarnConfiguration.RM_ENABLE_NODE_UNTRACKED_WITHOUT_INCLUDE_PATH, true);
|
|
|
+ conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH,
|
|
|
+ excludeHostFile.getAbsolutePath());
|
|
|
+
|
|
|
+ rm = new MockRM(conf);
|
|
|
+ rm.start();
|
|
|
+ MockNM nm1 = rm.registerNode("host1:1234", 10240);
|
|
|
+ MockNM nm2 = rm.registerNode("host2:1234", 10240);
|
|
|
+ MockNM nm3 = rm.registerNode("host3:1234", 10240);
|
|
|
+ MockNM nm4 = rm.registerNode("host4:1234", 10240);
|
|
|
+ assertEquals(4, rm.getRMContext().getRMNodes().size());
|
|
|
+ assertEquals(0, rm.getRMContext().getInactiveRMNodes().size());
|
|
|
+
|
|
|
+ // decommission nm1 via adding nm1 into exclude hosts
|
|
|
+ RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
|
|
|
+ writeToHostsFile(excludeHostFile, "host1");
|
|
|
+ rm.getNodesListManager().refreshNodes(conf);
|
|
|
+ rm.drainEvents();
|
|
|
+ assertEquals(rmNode1.getState(), NodeState.DECOMMISSIONED);
|
|
|
+ assertEquals(3, rm.getRMContext().getRMNodes().size());
|
|
|
+ assertEquals(1, rm.getRMContext().getInactiveRMNodes().size());
|
|
|
+ assertEquals(Sets.newHashSet(nm1.getNodeId()),
|
|
|
+ rm.getRMContext().getInactiveRMNodes().keySet());
|
|
|
+
|
|
|
+ // remove nm1 from exclude hosts, so that it will be marked as untracked
|
|
|
+ // and removed from inactive nodes after the timeout
|
|
|
+ writeToHostsFile(excludeHostFile, "");
|
|
|
+ rm.getNodesListManager().refreshNodes(conf);
|
|
|
+ // confirmed that nm1 should be removed from inactive nodes in 1 second
|
|
|
+ GenericTestUtils.waitFor(
|
|
|
+ () -> rm.getRMContext().getInactiveRMNodes().size() == 0, 100, 1000);
|
|
|
+
|
|
|
+ // lost nm2
|
|
|
+ RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
|
|
|
+ rm.getRMContext().getDispatcher().getEventHandler()
|
|
|
+ .handle(new RMNodeEvent(nm2.getNodeId(), RMNodeEventType.EXPIRE));
|
|
|
+ rm.drainEvents();
|
|
|
+ assertEquals(rmNode2.getState(), NodeState.LOST);
|
|
|
+ assertEquals(2, rm.getRMContext().getRMNodes().size());
|
|
|
+ assertEquals(1, rm.getRMContext().getInactiveRMNodes().size());
|
|
|
+ assertEquals(Sets.newHashSet(nm2.getNodeId()),
|
|
|
+ rm.getRMContext().getInactiveRMNodes().keySet());
|
|
|
+ // confirmed that nm2 should be removed from inactive nodes in 1 second
|
|
|
+ GenericTestUtils.waitFor(
|
|
|
+ () -> rm.getRMContext().getInactiveRMNodes().size() == 0, 100, 1000);
|
|
|
+
|
|
|
+ // shutdown nm3
|
|
|
+ RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
|
|
|
+ rm.getRMContext().getDispatcher().getEventHandler()
|
|
|
+ .handle(new RMNodeEvent(nm3.getNodeId(), RMNodeEventType.SHUTDOWN));
|
|
|
+ rm.drainEvents();
|
|
|
+ assertEquals(rmNode3.getState(), NodeState.SHUTDOWN);
|
|
|
+ assertEquals(1, rm.getRMContext().getRMNodes().size());
|
|
|
+ assertEquals(1, rm.getRMContext().getInactiveRMNodes().size());
|
|
|
+ assertEquals(Sets.newHashSet(nm3.getNodeId()),
|
|
|
+ rm.getRMContext().getInactiveRMNodes().keySet());
|
|
|
+ // confirmed that nm3 should be removed from inactive nodes in 1 second
|
|
|
+ GenericTestUtils.waitFor(
|
|
|
+ () -> rm.getRMContext().getInactiveRMNodes().size() == 0, 100, 1000);
|
|
|
+
|
|
|
+ // nm4 is still active node at last
|
|
|
+ assertEquals(Sets.newHashSet(nm4.getNodeId()),
|
|
|
+ rm.getRMContext().getRMNodes().keySet());
|
|
|
+
|
|
|
+ rm.close();
|
|
|
+ }
|
|
|
}
|