Pārlūkot izejas kodu

svn merge -c 1391912 FIXES: YARN-116. Add the ability to change the RM include/exclude file without a restart. (Contributed by xieguiming and Harsh J)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1392366 13f79535-47bb-0310-9956-ffa450edef68
Robert Joseph Evans 12 gadi atpakaļ
vecāks
revīzija
8419e83771

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -14,6 +14,9 @@ Release 0.23.5 - UNRELEASED
     YARN-137. Change the default YARN scheduler to be the CapacityScheduler. 
     (sseth via acmurthy) 
 
+    YARN-116. Add the ability to change the RM include/exclude file without
+    a restart. (xieguiming and Harsh J via sseth)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java

@@ -189,15 +189,14 @@ public class AdminService extends AbstractService implements RMAdminProtocol {
       throws YarnRemoteException {
     UserGroupInformation user = checkAcls("refreshNodes");
     try {
-      this.nodesListManager.refreshNodes();
+      this.nodesListManager.refreshNodes(new YarnConfiguration());
       RMAuditLogger.logSuccess(user.getShortUserName(), "refreshNodes",
           "AdminService");
       return recordFactory.newRecordInstance(RefreshNodesResponse.class);
     } catch (IOException ioe) {
       LOG.info("Exception refreshing nodes ", ioe);
       RMAuditLogger.logFailure(user.getShortUserName(), "refreshNodes",
-          adminAcl.toString(), "AdminService",
-          "Exception refreshing nodes");
+          adminAcl.toString(), "AdminService", "Exception refreshing nodes");
       throw RPCUtil.getRemoteException(ioe);
     }
   }

+ 9 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java

@@ -87,8 +87,16 @@ public class NodesListManager extends AbstractService{
     }
   }
 
-  public void refreshNodes() throws IOException {
+  public void refreshNodes(Configuration yarnConf) throws IOException {
     synchronized (hostsReader) {
+      if (null == yarnConf) {
+        yarnConf = new YarnConfiguration();
+      }
+      hostsReader.updateFileNames(yarnConf.get(
+          YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH,
+          YarnConfiguration.DEFAULT_RM_NODES_INCLUDE_FILE_PATH), yarnConf.get(
+          YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH,
+          YarnConfiguration.DEFAULT_RM_NODES_EXCLUDE_FILE_PATH));
       hostsReader.refresh();
       printConfiguredHosts();
     }

+ 79 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -56,7 +57,7 @@ public class TestResourceTrackerService {
       .getRecordFactory(null);
 
   /**
-   * decommissioning using a include hosts file
+   * Decommissioning using a pre-configured include hosts file
    */
   @Test
   public void testDecommissionWithIncludeHosts() throws Exception {
@@ -83,7 +84,7 @@ public class TestResourceTrackerService {
 
     writeToHostsFile("host1");
 
-    rm.getNodesListManager().refreshNodes();
+    rm.getNodesListManager().refreshNodes(conf);
 
     nodeHeartbeat = nm1.nodeHeartbeat(true);
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
@@ -98,7 +99,7 @@ public class TestResourceTrackerService {
   }
 
   /**
-   * decommissioning using a exclude hosts file
+   * Decommissioning using a pre-configured exclude hosts file
    */
   @Test
   public void testDecommissionWithExcludeHosts() throws Exception {
@@ -123,7 +124,7 @@ public class TestResourceTrackerService {
 
     writeToHostsFile("host2");
 
-    rm.getNodesListManager().refreshNodes();
+    rm.getNodesListManager().refreshNodes(conf);
 
     nodeHeartbeat = nm1.nodeHeartbeat(true);
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
@@ -132,7 +133,81 @@ public class TestResourceTrackerService {
         NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
     checkDecommissionedNMCount(rm, ++initialMetricCount);
   }
+
+  /**
+  * Decommissioning using a post-configured include hosts file
+  */
+  @Test
+  public void testAddNewIncludePathToConfiguration() throws Exception {
+    Configuration conf = new Configuration();
+    rm = new MockRM(conf);
+    rm.start();
+    MockNM nm1 = rm.registerNode("host1:1234", 5120);
+    MockNM nm2 = rm.registerNode("host2:5678", 10240);
+    ClusterMetrics metrics = ClusterMetrics.getMetrics();
+    assert(metrics != null);
+    int initialMetricCount = metrics.getNumDecommisionedNMs();
+    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
+    Assert.assertEquals(
+        NodeAction.NORMAL,
+        nodeHeartbeat.getNodeAction());
+    nodeHeartbeat = nm2.nodeHeartbeat(true);
+    Assert.assertEquals(
+        NodeAction.NORMAL,
+        nodeHeartbeat.getNodeAction());
+    writeToHostsFile("host1");
+    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile
+        .getAbsolutePath());
+    rm.getNodesListManager().refreshNodes(conf);
+    nodeHeartbeat = nm1.nodeHeartbeat(true);
+    Assert.assertEquals(
+        "Node should not have been decomissioned.",
+        NodeAction.NORMAL,
+        nodeHeartbeat.getNodeAction());
+    nodeHeartbeat = nm2.nodeHeartbeat(true);
+    Assert.assertEquals("Node should have been decomissioned but is in state" +
+        nodeHeartbeat.getNodeAction(),
+        NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());
+    checkDecommissionedNMCount(rm, ++initialMetricCount);
+  }
   
+  /**
+   * Decommissioning using a post-configured exclude hosts file
+   */
+  @Test
+  public void testAddNewExcludePathToConfiguration() throws Exception {
+    Configuration conf = new Configuration();
+    rm = new MockRM(conf);
+    rm.start();
+    MockNM nm1 = rm.registerNode("host1:1234", 5120);
+    MockNM nm2 = rm.registerNode("host2:5678", 10240);
+    ClusterMetrics metrics = ClusterMetrics.getMetrics();
+    assert(metrics != null);
+    int initialMetricCount = metrics.getNumDecommisionedNMs();
+    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
+    Assert.assertEquals(
+        NodeAction.NORMAL,
+        nodeHeartbeat.getNodeAction());
+    nodeHeartbeat = nm2.nodeHeartbeat(true);
+    Assert.assertEquals(
+        NodeAction.NORMAL,
+        nodeHeartbeat.getNodeAction());
+    writeToHostsFile("host2");
+    conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile
+        .getAbsolutePath());
+    rm.getNodesListManager().refreshNodes(conf);
+    nodeHeartbeat = nm1.nodeHeartbeat(true);
+    Assert.assertEquals(
+        "Node should not have been decomissioned.",
+        NodeAction.NORMAL,
+        nodeHeartbeat.getNodeAction());
+    nodeHeartbeat = nm2.nodeHeartbeat(true);
+    Assert.assertEquals("Node should have been decomissioned but is in state" +
+        nodeHeartbeat.getNodeAction(),
+        NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());
+    checkDecommissionedNMCount(rm, ++initialMetricCount);
+  }
+
   @Test
   public void testNodeRegistrationFailure() throws Exception {
     writeToHostsFile("host1");