瀏覽代碼

YARN-11467. RM failover may fail when the nodes.exclude-path file does not exist (#5565)

cxzl25 2 年之前
父節點
當前提交
be50d221f5

+ 5 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java

@@ -220,7 +220,11 @@ public class NodesListManager extends CompositeService implements
 
   public void refreshNodes(Configuration yarnConf) // single-argument overload changed by this commit
       throws IOException, YarnException {
-    refreshNodes(yarnConf, false); // old behavior: any IOException/YarnException propagated straight to the caller
+    try {
+      refreshNodes(yarnConf, false); // delegates to the two-argument overload with graceful = false
+    } catch (YarnException | IOException ex) {
+      disableHostsFileReader(ex); // failure is handed to this helper instead of being rethrown here; NOTE(review): helper body is not visible in this diff - confirm whether it can itself throw
+    }
   }
 
   public void refreshNodes(Configuration yarnConf, boolean graceful)

+ 103 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java

@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager;
 
+import java.io.DataOutputStream;
+import java.io.File;
+import java.nio.file.Files;
+import java.util.UUID;
 import java.util.function.Supplier;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -742,6 +746,105 @@ public class TestRMHA {
         rm.getRMContext().getResourceProfilesManager());
   }
 
+  @Test
+  public void testTransitionedToActiveWithExcludeFileNotExist() throws Exception { // drives a MockRM through standby/active transitions while the configured nodes exclude file is a path this test never creates
+    final String errUnforcedRequest = "User request succeeded even when " +
+        "automatic failover is enabled";
+
+    Configuration conf = new YarnConfiguration(configuration); // built from the `configuration` declared outside this hunk
+    String nodeExcludeFilePath = "/tmp/non-existent-path-" + UUID.randomUUID(); // random suffix; nothing in this test creates a file at this path
+    conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, nodeExcludeFilePath);
+
+    DataOutputStream output = null;
+    final File confFile =
+         new File("target/test-classes/"+YarnConfiguration.YARN_SITE_CONFIGURATION_FILE); // site config file this test (re)writes
+    final File backupConfFile = new File(
+         "target/test-classes/" + YarnConfiguration.YARN_SITE_CONFIGURATION_FILE
+         + ".backup." + UUID.randomUUID()); // uniquely named sibling used to park a pre-existing config file
+    boolean hasRenamed = false; // becomes true only if an existing config file was moved to backupConfFile
+    try {
+      if (confFile.exists()) {
+        hasRenamed = confFile.renameTo(backupConfFile); // move the existing file aside so it can be restored in the cleanup below
+        if (!hasRenamed) {
+          Assert.fail("Can not rename " + confFile.getAbsolutePath() + " to "
+              + backupConfFile.getAbsolutePath());
+        }
+      }
+      if (!confFile.createNewFile()) { // false means a file with this name already exists
+        Assert.fail(
+            "Can not create " + YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
+      }
+      output = new DataOutputStream(Files.newOutputStream(confFile.toPath()));
+      conf.writeXml(output); // writes conf, including the exclude-file path set above, into confFile; presumably so a later configuration reload sees it - TODO confirm
+    } finally {
+      if (output != null) {
+        output.close();
+      }
+    } // NOTE(review): if this try fails after the rename succeeded, the backup is not restored - the restoring finally belongs to the next try
+
+    try {
+      rm = new MockRM(conf); // `rm` is assigned, not declared, here; its declaration is outside this hunk
+      rm.init(conf);
+      rm.start();
+      StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
+          HAServiceProtocol.RequestSource.REQUEST_BY_USER); // first round: plain (unforced) user request
+
+      // Unforced transition to standby: must be refused with AccessControlException
+      try {
+        rm.adminService.transitionToStandby(requestInfo);
+        fail(errUnforcedRequest);
+      } catch (AccessControlException e) {
+        // expected: reaching fail() above would mean the unforced request went through
+      }
+      checkMonitorHealth(); // helper defined elsewhere in the test class
+      checkStandbyRMFunctionality(); // helper defined elsewhere in the test class
+
+      // Unforced transition to active: must be refused as well
+      try {
+        rm.adminService.transitionToActive(requestInfo);
+        fail(errUnforcedRequest);
+      } catch (AccessControlException e) {
+        // expected: reaching fail() above would mean the unforced request went through
+      }
+      checkMonitorHealth();
+      checkStandbyRMFunctionality(); // standby checks are run again after the refused request
+
+      final String errForcedRequest =
+          "Forced request by user should work " + "even if automatic failover is enabled";
+      requestInfo = new StateChangeRequestInfo(
+          HAServiceProtocol.RequestSource.REQUEST_BY_USER_FORCED); // second round: forced user request
+
+      // Forced transition to standby: an AccessControlException here fails the test
+      try {
+        rm.adminService.transitionToStandby(requestInfo);
+      } catch (AccessControlException e) {
+        fail(errForcedRequest);
+      }
+      checkMonitorHealth();
+      checkStandbyRMFunctionality();
+
+      // Forced transition to active: the step performed with the exclude file path pointing at a file this test never created
+      try {
+        rm.adminService.transitionToActive(requestInfo);
+      } catch (AccessControlException e) {
+        fail(errForcedRequest);
+      }
+      checkMonitorHealth();
+      checkActiveRMFunctionality(); // this time the active-state checks are run
+    } finally {
+      if (confFile.exists()) {
+        if (!hasRenamed) {
+          confFile.delete(); // no backup was taken, so just remove the file written above; NOTE(review): return value ignored
+        } else {
+          backupConfFile.renameTo(confFile); // put the original file back; NOTE(review): return value ignored, and renameTo onto an existing destination is platform-dependent
+        }
+      }
+      if (rm != null) {
+        rm.stop();
+      }
+    }
+  }
+
   public void innerTestHAWithRMHostName(boolean includeBindHost) {
     //this is run two times, with and without a bind host configured
     if (includeBindHost) {