浏览代码

YARN-11753. Ensure NM is marked unhealthy if the ProcessBuilder reports an issue with the container-executor (#7290)

Benjamin Teke 3 月之前
父节点
当前提交
0d72896db1

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java

@@ -466,10 +466,12 @@ public class LinuxContainerExecutor extends ContainerExecutor {
       Throwable cause = e.getCause() != null ? e.getCause() : e;
       if (cause instanceof IOException) {
         IOException io = (IOException) cause;
-        if (io.getMessage().contains("No such file or directory")) {
+        String containerExecutorPath = getContainerExecutorExecutablePath(conf);
+        if (io.getMessage() != null && io.getMessage().contains("Cannot run program \"" +
+            containerExecutorPath + "\"")) {
           throw new ConfigurationException("Application " + appId + " initialization failed" +
               "(exitCode=" + exitCode + "). Container executor not found at "
-              + getContainerExecutorExecutablePath(conf), e);
+              + containerExecutorPath, e);
         }
       }
 

+ 34 - 28
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java

@@ -628,15 +628,17 @@ public class TestLinuxContainerExecutorWithMocks {
     when(context.getEnvironment()).thenReturn(env);
     Path workDir = new Path("/tmp");
 
+    LocalizerStartContext lsc = new LocalizerStartContext.Builder()
+        .setNmPrivateContainerTokens(nmPrivateCTokensPath)
+        .setNmAddr(address)
+        .setUser(appSubmitter)
+        .setAppId(appId.toString())
+        .setLocId("12345")
+        .setDirsHandler(dirService)
+        .build();
+
     try {
-      lce.startLocalizer(new LocalizerStartContext.Builder()
-          .setNmPrivateContainerTokens(nmPrivateCTokensPath)
-          .setNmAddr(address)
-          .setUser(appSubmitter)
-          .setAppId(appId.toString())
-          .setLocId("12345")
-          .setDirsHandler(dirService)
-          .build());
+      lce.startLocalizer(lsc);
       Assert.fail("startLocalizer should have thrown an exception");
     } catch (IOException e) {
       assertTrue("Unexpected exception " + e,
@@ -648,22 +650,14 @@ public class TestLinuxContainerExecutorWithMocks {
         LinuxContainerExecutor.ExitCode.INVALID_CONFIG_FILE.getExitCode(),
     };
 
-    for (int i = 0; i < exitCodesToThrow.length; i++) {
-      int exitCode = exitCodesToThrow[i];
+    for (int exitCode : exitCodesToThrow) {
       doThrow(new PrivilegedOperationException("invalid config", exitCode, null, null))
           .when(spyPrivilegedExecutor).executePrivilegedOperation(
               any(), any(PrivilegedOperation.class),
               any(), any(), anyBoolean(), anyBoolean());
 
       try {
-        lce.startLocalizer(new LocalizerStartContext.Builder()
-            .setNmPrivateContainerTokens(nmPrivateCTokensPath)
-            .setNmAddr(address)
-            .setUser(appSubmitter)
-            .setAppId(appId.toString())
-            .setLocId("12345")
-            .setDirsHandler(dirService)
-            .build());
+        lce.startLocalizer(lsc);
         Assert.fail("startLocalizer should have thrown a ConfigurationException");
       } catch (ConfigurationException e) {
         assertTrue("Unexpected exception " + e,
@@ -671,27 +665,39 @@ public class TestLinuxContainerExecutorWithMocks {
       }
     }
 
+    // Assert that an IOException thrown by ProcessBuilder.start for the
+    // container-executor binary is reported as a misconfiguration
+    String containerExecutorPath = lce.getContainerExecutorExecutablePath(conf);
     doThrow(new PrivilegedOperationException("IO error",
-        new IOException("No such file or directory")))
+        new IOException("Cannot run program \""+ containerExecutorPath + "\"")))
         .when(spyPrivilegedExecutor).executePrivilegedOperation(
             any(), any(PrivilegedOperation.class),
             any(), any(), anyBoolean(), anyBoolean());
 
     try {
-      lce.startLocalizer(new LocalizerStartContext.Builder()
-          .setNmPrivateContainerTokens(nmPrivateCTokensPath)
-          .setNmAddr(address)
-          .setUser(appSubmitter)
-          .setAppId(appId.toString())
-          .setLocId("12345")
-          .setDirsHandler(dirService)
-          .build());
-      Assert.fail("startLocalizer should have thrown a ConfigurationException");
+      lce.startLocalizer(lsc);
+      Assert.fail("startLocalizer should have thrown an ConfigurationException");
     } catch (ConfigurationException e) {
       assertTrue("Unexpected exception " + e,
           e.getMessage().contains("Container executor not found"));
     }
 
+    // Assert that an unrelated IOException is not reported as a misconfiguration
+    doThrow(new PrivilegedOperationException("IO error",
+        new IOException("No such file or directory")))
+        .when(spyPrivilegedExecutor).executePrivilegedOperation(
+            any(), any(PrivilegedOperation.class),
+            any(), any(), anyBoolean(), anyBoolean());
+
+    try {
+      lce.startLocalizer(lsc);
+      Assert.fail("startLocalizer should have thrown an IOException");
+    } catch (ConfigurationException e) {
+      Assert.fail("startLocalizer should not have thrown a ConfigurationException");
+    } catch (IOException e) {
+      assertTrue("Unexpected exception " + e,
+          e.getMessage().contains("exitCode"));
+    }
 
     doThrow(new PrivilegedOperationException("interrupted"))
         .when(spyPrivilegedExecutor).executePrivilegedOperation(