Sfoglia il codice sorgente

YARN-5567. Fix script exit code checking in NodeHealthScriptRunner#reportHealthStatus. (Yufei Gu via rchiang)

Ray Chiang 8 anni fa
parent
commit
05ede00386

+ 2 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java

@@ -106,6 +106,7 @@ public class NodeHealthScriptRunner extends AbstractService {
         shexec.execute();
       } catch (ExitCodeException e) {
         // script failed with an exit code: keep the exception text for reporting
+        exceptionStackTrace = StringUtils.stringifyException(e);
         status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
         // On Windows, we will not hit the Stream closed IOException
         // thrown by stdout buffered reader for timeout event.
@@ -162,7 +163,7 @@ public class NodeHealthScriptRunner extends AbstractService {
         setHealthStatus(false, exceptionStackTrace);
         break;
       case FAILED_WITH_EXIT_CODE:
-        setHealthStatus(true, "", now);
+        setHealthStatus(false, exceptionStackTrace);
         break;
       case FAILED:
         setHealthStatus(false, shexec.getOutput());

+ 7 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java

@@ -91,6 +91,7 @@ public class TestNodeHealthScriptRunner {
   public void testNodeHealthScript() throws Exception {
     String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
     String normalScript = "echo \"I am all fine\"";
+    String failWithExitCodeScript = "echo \"Not healthy\"; exit -1";
     String timeOutScript =
       Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\""
       : "sleep 4\necho \"I am fine\"";
@@ -124,6 +125,12 @@ public class TestNodeHealthScriptRunner {
         nodeHealthScriptRunner.isHealthy());
     Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport());
 
+    // Script which fails with exit code.
+    writeNodeHealthScriptFile(failWithExitCodeScript, true);
+    timerTask.run();
+    Assert.assertFalse("Node health status reported healthy",
+        nodeHealthScriptRunner.isHealthy());
+
     // Timeout script.
     writeNodeHealthScriptFile(timeOutScript, true);
     timerTask.run();