Kaynağa Gözat

HADOOP-12317. Applications fail on NM restart on some Linux distros because NM container recovery declares AM container as LOST (adhoot via rkanter)

(cherry picked from commit 1e06299df82b98795124fe8a33578c111e744ff4)
Robert Kanter 9 yıl önce
ebeveyn
işleme
acf241242d

+ 4 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -566,6 +566,10 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12322. typos in rpcmetrics.java. (Anu Engineer via
     Arpit Agarwal)
 
+    HADOOP-12317. Applications fail on NM restart on some linux distro
+    because NM container recovery declares AM container as LOST
+    (adhoot via rkanter)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 8 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java

@@ -212,13 +212,18 @@ abstract public class Shell {
   public static String[] getCheckProcessIsAliveCommand(String pid) {
     return Shell.WINDOWS ?
       new String[] { Shell.WINUTILS, "task", "isAlive", pid } :
-      new String[] { "kill", "-0", isSetsidAvailable ? "-" + pid : pid };
+      isSetsidAvailable ?
+        new String[] { "kill", "-0", "--", "-" + pid } :
+        new String[] { "kill", "-0", pid };
   }
 
   /** Return a command to send a signal to a given pid */
   public static String[] getSignalKillCommand(int code, String pid) {
-    return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } :
-      new String[] { "kill", "-" + code, isSetsidAvailable ? "-" + pid : pid };
+    return Shell.WINDOWS ?
+      new String[] { Shell.WINUTILS, "task", "kill", pid } :
+      isSetsidAvailable ?
+        new String[] { "kill", "-" + code, "--", "-" + pid } :
+        new String[] { "kill", "-" + code, pid };
   }
 
   public static final String ENV_NAME_REGEX = "[A-Za-z_][A-Za-z0-9_]*";

+ 39 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.util;
 
 import junit.framework.TestCase;
+import org.junit.Assert;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -150,6 +151,44 @@ public class TestShell extends TestCase {
     System.err.println("after: " + timersAfter);
     assertEquals(timersBefore, timersAfter);
   }
+
+  public void testGetCheckProcessIsAliveCommand() throws Exception {
+    String anyPid = "9999";
+    String[] checkProcessAliveCommand = Shell.getCheckProcessIsAliveCommand(
+        anyPid);
+
+    String[] expectedCommand;
+
+    if (Shell.WINDOWS) {
+      expectedCommand =
+          new String[]{ Shell.WINUTILS, "task", "isAlive", anyPid };
+    } else if (Shell.isSetsidAvailable) {
+      expectedCommand = new String[]{ "kill", "-0", "--", "-" + anyPid };
+    } else {
+      expectedCommand = new String[]{"kill", "-0", anyPid};
+    }
+    Assert.assertArrayEquals(expectedCommand, checkProcessAliveCommand);
+  }
+
+  public void testGetSignalKillCommand() throws Exception {
+    String anyPid = "9999";
+    int anySignal = 9;
+    String[] checkProcessAliveCommand = Shell.getSignalKillCommand(anySignal,
+        anyPid);
+
+    String[] expectedCommand;
+    if (Shell.WINDOWS) {
+      expectedCommand =
+          new String[]{ Shell.WINUTILS, "task", "kill", anyPid };
+    } else if (Shell.isSetsidAvailable) {
+      expectedCommand =
+          new String[]{ "kill", "-" + anySignal, "--", "-" + anyPid };
+    } else {
+      expectedCommand =
+          new String[]{ "kill", "-" + anySignal, anyPid };
+    }
+    Assert.assertArrayEquals(expectedCommand, checkProcessAliveCommand);
+  }
   
 
   private void testInterval(long interval) throws IOException {