Browse Source

MAPREDUCE-5260. Fix JvmManager to deal better with corner-cases in shutting down. Contributed by zhaoyunjiong.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1@1497918 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 12 years ago
parent
commit
e9c1d868ae
2 changed files with 24 additions and 12 deletions
  1. 3 0
      CHANGES.txt
  2. 21 12
      src/mapred/org/apache/hadoop/mapred/JvmManager.java

+ 3 - 0
CHANGES.txt

@@ -97,6 +97,9 @@ Release 1.2.1 - Unreleased
     HDFS-4261. Fix bugs in Balancer causing infinite loop and
     TestBalancerWithNodeGroup timing out.  (Junping Du via szetszwo)
 
+    MAPREDUCE-5260. Fix JvmManager to deal better with corner-cases in
+    shutting down. (zhaoyunjiong via acmurthy) 
+
 Release 1.2.0 - 2013.05.05
 
   INCOMPATIBLE CHANGES

+ 21 - 12
src/mapred/org/apache/hadoop/mapred/JvmManager.java

@@ -384,11 +384,16 @@ class JvmManager {
         return;
       }
       //*MUST* never reach this
-      LOG.fatal("Inconsistent state!!! " +
-      		"JVM Manager reached an unstable state " +
-            "while reaping a JVM for task: " + t.getTask().getTaskID()+
-            " " + getDetails() + ". Aborting. ");
-      System.exit(-1);
+      try {
+        LOG.fatal("Inconsistent state!!! " +
+        		"JVM Manager reached an unstable state " +
+              "while reaping a JVM for task: " + t.getTask().getTaskID()+
+              " " + getDetails() + ". Aborting. ");
+      } catch (Exception e) {
+        LOG.fatal(e);
+      } finally {
+        System.exit(-1);
+      }
     }
     
     private String getDetails() {
@@ -548,13 +553,17 @@ class JvmManager {
           if (pidStr != null) {
             String user = env.conf.getUser();
             int pid = Integer.parseInt(pidStr);
-            // start a thread that will kill the process dead
-            if (sleeptimeBeforeSigkill > 0) {
-              new DelayedProcessKiller(user, pid, sleeptimeBeforeSigkill, 
-                                       Signal.KILL).start();
-              controller.signalTask(user, pid, Signal.TERM);
-            } else {
-              controller.signalTask(user, pid, Signal.KILL);
+            try {
+              // start a thread that will kill the process dead
+              if (sleeptimeBeforeSigkill > 0) {
+                new DelayedProcessKiller(user, pid, sleeptimeBeforeSigkill, 
+                                         Signal.KILL).start();
+                controller.signalTask(user, pid, Signal.TERM);
+              } else {
+                controller.signalTask(user, pid, Signal.KILL);
+              }
+            } catch (IOException e) {
+              LOG.error("Catch Exception caused by lack of user information to prevent inconsistent state: ", e);
             }
           } else {
             LOG.info(String.format("JVM Not killed %s but just removed", jvmId