Browse Source

MAPREDUCE-5406. Improve logging around Task Tracker exiting with JVM manager inconsistent state. Contributed by Chelsey Chang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1@1505094 13f79535-47bb-0310-9956-ffa450edef68
Chris Nauroth 12 years ago
parent
commit
7a1a8969e9

+ 3 - 0
CHANGES.txt

@@ -24,6 +24,9 @@ Release 1.3.0 - unreleased
     HDFS-4903. Print trash configuration and trash emptier state in namenode
     log. (Arpit Agarwal via suresh)
 
+    MAPREDUCE-5406. Improve logging around Task Tracker exiting with JVM manager
+    inconsistent state. (Chelsey Chang via cnauroth)
+
   BUG FIXES
 
     MAPREDUCE-5047. keep.failed.task.files=true causes job failure on 

+ 1 - 1
src/core/org/apache/hadoop/util/ProcessTree.java

@@ -140,7 +140,7 @@ public class ProcessTree {
                pgrpId + " ."+ 
           StringUtils.stringifyException(e));
     } finally {
-      LOG.info("Killing process group" + pgrpId + " with signal " + signal + 
+      LOG.info("Killing process group " + pgrpId + " with signal " + signal + 
                ". Exit code " + shexec.getExitCode());
     }
   }

+ 6 - 1
src/mapred/org/apache/hadoop/mapred/JvmManager.java

@@ -271,6 +271,8 @@ class JvmManager {
 
     synchronized public void taskFinished(TaskRunner tr) {
       JVMId jvmId = runningTaskToJvm.remove(tr);
+      LOG.info("Task " + tr.getTask().getTaskID()
+          + " finished. Mark JVM Idle: " + jvmId);
       if (jvmId != null) {
         jvmToRunningTask.remove(jvmId);
         JvmRunner jvmRunner;
@@ -284,6 +286,8 @@ class JvmManager {
                                         ) throws IOException,
                                                  InterruptedException {
       JVMId jvmId = runningTaskToJvm.remove(tr);
+      LOG.info("Task " + tr.getTask().getTaskID() + " killed. Kill JVM: "
+          + jvmId);
       if (jvmId != null) {
         jvmToRunningTask.remove(jvmId);
         killJvm(jvmId);
@@ -339,6 +343,7 @@ class JvmManager {
       // (3) kill an idle JVM (from a different job) 
       // (the order of return is in the order above)
       int numJvmsSpawned = jvmIdToRunner.size();
+      LOG.info("Reaping JVM. Number of active JVMs = " + numJvmsSpawned);
       JvmRunner runnerToKill = null;
       if (numJvmsSpawned >= maxJvms) {
         //go through the list of JVMs for all jobs.
@@ -377,7 +382,7 @@ class JvmManager {
 
       if (spawnNewJvm) {
         if (runnerToKill != null) {
-          LOG.info("Killing JVM: " + runnerToKill.jvmId);
+          LOG.info("Killing JVM: " + runnerToKill.jvmId + " to spawn a new one");
           killJvmRunner(runnerToKill);
         }
         spawnNewJvm(jobId, env, t);

+ 4 - 1
src/mapred/org/apache/hadoop/mapred/TaskTracker.java

@@ -2553,7 +2553,8 @@ public class TaskTracker implements MRConstants, TaskUmbilicalProtocol,
             if (!tip.canBeLaunched()) {
               //got killed externally while still in the launcher queue
               LOG.info("Not launching task " + task.getTaskID() + " as it got"
-                + " killed externally. Task's state is " + tip.getRunState());
+                  + " killed externally. Task's state is " + tip.getRunState()
+                  + ". Add " + task.getNumSlotsRequired() + " slots.");
               addFreeSlots(task.getNumSlotsRequired());
               continue;
             }
@@ -3332,6 +3333,8 @@ public class TaskTracker implements MRConstants, TaskUmbilicalProtocol,
     private synchronized void releaseSlot() {
       if (slotTaken) {
         if (launcher != null) {
+          LOG.info("Releasing " + task.getNumSlotsRequired() + " slots from"
+              + " task " + task.getTaskID());
           launcher.addFreeSlots(task.getNumSlotsRequired());
         }
         slotTaken = false;