1
0
Pārlūkot izejas kodu

MAPREDUCE-2450. Fixed a corner case with interrupted communication threads leading to a long timeout in Task. Contributed by Rajesh Balamohan.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1232314 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 13 gadi atpakaļ
vecāks
revīzija
d05e6d2671

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -503,6 +503,9 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3657. State machine visualize build fails. (Jason Lowe 
     MAPREDUCE-3657. State machine visualize build fails. (Jason Lowe 
     via mahadev)
     via mahadev)
 
 
+    MAPREDUCE-2450. Fixed a corner case with interrupted communication threads
+    leading to a long timeout in Task. (Rajesh Balamohan via acmurthy)
+
 Release 0.23.0 - 2011-11-01 
 Release 0.23.0 - 2011-11-01 
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 20 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java

@@ -552,6 +552,8 @@ abstract public class Task implements Writable, Configurable {
     private InputSplit split = null;
     private InputSplit split = null;
     private Progress taskProgress;
     private Progress taskProgress;
     private Thread pingThread = null;
     private Thread pingThread = null;
+    private boolean done = true;
+    private Object lock = new Object();
 
 
     /**
     /**
      * flag that indicates whether progress update needs to be sent to parent.
      * flag that indicates whether progress update needs to be sent to parent.
@@ -648,6 +650,9 @@ abstract public class Task implements Writable, Configurable {
       // get current flag value and reset it as well
       // get current flag value and reset it as well
       boolean sendProgress = resetProgressFlag();
       boolean sendProgress = resetProgressFlag();
       while (!taskDone.get()) {
       while (!taskDone.get()) {
+        synchronized (lock) {
+          done = false;
+        }
         try {
         try {
           boolean taskFound = true; // whether TT knows about this task
           boolean taskFound = true; // whether TT knows about this task
           // sleep for a bit
           // sleep for a bit
@@ -680,6 +685,7 @@ abstract public class Task implements Writable, Configurable {
           // came back up), kill ourselves
           // came back up), kill ourselves
           if (!taskFound) {
           if (!taskFound) {
             LOG.warn("Parent died.  Exiting "+taskId);
             LOG.warn("Parent died.  Exiting "+taskId);
+            resetDoneFlag();
             System.exit(66);
             System.exit(66);
           }
           }
 
 
@@ -692,10 +698,19 @@ abstract public class Task implements Writable, Configurable {
           if (remainingRetries == 0) {
           if (remainingRetries == 0) {
             ReflectionUtils.logThreadInfo(LOG, "Communication exception", 0);
             ReflectionUtils.logThreadInfo(LOG, "Communication exception", 0);
             LOG.warn("Last retry, killing "+taskId);
             LOG.warn("Last retry, killing "+taskId);
+            resetDoneFlag();
             System.exit(65);
             System.exit(65);
           }
           }
         }
         }
       }
       }
+      //Notify that we are done with the work
+      resetDoneFlag();
+    }
+    void resetDoneFlag() {
+      synchronized (lock) {
+        done = true;
+        lock.notify();
+      }
     }
     }
     public void startCommunicationThread() {
     public void startCommunicationThread() {
       if (pingThread == null) {
       if (pingThread == null) {
@@ -706,6 +721,11 @@ abstract public class Task implements Writable, Configurable {
     }
     }
     public void stopCommunicationThread() throws InterruptedException {
     public void stopCommunicationThread() throws InterruptedException {
       if (pingThread != null) {
       if (pingThread != null) {
+        synchronized (lock) {
+          while (!done) {
+            lock.wait();
+          }
+        }
         pingThread.interrupt();
         pingThread.interrupt();
         pingThread.join();
         pingThread.join();
       }
       }