Browse Source

MAPREDUCE-3714. Fixed EventFetcher and Fetcher threads to shut-down properly so that reducers don't hang in corner cases. (vinodkv)
svn merge --ignore-ancestry -c 1235545 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1235546 13f79535-47bb-0310-9956-ffa450edef68

Vinod Kumar Vavilapalli 13 năm trước cách đây
mục cha
commit
c516cc863a

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -501,6 +501,9 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3505. yarn APPLICATION_CLASSPATH needs to be overridable. 
     (ahmed via tucu)
 
+    MAPREDUCE-3714. Fixed EventFetcher and Fetcher threads to shut-down properly
+    so that reducers don't hang in corner cases. (vinodkv)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 17 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/EventFetcher.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 
+@SuppressWarnings("deprecation")
 class EventFetcher<K,V> extends Thread {
   private static final long SLEEP_TIME = 1000;
   private static final int MAX_EVENTS_TO_FETCH = 10000;
@@ -41,6 +42,8 @@ class EventFetcher<K,V> extends Thread {
   private ExceptionReporter exceptionReporter = null;
   
   private int maxMapRuntime = 0;
+
+  private volatile boolean stopped = false;
   
   public EventFetcher(TaskAttemptID reduce,
                       TaskUmbilicalProtocol umbilical,
@@ -60,7 +63,7 @@ class EventFetcher<K,V> extends Thread {
     LOG.info(reduce + " Thread started: " + getName());
     
     try {
-      while (true && !Thread.currentThread().isInterrupted()) {
+      while (!stopped && !Thread.currentThread().isInterrupted()) {
         try {
           int numNewMaps = getMapCompletionEvents();
           failures = 0;
@@ -71,6 +74,9 @@ class EventFetcher<K,V> extends Thread {
           if (!Thread.currentThread().isInterrupted()) {
             Thread.sleep(SLEEP_TIME);
           }
+        } catch (InterruptedException e) {
+          LOG.info("EventFetcher is interrupted.. Returning");
+          return;
         } catch (IOException ie) {
           LOG.info("Exception in getting events", ie);
           // check to see whether to abort
@@ -90,6 +96,16 @@ class EventFetcher<K,V> extends Thread {
       return;
     }
   }
+
+  public void shutDown() {
+    this.stopped = true;
+    interrupt();
+    try {
+      join(5000);
+    } catch(InterruptedException ie) {
+      LOG.warn("Got interrupted while joining " + getName(), ie);
+    }
+  }
   
   /** 
    * Queries the {@link TaskTracker} for a set of map-completion events 

+ 15 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java

@@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.task.reduce.MapOutput.Type;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.ReflectionUtils;
 
+@SuppressWarnings({"deprecation"})
 class Fetcher<K,V> extends Thread {
   
   private static final Log LOG = LogFactory.getLog(Fetcher.class);
@@ -88,6 +89,8 @@ class Fetcher<K,V> extends Thread {
   private final Decompressor decompressor;
   private final SecretKey jobTokenSecret;
 
+  private volatile boolean stopped = false;
+
   public Fetcher(JobConf job, TaskAttemptID reduceId, 
                  ShuffleScheduler<K,V> scheduler, MergeManager<K,V> merger,
                  Reporter reporter, ShuffleClientMetrics metrics,
@@ -135,7 +138,7 @@ class Fetcher<K,V> extends Thread {
   
   public void run() {
     try {
-      while (true && !Thread.currentThread().isInterrupted()) {
+      while (!stopped && !Thread.currentThread().isInterrupted()) {
         MapHost host = null;
         try {
           // If merge is on, block
@@ -160,7 +163,17 @@ class Fetcher<K,V> extends Thread {
       exceptionReporter.reportException(t);
     }
   }
-  
+
+  public void shutDown() throws InterruptedException {
+    this.stopped = true;
+    interrupt();
+    try {
+      join(5000);
+    } catch (InterruptedException ie) {
+      LOG.warn("Got interrupt while joining " + getName(), ie);
+    }
+  }
+
   /**
    * The crux of the matter...
    * 

+ 4 - 15
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java

@@ -19,8 +19,6 @@ package org.apache.hadoop.mapreduce.task.reduce;
 
 import java.io.IOException;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileSystem;
@@ -33,17 +31,17 @@ import org.apache.hadoop.mapred.RawKeyValueIterator;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.Task;
+import org.apache.hadoop.mapred.Task.CombineOutputCollector;
 import org.apache.hadoop.mapred.TaskStatus;
 import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
-import org.apache.hadoop.mapred.Task.CombineOutputCollector;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.util.Progress;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
+@SuppressWarnings({"deprecation", "unchecked", "rawtypes"})
 public class Shuffle<K, V> implements ExceptionReporter {
-  private static final Log LOG = LogFactory.getLog(Shuffle.class);
   private static final int PROGRESS_FREQUENCY = 2000;
   
   private final TaskAttemptID reduceId;
@@ -100,7 +98,6 @@ public class Shuffle<K, V> implements ExceptionReporter {
                                     this, mergePhase, mapOutputFile);
   }
 
-  @SuppressWarnings("unchecked")
   public RawKeyValueIterator run() throws IOException, InterruptedException {
     // Start the map-completion events fetcher thread
     final EventFetcher<K,V> eventFetcher = 
@@ -130,19 +127,11 @@ public class Shuffle<K, V> implements ExceptionReporter {
     }
 
     // Stop the event-fetcher thread
-    eventFetcher.interrupt();
-    try {
-      eventFetcher.join();
-    } catch(Throwable t) {
-      LOG.info("Failed to stop " + eventFetcher.getName(), t);
-    }
+    eventFetcher.shutDown();
     
     // Stop the map-output fetcher threads
     for (Fetcher<K,V> fetcher : fetchers) {
-      fetcher.interrupt();
-    }
-    for (Fetcher<K,V> fetcher : fetchers) {
-      fetcher.join();
+      fetcher.shutDown();
     }
     fetchers = null;