Sfoglia il codice sorgente

HADOOP-598. Fix tasks to retry when reporting completion. Contributed by Owen.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@462911 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 anni fa
parent
commit
a65d912634
2 file modificati con 24 aggiunte e 5 eliminazioni
  1. 3 0
      CHANGES.txt
  2. 21 5
      src/java/org/apache/hadoop/mapred/Task.java

+ 3 - 0
CHANGES.txt

@@ -13,6 +13,9 @@ Release 0.7.1 - unreleased
     .999, so that nearly all blocks must be reported before filesystem
     modifications are permitted.  (Konstantin Shvachko via cutting)
 
+ 4. HADOOP-598.  Fix tasks to retry when reporting completion, so that
+    a single RPC timeout won't fail a task.  (omalley via cutting)
+
 
 Release 0.7.0 - 2006-10-06
 

+ 21 - 5
src/java/org/apache/hadoop/mapred/Task.java

@@ -176,10 +176,26 @@ abstract class Task implements Writable, Configurable {
     }
   }
 
-  public void done(TaskUmbilicalProtocol umbilical)
-    throws IOException {
-    umbilical.progress(getTaskId(),               // send a final status report
-                       taskProgress.get(), taskProgress.toString(), phase);
-    umbilical.done(getTaskId());
+  public void done(TaskUmbilicalProtocol umbilical) throws IOException {
+    int retries = 10;
+    boolean needProgress = true;
+    while (true) {
+      try {
+        if (needProgress) {
+          // send a final status report
+          umbilical.progress(getTaskId(), taskProgress.get(), 
+                             taskProgress.toString(), phase);
+          needProgress = false;
+        }
+        umbilical.done(getTaskId());
+        return;
+      } catch (IOException ie) {
+        LOG.warn("Failure signalling completion: " + 
+                 StringUtils.stringifyException(ie));
+        if (--retries == 0) {
+          throw ie;
+        }
+      }
+    }
   }
 }