Browse Source

HADOOP-5210. Merge -r 758179:758180 from trunk onto 0.20 branch. Fixes HADOOP-5210.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20@776641 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das 16 years ago
parent
commit
7c6e533ede
2 changed files with 12 additions and 2 deletions
  1. CHANGES.txt (+3 -0)
  2. src/mapred/org/apache/hadoop/mapred/Merger.java (+9 -2)

+ 3 - 0
CHANGES.txt

@@ -71,6 +71,9 @@ Release 0.20.1 - Unreleased
     correctly in the presence of High RAM jobs.
     (Sreekanth Ramakrishnan via yhemanth)
 
+    HADOOP-5210. Solves a problem in the progress report of the reduce task.
+    (Ravi Gummadi via ddas)
+
 Release 0.20.0 - 2009-04-15
 
   INCOMPATIBLE CHANGES

+ 9 - 2
src/mapred/org/apache/hadoop/mapred/Merger.java

@@ -363,6 +363,7 @@ class Merger {
           new ArrayList<Segment<K, V>>();
         int segmentsConsidered = 0;
         int numSegmentsToConsider = factor;
+        long startBytes = 0; // starting bytes of segments of this merge
         while (true) {
           //extract the smallest 'factor' number of segments  
           //Call cleanup on the empty segments (no key/value data)
@@ -375,8 +376,8 @@ class Merger {
             long startPos = segment.getPosition();
             boolean hasNext = segment.next();
             long endPos = segment.getPosition();
-            totalBytesProcessed += endPos - startPos;
-            mergeProgress.set(totalBytesProcessed * progPerByte);
+            startBytes += endPos - startPos;
+            
             if (hasNext) {
               segmentsToMerge.add(segment);
               segmentsConsidered++;
@@ -406,6 +407,12 @@ class Merger {
         //if we have lesser number of segments remaining, then just return the
         //iterator, else do another single level merge
         if (numSegments <= factor) {
+          // Reset totalBytesProcessed to track the progress of the final merge.
+          // This is considered the progress of the reducePhase, the 3rd phase
+          // of the reduce task. Currently totalBytesProcessed is not used in the
+          // sort phase of the reduce task (i.e. when intermediate merges happen).
+          totalBytesProcessed = startBytes;
+          
           //calculate the length of the remaining segments. Required for 
           //calculating the merge progress
           long totalBytes = 0;