浏览代码

MAPREDUCE-6169. MergeQueue should release reference to the current item from key and value at the end of the iteration to save memory. (Zhihai Xu via kasha)

(cherry picked from commit 90194ca1cbd695d48c3705121c2ac9a8554578a2)
Karthik Kambatla 10 年之前
父节点
当前提交
3a4211ec2c

+ 4 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -10,6 +10,10 @@ Release 2.7.0 - UNRELEASED
 
   OPTIMIZATIONS
 
+    MAPREDUCE-6169. MergeQueue should release reference to the current item 
+    from key and value at the end of the iteration to save memory. 
+    (Zhihai Xu via kasha)
+
   BUG FIXES
 
     MAPREDUCE-5918. LineRecordReader can return the same decompressor to

+ 10 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java

@@ -528,9 +528,17 @@ public class Merger {
       }
     }
 
+    private void resetKeyValue() {
+      key = null;
+      value.reset(new byte[] {}, 0);
+      diskIFileValue.reset(new byte[] {}, 0);
+    }
+
     public boolean next() throws IOException {
-      if (size() == 0)
+      if (size() == 0) {
+        resetKeyValue();
         return false;
+      }
 
       if (minSegment != null) {
         //minSegment is non-null for all invocations of next except the first
@@ -539,6 +547,7 @@ public class Merger {
         adjustPriorityQueue(minSegment);
         if (size() == 0) {
           minSegment = null;
+          resetKeyValue();
           return false;
         }
       }

+ 2 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java

@@ -294,6 +294,8 @@ public class TestMerger {
     // Now there should be no more input
     Assert.assertFalse(mergeQueue.next());
     Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
+    Assert.assertTrue(mergeQueue.getKey() == null);
+    Assert.assertEquals(0, mergeQueue.getValue().getData().length);
   }
 
   private Progressable getReporter() {