浏览代码

MAPREDUCE-6169. MergeQueue should release reference to the current item from key and value at the end of the iteration to save memory. (Zhihai Xu via kasha)

Karthik Kambatla 10 年之前
父节点
当前提交
90194ca1cb

+ 4 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -237,6 +237,10 @@ Release 2.7.0 - UNRELEASED
 
   OPTIMIZATIONS
 
+    MAPREDUCE-6169. MergeQueue should release reference to the current item 
+    from key and value at the end of the iteration to save memory. 
+    (Zhihai Xu via kasha)
+
   BUG FIXES
 
     MAPREDUCE-5918. LineRecordReader can return the same decompressor to

+ 10 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java

@@ -528,9 +528,17 @@ public class Merger {
       }
     }
 
+    private void resetKeyValue() {
+      key = null;
+      value.reset(new byte[] {}, 0);
+      diskIFileValue.reset(new byte[] {}, 0);
+    }
+
     public boolean next() throws IOException {
-      if (size() == 0)
+      if (size() == 0) {
+        resetKeyValue();
         return false;
+      }
 
       if (minSegment != null) {
         //minSegment is non-null for all invocations of next except the first
@@ -539,6 +547,7 @@ public class Merger {
         adjustPriorityQueue(minSegment);
         if (size() == 0) {
           minSegment = null;
+          resetKeyValue();
           return false;
         }
       }

+ 2 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java

@@ -294,6 +294,8 @@ public class TestMerger {
     // Now there should be no more input
     Assert.assertFalse(mergeQueue.next());
     Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
+    Assert.assertTrue(mergeQueue.getKey() == null);
+    Assert.assertEquals(0, mergeQueue.getValue().getData().length);
   }
 
   private Progressable getReporter() {