Przeglądaj źródła

MAPREDUCE-2740. MultipleOutputs in new API creates needless TaskAttemptContexts. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1152875 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 14 lat temu
rodzic
commit
22f232bce2

+ 3 - 0
mapreduce/CHANGES.txt

@@ -212,6 +212,9 @@ Trunk (unreleased changes)
     JobInProgress.getCounters() acquire locks in a shorter time period.
     (Joydeep Sen Sarma via schen)
 
+    MAPREDUCE-2740. MultipleOutputs in new API creates needless
+    TaskAttemptContexts. (todd)
+
   BUG FIXES
 
     MAPREDUCE-2603. Disable High-Ram emulation in system tests. 

+ 11 - 5
mapreduce/src/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java

@@ -132,6 +132,10 @@ public class MultipleOutputs<KEYOUT, VALUEOUT> {
    * Cache for the taskContexts
    */
   private Map<String, TaskAttemptContext> taskContexts = new HashMap<String, TaskAttemptContext>();
+  /**
+   * Cached TaskAttemptContext which uses the job's configured settings
+   */
+  private TaskAttemptContext jobOutputFormatContext;
 
   /**
    * Checks if a named output name is valid token.
@@ -382,11 +386,13 @@ public class MultipleOutputs<KEYOUT, VALUEOUT> {
   public void write(KEYOUT key, VALUEOUT value, String baseOutputPath) 
       throws IOException, InterruptedException {
     checkBaseOutputPath(baseOutputPath);
-    TaskAttemptContext taskContext = 
-      new TaskAttemptContextImpl(context.getConfiguration(), 
-                                 context.getTaskAttemptID(),
-                                 new WrappedStatusReporter(context));
-    getRecordWriter(taskContext, baseOutputPath).write(key, value);
+    if (jobOutputFormatContext == null) {
+      jobOutputFormatContext = 
+        new TaskAttemptContextImpl(context.getConfiguration(), 
+                                   context.getTaskAttemptID(),
+                                   new WrappedStatusReporter(context));
+    }
+    getRecordWriter(jobOutputFormatContext, baseOutputPath).write(key, value);
   }
 
   // by being synchronized MultipleOutputTask can be used with a