Pārlūkot izejas kodu

HADOOP-3684. Add a cloning function to the contrib/data_join framework
permitting users to define a more efficient method for cloning values from
the reduce than serialization/deserialization. Contributed by Runping Qi.


git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@675784 13f79535-47bb-0310-9956-ffa450edef68

Christopher Douglas 17 gadi atpakaļ
vecāks
revīzija
7ae6711d26

+ 4 - 0
CHANGES.txt

@@ -69,6 +69,10 @@ Trunk (unreleased changes)
     HADOOP-3660. Add replication factor for injecting blocks in simulated
     datanodes. (Sanjay Radia via cdouglas)
 
+    HADOOP-3684. Add a cloning function to the contrib/data_join framework
+    permitting users to define a more efficient method for cloning values from
+    the reduce than serialization/deserialization. (Runping Qi via cdouglas)
+
   OPTIMIZATIONS
 
     HADOOP-3556. Removed lock contention in MD5Hash by changing the 

+ 3 - 3
src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/DataJoinReducerBase.java

@@ -95,7 +95,6 @@ public abstract class DataJoinReducerBase extends JobBase {
     SortedMap<Object, ResetableIterator> retv = new TreeMap<Object, ResetableIterator>();
     TaggedMapOutput aRecord = null;
     while (arg1.hasNext()) {
-      aRecord = (TaggedMapOutput) arg1.next();
       this.numOfValues += 1;
       if (this.numOfValues % 100 == 0) {
         reporter.setStatus("key: " + key.toString() + " numOfValues: "
@@ -104,13 +103,14 @@ public abstract class DataJoinReducerBase extends JobBase {
       if (this.numOfValues > this.maxNumOfValuesPerGroup) {
         continue;
       }
-      Text tag = new Text((Text)aRecord.getTag());
+      aRecord = ((TaggedMapOutput) arg1.next()).clone(job);
+      Text tag = aRecord.getTag();
       ResetableIterator data = retv.get(tag);
       if (data == null) {
         data = createResetableIterator();
         retv.put(tag, data);
       }
-      data.add(WritableUtils.clone(aRecord, job));
+      data.add(aRecord);
     }
     if (this.numOfValues > this.largestNumOfValues) {
       this.largestNumOfValues = numOfValues;

+ 6 - 0
src/contrib/data_join/src/java/org/apache/hadoop/contrib/utils/join/TaggedMapOutput.java

@@ -20,6 +20,8 @@ package org.apache.hadoop.contrib.utils.join;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapred.JobConf;
 
 /**
  * This abstract class serves as the base class for the values that 
@@ -46,5 +48,9 @@ public abstract class TaggedMapOutput implements Writable {
   }
 
   public abstract Writable getData();
+  
+  public TaggedMapOutput clone(JobConf job) {
+    return (TaggedMapOutput) WritableUtils.clone(this, job);
+  }
 
 }