@@ -353,7 +353,7 @@ class MapTask extends Task {
       ReflectionUtils.newInstance(job.getMapRunnerClass(), job);

     try {
-      runner.run(in, collector, reporter);
+      runner.run(in, new OldOutputCollector(collector, conf), reporter);
       collector.flush();
     } finally {
       //close
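The old-API path above is the only call site that changes: MapRunner still hands user mappers a plain two-argument OutputCollector, and the partition is now computed inside the wrapper introduced here (defined in the next hunk) rather than inside the output buffer. For illustration, a hypothetical word-count-style mapper on the deprecated mapred API is untouched by this patch:

    // Hypothetical old-API mapper, unaffected by the change: its
    // two-argument collect() now lands in OldOutputCollector, which
    // computes the partition before forwarding to the MapOutputCollector.
    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class WordTokenMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {
      private static final IntWritable ONE = new IntWritable(1);
      private final Text word = new Text();

      public void map(LongWritable key, Text value,
                      OutputCollector<Text, IntWritable> output,
                      Reporter reporter) throws IOException {
        for (String token : value.toString().split("\\s+")) {
          word.set(token);
          output.collect(word, ONE);  // partition chosen by the wrapper
        }
      }
    }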
@@ -427,20 +427,80 @@ class MapTask extends Task {
     }
   }

+  /**
+   * Since the mapred and mapreduce Partitioners don't share a common interface
+   * (JobConfigurable is deprecated and a subtype of mapred.Partitioner), the
+   * partitioner lives in Old/NewOutputCollector. Note that, for map-only jobs,
+   * the configured partitioner should not be called. It's common for
+   * partitioners to compute a result mod numReduces, which causes a div0 error
+   */
+  private static class OldOutputCollector<K,V> implements OutputCollector<K,V> {
+    private final Partitioner<K,V> partitioner;
+    private final MapOutputCollector<K,V> collector;
+    private final int numPartitions;
+
+    @SuppressWarnings("unchecked")
+    OldOutputCollector(MapOutputCollector<K,V> collector, JobConf conf) {
+      numPartitions = conf.getNumReduceTasks();
+      if (numPartitions > 0) {
+        partitioner = (Partitioner<K,V>)
+          ReflectionUtils.newInstance(conf.getPartitionerClass(), conf);
+      } else {
+        partitioner = new Partitioner<K,V>() {
+          @Override
+          public void configure(JobConf job) { }
+          @Override
+          public int getPartition(K key, V value, int numPartitions) {
+            return -1;
+          }
+        };
+      }
+      this.collector = collector;
+    }
+
+    @Override
+    public void collect(K key, V value) throws IOException {
+      try {
+        collector.collect(key, value,
+                          partitioner.getPartition(key, value, numPartitions));
+      } catch (InterruptedException ie) {
+        Thread.currentThread().interrupt();
+        throw new IOException("interrupt exception", ie);
+      }
+    }
+  }
+
   private class NewOutputCollector<K,V>
     extends org.apache.hadoop.mapreduce.RecordWriter<K,V> {
-    private MapOutputCollector<K,V> collector;
+    private final MapOutputCollector<K,V> collector;
+    private final org.apache.hadoop.mapreduce.Partitioner<K,V> partitioner;
+    private final int partitions;

-    NewOutputCollector(JobConf job,
+    @SuppressWarnings("unchecked")
+    NewOutputCollector(org.apache.hadoop.mapreduce.JobContext jobContext,
+                       JobConf job,
                        TaskUmbilicalProtocol umbilical,
                        TaskReporter reporter
                        ) throws IOException, ClassNotFoundException {
       collector = new MapOutputBuffer<K,V>(umbilical, job, reporter);
+      partitions = jobContext.getNumReduceTasks();
+      if (partitions > 0) {
+        partitioner = (org.apache.hadoop.mapreduce.Partitioner<K,V>)
+          ReflectionUtils.newInstance(jobContext.getPartitionerClass(), job);
+      } else {
+        partitioner = new org.apache.hadoop.mapreduce.Partitioner<K,V>() {
+          @Override
+          public int getPartition(K key, V value, int numPartitions) {
+            return -1;
+          }
+        };
+      }
     }

     @Override
-    public void write(K key, V value) throws IOException {
-      collector.collect(key, value);
+    public void write(K key, V value) throws IOException, InterruptedException {
+      collector.collect(key, value,
+                        partitioner.getPartition(key, value, partitions));
     }

     @Override
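The div0 remark in the Javadoc above is the reason for the dummy-partitioner branches: user partitioners almost always reduce mod numReduceTasks, so instantiating the configured class in a map-only job (numReduceTasks == 0) invites an ArithmeticException on the first call. Note also that OldOutputCollector.collect() re-asserts the interrupt flag before translating InterruptedException into IOException, since the old OutputCollector signature declares only IOException. For reference, the stock mapred HashPartitioner is essentially this:

    // The default mapred HashPartitioner, paraphrased from the shipped
    // class: the modulo below is why getPartition() must never be called
    // with numReduceTasks == 0.
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.Partitioner;

    public class HashPartitioner<K2, V2> implements Partitioner<K2, V2> {
      public void configure(JobConf job) {}

      public int getPartition(K2 key, V2 value, int numReduceTasks) {
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
      }
    }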
@@ -510,7 +570,7 @@ class MapTask extends Task {
       if (job.getNumReduceTasks() == 0) {
         output = outputFormat.getRecordWriter(taskContext);
       } else {
-        output = new NewOutputCollector(job, umbilical, reporter);
+        output = new NewOutputCollector(taskContext, job, umbilical, reporter);
       }

       mapperContext = contextConstructor.newInstance(mapper, job, getTaskID(),
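This hunk threads the new-API taskContext into NewOutputCollector so the constructor can resolve the partitioner via jobContext.getPartitionerClass(). The zero-reduce branch sidesteps the collector (and therefore the partitioner) entirely: records go straight to the OutputFormat's RecordWriter. A hypothetical job configuration that takes that path:

    // Hypothetical map-only job: setNumReduceTasks(0) selects the
    // getRecordWriter() branch above, so NewOutputCollector is never built.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class MapOnlyJob {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "map-only-pass-through");
        job.setMapperClass(Mapper.class);  // identity mapper
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(TextOutputFormat.class);
        // input/output paths and job submission omitted for brevity
      }
    }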
@@ -532,9 +592,10 @@ class MapTask extends Task {
     }
   }

-  interface MapOutputCollector<K, V>
-      extends OutputCollector<K, V> {
+  interface MapOutputCollector<K, V> {

+    public void collect(K key, V value, int partition
+                        ) throws IOException, InterruptedException;
     public void close() throws IOException, InterruptedException;

     public void flush() throws IOException, InterruptedException,
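Pieced together, the internal collector contract after this hunk no longer extends the public mapred OutputCollector and carries the partition explicitly. Reassembled from the diff (the continuation of the flush() throws clause lies beyond the hunk's context lines, so its tail is an assumption):

    interface MapOutputCollector<K, V> {

      public void collect(K key, V value, int partition
                          ) throws IOException, InterruptedException;
      public void close() throws IOException, InterruptedException;

      public void flush() throws IOException, InterruptedException,
                                 ClassNotFoundException;
    }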
@@ -574,7 +635,7 @@ class MapTask extends Task {
                            ClassNotFoundException {
     }

-    public void collect(K key, V value) throws IOException {
+    public void collect(K key, V value, int partition) throws IOException {
       reporter.progress();
       out.write(key, value);
       mapOutputRecordCounter.increment(1);
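This collect() evidently belongs to the direct, no-sort collector used when there are no reduces, so the new partition argument is accepted only to satisfy the widened interface. The method as it reads after the hunk:

    // Assembled from the hunk above: partition is deliberately unused,
    // since a map-only task writes every record to its single RecordWriter.
    public void collect(K key, V value, int partition) throws IOException {
      reporter.progress();
      out.write(key, value);
      mapOutputRecordCounter.increment(1);
    }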
@@ -585,7 +646,6 @@ class MapTask extends Task {
   class MapOutputBuffer<K extends Object, V extends Object>
       implements MapOutputCollector<K, V>, IndexedSortable {
     private final int partitions;
-    private final Partitioner<K, V> partitioner;
     private final JobConf job;
     private final TaskReporter reporter;
     private final Class<K> keyClass;
@@ -653,7 +713,6 @@ class MapTask extends Task {
       this.reporter = reporter;
       localFs = FileSystem.getLocal(job);
       partitions = job.getNumReduceTasks();
-      partitioner = ReflectionUtils.newInstance(job.getPartitionerClass(), job);

       rfs = ((LocalFileSystem)localFs).getRaw();

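These two hunks complete the other side of the move: MapOutputBuffer no longer owns a Partitioner at all, and the partition arrives as an argument at collect() time. One payoff of this seam is testability: the wrappers can now be exercised against a stub collector with no real buffer behind it. A hypothetical capture-style double, assuming same-package access to the nested interface:

    // Hypothetical test double for the refactored seam: records the
    // partition chosen by Old/NewOutputCollector without any real buffer.
    class CapturingCollector<K, V> implements MapOutputCollector<K, V> {
      int lastPartition = Integer.MIN_VALUE;

      public void collect(K key, V value, int partition) {
        lastPartition = partition;
      }
      public void close() { }
      public void flush() { }
    }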
@@ -739,8 +798,8 @@ class MapTask extends Task {
       }
     }

-    public synchronized void collect(K key, V value)
-        throws IOException {
+    public synchronized void collect(K key, V value, int partition
+                                     ) throws IOException {
       reporter.progress();
       if (key.getClass() != keyClass) {
         throw new IOException("Type mismatch in key from map: expected "
@@ -801,7 +860,6 @@ class MapTask extends Task {
       valSerializer.serialize(value);
       int valend = bb.markRecord();

-      final int partition = partitioner.getPartition(key, value, partitions);
       if (partition < 0 || partition >= partitions) {
         throw new IOException("Illegal partition for " + key + " (" +
             partition + ")");
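With the partition now supplied by the caller, this range check becomes the buffer's only guard against a misbehaving partitioner (previously the value was computed locally on the line just removed). A hypothetical partitioner that would now trip the "Illegal partition" IOException:

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.Partitioner;

    // Hypothetical off-by-one partitioner: valid partitions run from 0 to
    // numReduceTasks - 1, so returning numReduceTasks fails the range check.
    public class OffByOnePartitioner<K, V> implements Partitioner<K, V> {
      public void configure(JobConf job) {}

      public int getPartition(K key, V value, int numReduceTasks) {
        return numReduceTasks;
      }
    }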
@@ -821,7 +879,7 @@ class MapTask extends Task {
         kvindex = kvnext;
       } catch (MapBufferTooSmallException e) {
         LOG.info("Record too large for in-memory buffer: " + e.getMessage());
-        spillSingleRecord(key, value);
+        spillSingleRecord(key, value, partition);
         mapOutputRecordCounter.increment(1);
         return;
       }
@@ -1201,11 +1259,10 @@ class MapTask extends Task {
      * the in-memory buffer, so we must spill the record from collect
      * directly to a spill file. Consider this "losing".
      */
-    private void spillSingleRecord(final K key, final V value)
-        throws IOException {
+    private void spillSingleRecord(final K key, final V value,
+                                   int partition) throws IOException {
       long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
       FSDataOutputStream out = null;
-      final int partition = partitioner.getPartition(key, value, partitions);
       try {
         // create spill file
         final SpillRecord spillRec = new SpillRecord(partitions);
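Taken together, the hunks leave the Old/NewOutputCollector wrappers as the single place where the partition is computed per record; both the in-memory path and the oversized-record spill path then reuse that value. A sketch of the resulting new-API call path, using names from the hunks above:

    // Mapper.map()
    //   -> Context.write(k, v)
    //     -> NewOutputCollector.write(k, v)
    //          int p = partitioner.getPartition(k, v, partitions);
    //          -> MapOutputBuffer.collect(k, v, p)
    //               // serialize k and v into kvbuffer; on
    //               // MapBufferTooSmallException fall back to
    //               spillSingleRecord(k, v, p);  // same p, never recomputed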