
HADOOP-1926. Add a random text writer so that we can benchmark the
performance of map/reduce using the compression codecs.


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@581417 13f79535-47bb-0310-9956-ffa450edef68

Owen O'Malley, 18 years ago
Parent commit: 4595516683

+ 3 - 0
CHANGES.txt

@@ -336,6 +336,9 @@ Trunk (unreleased changes)
     HADOOP-120.  In ArrayWritable, prevent creation with null value
     class, and improve documentation.  (Cameron Pope via cutting)
 
+    HADOOP-1926. Add a random text writer example/benchmark so that we can
+    benchmark compression codecs on random data.
+
 Release 0.14.2 - unreleased
 
   BUG FIXES

+ 2 - 0
src/examples/org/apache/hadoop/examples/ExampleDriver.java

@@ -38,6 +38,8 @@ public class ExampleDriver {
                    "A map/reduce program that counts the matches of a regex in the input.");
       pgd.addClass("randomwriter", RandomWriter.class, 
                    "A map/reduce program that writes 10GB of random data per node.");
+      pgd.addClass("randomtextwriter", RandomTextWriter.class, 
+      "A map/reduce program that writes 10GB of random textual data per node.");
       pgd.addClass("sort", Sort.class, "A map/reduce program that sorts the data written by the random writer.");
       pgd.addClass("pi", PiEstimator.class, "A map/reduce program that estimates Pi using monte-carlo method.");
       pgd.addClass("pentomino", DistributedPentomino.class,

+ 39 - 8
src/examples/org/apache/hadoop/examples/Sort.java

@@ -25,6 +25,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.mapred.lib.IdentityMapper;
 import org.apache.hadoop.mapred.lib.IdentityReducer;
@@ -36,12 +38,22 @@ import org.apache.hadoop.util.ToolRunner;
  * other than use the framework to fragment and sort the input values.
  *
  * To run: bin/hadoop jar build/hadoop-examples.jar sort
- *            [-m <i>maps</i>] [-r <i>reduces</i>] <i>in-dir</i> <i>out-dir</i> 
+ *            [-m <i>maps</i>] [-r <i>reduces</i>]
+ *            [-inFormat <i>input format class</i>] 
+ *            [-outFormat <i>output format class</i>] 
+ *            [-outKey <i>output key class</i>] 
+ *            [-outValue <i>output value class</i>] 
+ *            <i>in-dir</i> <i>out-dir</i> 
  */
 public class Sort extends Configured implements Tool {
 
   static int printUsage() {
-    System.out.println("sort [-m <maps>] [-r <reduces>] <input> <output>");
+    System.out.println("sort [-m <maps>] [-r <reduces>] " +
+                       "[-inFormat <input format class>] " +
+                       "[-outFormat <output format class>] " + 
+                       "[-outKey <output key class>] " +
+                       "[-outValue <output value class>] " +
+                       "<input> <output>");
     ToolRunner.printGenericCommandUsage(System.out);
     return -1;
   }
@@ -57,12 +69,6 @@ public class Sort extends Configured implements Tool {
     JobConf jobConf = new JobConf(getConf(), Sort.class);
     jobConf.setJobName("sorter");
 
-    jobConf.setInputFormat(SequenceFileInputFormat.class);
-    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
-
-    jobConf.setOutputKeyClass(BytesWritable.class);
-    jobConf.setOutputValueClass(BytesWritable.class);
-
     jobConf.setMapperClass(IdentityMapper.class);        
     jobConf.setReducerClass(IdentityReducer.class);
 
@@ -72,6 +78,12 @@ public class Sort extends Configured implements Tool {
     jobConf.getInt("test.sort.maps_per_host", 10);
     int num_reduces = cluster.getTaskTrackers() * 
     jobConf.getInt("test.sort.reduces_per_host", cluster.getMaxTasks());
+    Class<? extends InputFormat> inputFormatClass = 
+      SequenceFileInputFormat.class;
+    Class<? extends OutputFormat> outputFormatClass = 
+      SequenceFileOutputFormat.class;
+    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
+    Class<? extends Writable> outputValueClass = BytesWritable.class;
     List<String> otherArgs = new ArrayList<String>();
     for(int i=0; i < args.length; ++i) {
       try {
@@ -79,6 +91,18 @@ public class Sort extends Configured implements Tool {
           num_maps = Integer.parseInt(args[++i]);
         } else if ("-r".equals(args[i])) {
           num_reduces = Integer.parseInt(args[++i]);
+        } else if ("-inFormat".equals(args[i])) {
+          inputFormatClass = 
+            Class.forName(args[++i]).asSubclass(InputFormat.class);
+        } else if ("-outFormat".equals(args[i])) {
+          outputFormatClass = 
+            Class.forName(args[++i]).asSubclass(OutputFormat.class);
+        } else if ("-outKey".equals(args[i])) {
+          outputKeyClass = 
+            Class.forName(args[++i]).asSubclass(WritableComparable.class);
+        } else if ("-outValue".equals(args[i])) {
+          outputValueClass = 
+            Class.forName(args[++i]).asSubclass(Writable.class);
         } else {
           otherArgs.add(args[i]);
         }
@@ -92,9 +116,16 @@ public class Sort extends Configured implements Tool {
       }
     }
 
+    // Set user-supplied (possibly default) job configs
     jobConf.setNumMapTasks(num_maps);
     jobConf.setNumReduceTasks(num_reduces);
 
+    jobConf.setInputFormat(inputFormatClass);
+    jobConf.setOutputFormat(outputFormatClass);
+
+    jobConf.setOutputKeyClass(outputKeyClass);
+    jobConf.setOutputValueClass(outputValueClass);
+
     // Make sure there are exactly 2 parameters left.
     if (otherArgs.size() != 2) {
       System.out.println("ERROR: Wrong number of parameters: " +

+ 32 - 20
src/test/org/apache/hadoop/mapred/SortValidator.java

@@ -31,7 +31,6 @@ import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.io.WritableUtils;
-import org.apache.hadoop.mapred.SortValidator.RecordStatsChecker.RecordStatsWritable;
 import org.apache.hadoop.mapred.lib.HashPartitioner;
 import org.apache.hadoop.fs.*;
 
@@ -131,12 +130,13 @@ public class SortValidator {
     }
     
     public static class Map extends MapReduceBase
-      implements Mapper<BytesWritable, BytesWritable,
+      implements Mapper<WritableComparable, Writable,
                         IntWritable, RecordStatsWritable> {
       
       private IntWritable key = null;
-      private BytesWritable prevKey = null;
-      private Partitioner<BytesWritable, BytesWritable> partitioner = null;
+      private WritableComparable prevKey = null;
+      private Class<? extends WritableComparable> keyClass;
+      private Partitioner<WritableComparable, Writable> partitioner = null;
       private int partition = -1;
       private int noSortReducers = -1;
       private long recordId = -1;
@@ -146,7 +146,7 @@ public class SortValidator {
         key = deduceInputFile(job);
         
         if (key == sortOutput) {
-          partitioner = new HashPartitioner<BytesWritable, BytesWritable>();
+          partitioner = new HashPartitioner<WritableComparable, Writable>();
           
           // Figure the 'current' partition and no. of reduces of the 'sort'
           try {
@@ -163,31 +163,40 @@ public class SortValidator {
         }
       }
       
-      public void map(BytesWritable key, 
-                      BytesWritable value,
+      @SuppressWarnings("unchecked")
+      public void map(WritableComparable key, Writable value,
                       OutputCollector<IntWritable, RecordStatsWritable> output, 
                       Reporter reporter) throws IOException {
-        BytesWritable bwKey = key;
-        BytesWritable bwValue = value;
         ++recordId;
         
         if (this.key == sortOutput) {
           // Check if keys are 'sorted' if this  
           // record is from sort's output
           if (prevKey == null) {
-            prevKey = bwKey;
+            prevKey = key;
+            keyClass = prevKey.getClass();
+            System.err.println("Got key #1 class: " + keyClass);
           } else {
-            if (prevKey.compareTo(bwKey) > 0) {
-              throw new IOException("The 'map-reduce' framework wrongly classifed"
-                                    + "(" + prevKey + ") > (" + bwKey + ") for record# " 
-                                    + recordId); 
+            System.err.println("Got key class: " + key.getClass());
+            // Sanity check
+            if (keyClass != key.getClass()) {
+              throw new IOException("Type mismatch in key: expected " +
+                                    keyClass.getName() + ", received " +
+                                    key.getClass().getName());
+            }
+            
+            // Check if they were sorted correctly
+            if (prevKey.compareTo(key) > 0) {
+              throw new IOException("The 'map-reduce' framework wrongly" +
+                                    " classified (" + prevKey + ") > (" +
+                                    key + ") for record# " + recordId); 
             }
-            prevKey = bwKey;
+            prevKey = key;
           }
 
           // Check if the sorted output is 'partitioned' right
           int keyPartition = 
-            partitioner.getPartition(bwKey, bwValue, noSortReducers);
+            partitioner.getPartition(key, value, noSortReducers);
           if (partition != keyPartition) {
             throw new IOException("Partitions do not match for record# " + 
                                   recordId + " ! - '" + partition + "' v/s '" + 
@@ -195,13 +204,16 @@ public class SortValidator {
           }
         }
 
+        String keyBytes = key.toString();
+        String valueBytes = value.toString();
         int keyValueChecksum = 
-          (WritableComparator.hashBytes(bwKey.get(), bwKey.getSize()) ^
-           WritableComparator.hashBytes(bwValue.get(), bwValue.getSize()));
+          (WritableComparator.hashBytes(keyBytes.getBytes(), keyBytes.length()) ^
+           WritableComparator.hashBytes(valueBytes.getBytes(), valueBytes.length()));
 
         // output (this.key, record-stats)
-        output.collect(this.key, new RecordStatsWritable(
-                                                         (bwKey.getSize()+bwValue.getSize()), 1, keyValueChecksum));
+        output.collect(this.key, 
+                       new RecordStatsWritable((keyBytes.length()+valueBytes.length()), 
+                                               1, keyValueChecksum));
       }
     }
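
The generalized Map above no longer assumes BytesWritable keys: it records
the class of the first key it sees, then enforces type consistency and
ordering through the raw WritableComparable contract. A standalone sketch of
that check, using Text purely as an illustrative key type:

    import java.io.IOException;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;

    public class OrderCheckSketch {
      @SuppressWarnings("unchecked")
      public static void main(String[] args) throws IOException {
        WritableComparable prevKey = null;
        Class<? extends WritableComparable> keyClass = null;
        for (String s : new String[] {"apple", "pear", "zebra"}) {
          WritableComparable key = new Text(s);
          if (prevKey == null) {
            // Remember the class of the first key, as the Map does
            keyClass = key.getClass();
          } else {
            // Sanity check: every key must share the first key's class
            if (keyClass != key.getClass()) {
              throw new IOException("Type mismatch in key: expected " +
                                    keyClass.getName() + ", received " +
                                    key.getClass().getName());
            }
            // Ordering check via WritableComparable.compareTo
            if (prevKey.compareTo(key) > 0) {
              throw new IOException("Keys out of order: (" + prevKey +
                                    ") > (" + key + ")");
            }
          }
          prevKey = key;
        }
      }
    }

The same pattern appears verbatim in the Map's map() method; using
WritableComparable rather than BytesWritable is what allows the validator to
check sort output produced with the new -outKey and -outValue options.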