Browse code

HADOOP-1350. Add test code for big map outputs. Contributed by Devaraj.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@537933 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting, 18 years ago
parent
commit
82fd70f4c2

+ 61 - 0
src/test/org/apache/hadoop/mapred/BigMapOutput.java

@@ -0,0 +1,61 @@
+package org.apache.hadoop.mapred;
+
+import java.io.*;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.*;
+import org.apache.hadoop.mapred.SortValidator.RecordStatsChecker.*;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapred.lib.*;
+import org.apache.hadoop.fs.*;
+
+public class BigMapOutput {
+
+  public static void main(String[] args) throws IOException {
+    if (args.length != 4) { //input-dir should contain a huge file ( > 2GB)
+      System.err.println("BigMapOutput " +
+                       "-input <input-dir> -output <output-dir>");
+      System.exit(1);
+    } 
+    Path bigMapInput = null;
+    Path outputPath = null;
+    for(int i=0; i < args.length; ++i) {
+      if ("-input".equals(args[i])){
+        bigMapInput = new Path(args[++i]);
+      } else if ("-output".equals(args[i])){
+        outputPath = new Path(args[++i]);
+      }
+    }
+    Configuration defaults = new Configuration();
+    FileSystem fs = FileSystem.get(defaults);
+    
+    JobConf jobConf = new JobConf(defaults, BigMapOutput.class);
+
+    jobConf.setJobName("BigMapOutput");
+    jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
+    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
+    jobConf.setInputPath(bigMapInput);
+    if (fs.exists(outputPath)) {
+      fs.delete(outputPath);
+    }
+    jobConf.setOutputPath(outputPath);
+    jobConf.setMapperClass(IdentityMapper.class);
+    jobConf.setReducerClass(IdentityReducer.class);
+    jobConf.setOutputKeyClass(BytesWritable.class);
+    jobConf.setOutputValueClass(BytesWritable.class);
+
+    Date startTime = new Date();
+    System.out.println("Job started: " + startTime);
+    JobClient.runJob(jobConf);
+    Date end_time = new Date();
+    System.out.println("Job ended: " + end_time);
+      
+  }
+}

+ 4 - 0
src/test/org/apache/hadoop/test/AllTestDriver.java

@@ -22,6 +22,7 @@ import org.apache.hadoop.util.ProgramDriver;
 import org.apache.hadoop.mapred.MRBench;
 import org.apache.hadoop.mapred.SortValidator;
 import org.apache.hadoop.mapred.TestMapRed;
+import org.apache.hadoop.mapred.BigMapOutput;
 import org.apache.hadoop.mapred.TestTextInputFormat;
 import org.apache.hadoop.mapred.TestSequenceFileInputFormat;
 import org.apache.hadoop.dfs.ClusterTestDFS;
@@ -61,6 +62,9 @@ public class AllTestDriver {
       pgd.addClass("DistributedFSCheck", DistributedFSCheck.class, "Distributed checkup of the file system consistency.");
       pgd.addClass("testmapredsort", SortValidator.class, 
                    "A map/reduce program that validates the map-reduce framework's sort.");
+      pgd.addClass("testbigmapoutput", BigMapOutput.class, 
+                   "A map/reduce program that works on a very big " + 
+                   "non-splittable file and does identity map/reduce");
       pgd.driver(argv);
     } catch(Throwable e) {
       e.printStackTrace();