1
0
Просмотр исходного кода

HADOOP-1376. Modify RandomWriter example so that it can generate data for the Terasort benchmark. Contributed by Devaraj.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@541786 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 лет назад
Родитель
Коммит
1060ae5930
2 изменённых файла: 45 добавлено и 4 удалено
  1. 3 0
      CHANGES.txt
  2. 42 4
      src/examples/org/apache/hadoop/examples/RandomWriter.java

+ 3 - 0
CHANGES.txt

@@ -69,6 +69,9 @@ Trunk (unreleased changes)
  22. HADOOP-1408.  Fix a compiler warning by adding a class to replace
      a generic.  (omalley via cutting)
 
+ 23. HADOOP-1376.  Modify RandomWriter example so that it can generate
+     data for the Terasort benchmark.  (Devaraj Das via cutting)
+
 
 Branch 0.13 (unreleased changes)
 

+ 42 - 4
src/examples/org/apache/hadoop/examples/RandomWriter.java

@@ -34,8 +34,33 @@ import org.apache.hadoop.mapred.lib.NullOutputFormat;
  * This program uses map/reduce to just run a distributed job where there is
  * no interaction between the tasks and each task writes a large unsorted
  * random binary sequence file of BytesWritable.
- * 
- * @author Owen O'Malley
+ * In order for this program to generate data for terasort with 10-byte keys
+ * and 90-byte values, use the following configuration:
+ * <xmp>
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>test.randomwrite.min_key</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.max_key</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.min_value</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.max_value</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.total_bytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration></xmp>
  */
 public class RandomWriter {
   
@@ -220,8 +245,21 @@ public class RandomWriter {
     
     JobClient client = new JobClient(job);
     ClusterStatus cluster = client.getClusterStatus();
-    int numMaps = cluster.getTaskTrackers() * 
-      job.getInt("test.randomwriter.maps_per_host", 10);
+    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
+    long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
+                                             1*1024*1024*1024);
+    if (numBytesToWritePerMap == 0) {
+      System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
+      System.exit(-1);
+    }
+    long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", 
+         numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
+    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
+    if (numMaps == 0 && totalBytesToWrite > 0) {
+      numMaps = 1;
+      job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
+    }
+    
     job.setNumMapTasks(numMaps);
     System.out.println("Running " + numMaps + " maps.");
     job.setNumReduceTasks(1);