
HADOOP-3770. Add gridmix2, an iteration on the gridmix benchmark. Contributed by Runping Qi.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@722391 13f79535-47bb-0310-9956-ffa450edef68
Christopher Douglas, 16 years ago
parent commit 76205ad473

+ 3 - 0
CHANGES.txt

@@ -149,6 +149,9 @@ Trunk (unreleased changes)
 
     HADOOP-4649. Improve abstraction for spill indices. (cdouglas)
 
+    HADOOP-3770. Add gridmix2, an iteration on the gridmix benchmark. (Runping
+    Qi via cdouglas)
+
   OPTIMIZATIONS
 
     HADOOP-3293. Fixes FileInputFormat to do provide locations for splits

+ 136 - 0
src/benchmarks/gridmix2/README.gridmix2

@@ -0,0 +1,136 @@
+### "Gridmix" Benchmark ###
+
+Contents:
+
+0 Overview
+1 Getting Started
+  1.0 Build
+  1.1 Configure environment variables
+  1.2 Configure the job mixture
+  1.3 Generate test data
+2 Running
+
+
+* 0 Overview
+
+The scripts in this package model a cluster workload. The workload is
+simulated by generating random data and submitting map/reduce jobs that
+mimic observed data-access patterns in user jobs. The full benchmark
+generates approximately 2.5TB of (often compressed) input data operated on
+by the following simulated jobs:
+
+1) Three stage map/reduce job
+     Input:      500GB compressed (2TB uncompressed) SequenceFile
+                 (k,v) = (5 words, 100 words)
+                 hadoop-env: FIXCOMPSEQ
+     Compute1:   keep 10% map, 40% reduce
+     Compute2:   keep 100% map, 77% reduce
+                 Input from Compute1
+     Compute3:   keep 116% map, 91% reduce
+                 Input from Compute2
+     Motivation: Many user workloads are implemented as pipelined map/reduce
+                 jobs, including Pig workloads
+
+2) Large sort of variable key/value size
+     Input:      500GB compressed (2TB uncompressed) SequenceFile
+                 (k,v) = (5-10 words, 100-10000 words)
+                 hadoop-env: VARCOMPSEQ
+     Compute:    keep 100% map, 100% reduce
+     Motivation: Processing large, compressed datasets is common.
+
+3) Reference select
+     Input:      500GB compressed (2TB uncompressed) SequenceFile
+                 (k,v) = (5-10 words, 100-10000 words)
+                 hadoop-env: VARCOMPSEQ
+     Compute:    keep 0.2% map, 5% reduce
+                 1 Reducer
+     Motivation: Sampling from a large, reference dataset is common.
+
+4) API text sort (java, streaming)
+     Input:      500GB uncompressed Text
+                 (k,v) = (1-10 words, 0-200 words)
+                 hadoop-env: VARINFLTEXT
+     Compute:    keep 100% map, 100% reduce
+     Motivation: This benchmark should exercise each of the APIs to
+                 map/reduce
+
+5) Jobs with combiner (word count jobs)
+
+A benchmark load is a mix of different numbers of small, medium, and large jobs of the above types.
+The exact mix is specified in an XML file (gridmix_config.xml). A Java program constructs
+the jobs described in that file and places them under the control of a JobControl object.
+The JobControl object then submits the jobs to the cluster and monitors their progress until all jobs complete.
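+
+The JobControl pattern this describes looks roughly like the sketch below
+(illustrative only, not the GridMixRunner code itself; the runAll helper and
+the 10-second polling interval are assumptions):
+
+  import org.apache.hadoop.mapred.JobConf;
+  import org.apache.hadoop.mapred.jobcontrol.Job;
+  import org.apache.hadoop.mapred.jobcontrol.JobControl;
+
+  public class JobControlSketch {
+    public static void runAll(java.util.List<JobConf> jobConfs) throws Exception {
+      JobControl control = new JobControl("GridMix");
+      for (JobConf conf : jobConfs) {
+        control.addJob(new Job(conf));      // wrap each JobConf in a jobcontrol Job
+      }
+      Thread runner = new Thread(control);  // JobControl implements Runnable
+      runner.start();                       // submits ready jobs and tracks their state
+      while (!control.allFinished()) {
+        Thread.sleep(10 * 1000);            // poll until every job succeeds or fails
+      }
+      control.stop();
+    }
+  }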
+
+
+Note (jobs 1-3): Because the input data is compressed, each mapper outputs
+many more bytes than it reads, typically causing map output spills.
+
+
+
+* 1 Getting Started
+
+1.0 Build
+
+In the src/benchmarks/gridmix2 dir, type "ant".
+gridmix.jar will be created in the build subdir.
+Copy gridmix.jar to the gridmix2 dir.
+
+1.1 Configure environment variables
+
+One must modify gridmix-env-2 to set the following variables:
+
+HADOOP_HOME      The hadoop install location
+HADOOP_VERSION   The exact hadoop version to be used, e.g. hadoop-0.18.2-dev
+HADOOP_CONF_DIR  The dir containing the hadoop-site.xml for the cluster to be used
+USE_REAL_DATASET If set to true, the large (real) dataset will be generated and used by the benchmark.
+
+
+1.2 Configure the job mixture
+
+A default gridmix_config.xml file is provided.
+One may adjust the number of jobs of the various types and sizes as necessary.
+One can also change the number of reducers for each job, and specify whether
+to compress the output data of a map/reduce job.
+Note that one can specify multiple values in the
+numOfJobs and numOfReduces fields, like:
+<property>
+  <name>javaSort.smallJobs.numOfJobs</name>
+  <value>8,2</value>
+  <description></description>
+</property>
+
+
+<property>
+  <name>javaSort.smallJobs.numOfReduces</name>
+  <value>15,70</value>
+  <description></description>
+</property>
+
+The above spec means that we will have 8 small java sort jobs with 15 reducers and 2 small java sort
+jobs with 70 reducers. The two lists are matched positionally, so they must contain the same number of entries.
+
+1.3 Generate test data
+
+Test data is generated using the generateGridmix2data.sh script.
+        ./generateGridmix2data.sh
+One may modify the structure and size of the data generated here. 
+
+It is sufficient to run the script without modification, though it may
+require up to 4TB of free space in the default filesystem. Changing the size
+of the input data (COMPRESSED_DATA_BYTES, UNCOMPRESSED_DATA_BYTES,
+INDIRECT_DATA_BYTES) is safe. A 4x compression ratio for generated, block
+compressed data is typical.
+
+* 2 Running
+
+You need to set HADOOP_CONF_DIR to the directory containing the hadoop-site.xml
+for the cluster to be used. Then type
+	./rungridmix_2 <run-name>
+where <run-name> is an arbitrary prefix for the output files. The script records
+the start time in <run-name>_start.out and, when all jobs have completed, the
+end time in <run-name>_end.out.
+

+ 67 - 0
src/benchmarks/gridmix2/build.xml

@@ -0,0 +1,67 @@
+<?xml version="1.0" ?>
+<project default="main" basedir=".">
+    <property name="name" value="gridmix"/>
+    <property name="version" value="0.1"/>
+    <property name="final.name" value="${name}-${version}"/>
+    <property name="year" value="2008"/>	
+	<property name="hadoop.dir" value="${basedir}/../../../"/>
+    <property name="lib.dir" value="${hadoop.dir}/lib"/>
+    <property name="src.dir" value="${basedir}/src"/>
+    <property name="conf.dir" value="${basedir}/conf"/>
+    <property name="docs.dir" value="${basedir}/docs"/>
+    <property name="build.dir" value="${basedir}/build"/>
+    <property name="dist.dir" value="${basedir}/dist"/>
+    <property name="build.classes" value="${build.dir}/classes"/>
+	
+    <target name="init">
+        <mkdir dir="${build.dir}"/>
+        <mkdir dir="${dist.dir}"/>
+    </target>
+
+    <target name="main" depends="init, compile, compress" description="Main target">
+        <echo>
+            Building the .jar files.
+        </echo>
+    </target>
+  
+    <target name="compile" depends="init" description="Compilation target">
+        <javac srcdir="src/java/" destdir="${build.dir}">
+        	<classpath refid="classpath" />
+        </javac>
+    </target>
+	
+
+	 <target name="compress" depends="compile" description="Compression target">
+  	      <jar jarfile="${build.dir}/gridmix.jar" basedir="${build.dir}" includes="**/*.class" />
+                   
+
+        <copy todir="." includeEmptyDirs="false">
+            <fileset dir="${build.dir}">
+                <include name="**/*.jar" />
+            </fileset>
+        </copy>
+    </target>
+
+  
+    <!-- ================================================================== -->
+    <!-- Clean.  Delete the build files, and their directories              -->
+    <!-- ================================================================== -->
+    <target name="clean" description="Clean.  Delete the build files, and their directories">
+      <delete dir="${build.dir}"/>
+      <delete dir="${dist.dir}"/>
+    </target>
+
+    <!-- the normal classpath -->
+    <path id="classpath">
+	    <pathelement location="${build.classes}"/>
+	    <fileset dir="${lib.dir}">
+	       <include name="*.jar" />
+	       <exclude name="**/excluded/" />
+	    </fileset>
+	    <fileset dir="${hadoop.dir}/build">
+	       <include name="**.jar" />
+           <include name="contrib/streaming/*.jar" />
+	    </fileset>
+    </path>
+</project>

+ 94 - 0
src/benchmarks/gridmix2/generateGridmix2data.sh

@@ -0,0 +1,94 @@
+#!/usr/bin/env bash
+ 
+##############################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#####################################################################
+
+GRID_DIR=`dirname "$0"`
+GRID_DIR=`cd "$GRID_DIR"; pwd`
+source $GRID_DIR/gridmix-env-2
+
+# Smaller data set is used by default.
+COMPRESSED_DATA_BYTES=2147483648
+UNCOMPRESSED_DATA_BYTES=536870912
+
+# Number of partitions for output data
+NUM_MAPS=100
+
+# If the env var USE_REAL_DATASET is set, then use the params to generate the bigger (real) dataset.
+if [ ! -z ${USE_REAL_DATASET} ] ; then
+  echo "Using real dataset"
+  NUM_MAPS=492
+  # 2TB data compressing to approx 500GB
+  COMPRESSED_DATA_BYTES=2147483648000
+  # 500GB
+  UNCOMPRESSED_DATA_BYTES=536870912000
+fi
+
+## Data sources
+export GRID_MIX_DATA=/gridmix/data
+# Variable length key, value compressed SequenceFile
+export VARCOMPSEQ=${GRID_MIX_DATA}/WebSimulationBlockCompressed
+# Fixed length key, value compressed SequenceFile
+export FIXCOMPSEQ=${GRID_MIX_DATA}/MonsterQueryBlockCompressed
+# Variable length key, value uncompressed Text File
+export VARINFLTEXT=${GRID_MIX_DATA}/SortUncompressed
+# Fixed length key, value compressed Text File
+export FIXCOMPTEXT=${GRID_MIX_DATA}/EntropySimulationCompressed
+
+${HADOOP_HOME}/bin/hadoop jar \
+  ${EXAMPLE_JAR} randomtextwriter \
+  -D test.randomtextwrite.total_bytes=${COMPRESSED_DATA_BYTES} \
+  -D test.randomtextwrite.bytes_per_map=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
+  -D test.randomtextwrite.min_words_key=5 \
+  -D test.randomtextwrite.max_words_key=10 \
+  -D test.randomtextwrite.min_words_value=100 \
+  -D test.randomtextwrite.max_words_value=10000 \
+  -D mapred.output.compress=true \
+  -D mapred.output.compression.type=BLOCK \
+  -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat \
+  ${VARCOMPSEQ} &
+
+
+${HADOOP_HOME}/bin/hadoop jar \
+  ${EXAMPLE_JAR} randomtextwriter \
+  -D test.randomtextwrite.total_bytes=${COMPRESSED_DATA_BYTES} \
+  -D test.randomtextwrite.bytes_per_map=$((${COMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
+  -D test.randomtextwrite.min_words_key=5 \
+  -D test.randomtextwrite.max_words_key=5 \
+  -D test.randomtextwrite.min_words_value=100 \
+  -D test.randomtextwrite.max_words_value=100 \
+  -D mapred.output.compress=true \
+  -D mapred.output.compression.type=BLOCK \
+  -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat \
+  ${FIXCOMPSEQ} &
+
+
+${HADOOP_HOME}/bin/hadoop jar \
+  ${EXAMPLE_JAR} randomtextwriter \
+  -D test.randomtextwrite.total_bytes=${UNCOMPRESSED_DATA_BYTES} \
+  -D test.randomtextwrite.bytes_per_map=$((${UNCOMPRESSED_DATA_BYTES} / ${NUM_MAPS})) \
+  -D test.randomtextwrite.min_words_key=1 \
+  -D test.randomtextwrite.max_words_key=10 \
+  -D test.randomtextwrite.min_words_value=0 \
+  -D test.randomtextwrite.max_words_value=200 \
+  -D mapred.output.compress=false \
+  -outFormat org.apache.hadoop.mapred.TextOutputFormat \
+  ${VARINFLTEXT} &
+
+

+ 35 - 0
src/benchmarks/gridmix2/gridmix-env-2

@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+##############################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#####################################################################
+
+
+## Environment configuration
+# Hadoop installation
+export HADOOP_VERSION=hadoop-0.18.2-dev
+export HADOOP_HOME=${HADOOP_INSTALL_HOME}/${HADOOP_VERSION}
+export HADOOP_CONF_DIR=
+export USE_REAL_DATASET=TRUE
+
+export APP_JAR=${HADOOP_HOME}/${HADOOP_VERSION}-test.jar
+export EXAMPLE_JAR=${HADOOP_HOME}/${HADOOP_VERSION}-examples.jar
+export STREAMING_JAR=${HADOOP_HOME}/contrib/streaming/${HADOOP_VERSION}-streaming.jar
+
+
+

+ 550 - 0
src/benchmarks/gridmix2/gridmix_config.xml

@@ -0,0 +1,550 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+
+<property>
+  <name>GRID_MIX_DATA</name>
+  <value>/gridmix/data</value>
+  <description></description>
+</property>
+
+<property>
+  <name>FIXCOMPTEXT</name>
+  <value>${GRID_MIX_DATA}/EntropySimulationCompressed</value>
+  <description></description>
+</property>
+
+<property>
+  <name>VARINFLTEXT</name>
+  <value>${GRID_MIX_DATA}/SortUncompressed</value>
+  <description></description>
+</property>
+
+<property>
+  <name>FIXCOMPSEQ</name>
+  <value>${GRID_MIX_DATA}/MonsterQueryBlockCompressed</value>
+  <description></description>
+</property>
+
+<property>
+  <name>VARCOMPSEQ</name>
+  <value>${GRID_MIX_DATA}/WebSimulationBlockCompressed</value>
+  <description></description>
+</property>
+
+
+<property>
+  <name>streamSort.smallJobs.inputFiles</name>
+  <value>${VARINFLTEXT}/{part-00000,part-00001,part-00002}</value>
+  <description></description>
+</property>
+
+<property>
+  <name>streamSort.smallJobs.numOfJobs</name>
+  <value>40</value>
+  <description></description>
+</property>
+
+<property>
+  <name>streamSort.smallJobs.numOfReduces</name>
+  <value>15</value>
+  <description></description>
+</property>
+
+<property>
+  <name>streamSort.smallJobs.numOfMapoutputCompressed</name>
+  <value>40</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>streamSort.smallJobs.numOfOutputCompressed</name>
+  <value>20</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>streamSort.mediumJobs.numOfJobs</name>
+  <value>16</value>
+  <description></description>
+</property>
+<property>
+  <name>streamSort.mediumJobs.inputFiles</name>
+  <value>${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}</value>
+  <description></description>
+</property>
+<property>
+  <name>streamSort.mediumJobs.numOfReduces</name>
+  <value>170</value>
+  <description></description>
+</property>
+
+<property>
+  <name>streamSort.mediumJobs.numOfMapoutputCompressed</name>
+  <value>16</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>streamSort.mediumJobs.numOfOutputCompressed</name>
+  <value>12</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>streamSort.largeJobs.numOfJobs</name>
+  <value>5</value>
+  <description></description>
+</property>
+<property>
+  <name>streamSort.largeJobs.inputFiles</name>
+  <value>${VARINFLTEXT}</value>
+  <description></description>
+</property>
+<property>
+  <name>streamSort.largeJobs.numOfReduces</name>
+  <value>370</value>
+  <description></description>
+</property>
+
+<property>
+  <name>streamSort.largeJobs.numOfMapoutputCompressed</name>
+  <value>5</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>streamSort.largeJobs.numOfOutputCompressed</name>
+  <value>3</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>javaSort.smallJobs.numOfJobs</name>
+  <value>8,2</value>
+  <description></description>
+</property>
+<property>
+  <name>javaSort.smallJobs.inputFiles</name>
+  <value>${VARINFLTEXT}/{part-00000,part-00001,part-00002}</value>
+  <description></description>
+</property>
+<property>
+  <name>javaSort.smallJobs.numOfReduces</name>
+  <value>15,70</value>
+  <description></description>
+</property>
+
+<property>
+  <name>javaSort.smallJobs.numOfMapoutputCompressed</name>
+  <value>10</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>javaSort.smallJobs.numOfOutputCompressed</name>
+  <value>3</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>javaSort.mediumJobs.numOfJobs</name>
+  <value>4,2</value>
+  <description></description>
+</property>
+<property>
+  <name>javaSort.mediumJobs.inputFiles</name>
+  <value>${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}</value>
+  <description></description>
+</property>
+<property>
+  <name>javaSort.mediumJobs.numOfReduces</name>
+  <value>170,70</value>
+  <description></description>
+</property>
+
+<property>
+  <name>javaSort.mediumJobs.numOfMapoutputCompressed</name>
+  <value>6</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>javaSort.mediumJobs.numOfOutputCompressed</name>
+  <value>4</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>javaSort.largeJobs.numOfJobs</name>
+  <value>3</value>
+  <description></description>
+</property>
+<property>
+  <name>javaSort.largeJobs.inputFiles</name>
+  <value>${VARINFLTEXT}</value>
+  <description></description>
+</property>
+<property>
+  <name>javaSort.largeJobs.numOfReduces</name>
+  <value>370</value>
+  <description></description>
+</property>
+
+<property>
+  <name>javaSort.largeJobs.numOfMapoutputCompressed</name>
+  <value>3</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>javaSort.largeJobs.numOfOutputCompressed</name>
+  <value>2</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>combiner.smallJobs.numOfJobs</name>
+  <value>11,4</value>
+  <description></description>
+</property>
+<property>
+  <name>combiner.smallJobs.inputFiles</name>
+  <value>${VARINFLTEXT}/{part-00000,part-00001,part-00002}</value>
+  <description></description>
+</property>
+<property>
+  <name>combiner.smallJobs.numOfReduces</name>
+  <value>10,1</value>
+  <description></description>
+</property>
+
+<property>
+  <name>combiner.smallJobs.numOfMapoutputCompressed</name>
+  <value>15</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>combiner.smallJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>combiner.mediumJobs.numOfJobs</name>
+  <value>8</value>
+  <description></description>
+</property>
+<property>
+  <name>combiner.mediumJobs.inputFiles</name>
+  <value>${VARINFLTEXT}/{part-000*0,part-000*1,part-000*2}</value>
+  <description></description>
+</property>
+<property>
+  <name>combiner.mediumJobs.numOfReduces</name>
+  <value>100</value>
+  <description></description>
+</property>
+
+<property>
+  <name>combiner.mediumJobs.numOfMapoutputCompressed</name>
+  <value>8</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>combiner.mediumJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>combiner.largeJobs.numOfJobs</name>
+  <value>4</value>
+  <description></description>
+</property>
+<property>
+  <name>combiner.largeJobs.inputFiles</name>
+  <value>${VARINFLTEXT}</value>
+  <description></description>
+</property>
+<property>
+  <name>combiner.largeJobs.numOfReduces</name>
+  <value>360</value>
+  <description></description>
+</property>
+
+<property>
+  <name>combiner.largeJobs.numOfMapoutputCompressed</name>
+  <value>4</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>combiner.largeJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>monsterQuery.smallJobs.numOfJobs</name>
+  <value>7</value>
+  <description></description>
+</property>
+<property>
+  <name>monsterQuery.smallJobs.inputFiles</name>
+  <value>${FIXCOMPSEQ}/{part-00000,part-00001,part-00002}</value>
+  <description></description>
+</property>
+<property>
+  <name>monsterQuery.smallJobs.numOfReduces</name>
+  <value>5</value>
+  <description></description>
+</property>
+
+<property>
+  <name>monsterQuery.smallJobs.numOfMapoutputCompressed</name>
+  <value>7</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>monsterQuery.smallJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>monsterQuery.mediumJobs.numOfJobs</name>
+  <value>5</value>
+  <description></description>
+</property>
+<property>
+  <name>monsterQuery.mediumJobs.inputFiles</name>
+  <value>${FIXCOMPSEQ}/{part-000*0,part-000*1,part-000*2}</value>
+  <description></description>
+</property>
+<property>
+  <name>monsterQuery.mediumJobs.numOfReduces</name>
+  <value>100</value>
+  <description></description>
+</property>
+
+<property>
+  <name>monsterQuery.mediumJobs.numOfMapoutputCompressed</name>
+  <value>5</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>monsterQuery.mediumJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>monsterQuery.largeJobs.numOfJobs</name>
+  <value>3</value>
+  <description></description>
+</property>
+<property>
+  <name>monsterQuery.largeJobs.inputFiles</name>
+  <value>${FIXCOMPSEQ}</value>
+  <description></description>
+</property>
+<property>
+  <name>monsterQuery.largeJobs.numOfReduces</name>
+  <value>370</value>
+  <description></description>
+</property>
+
+<property>
+  <name>monsterQuery.largeJobs.numOfMapoutputCompressed</name>
+  <value>3</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>monsterQuery.largeJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>webdataScan.smallJobs.numOfJobs</name>
+  <value>24</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataScan.smallJobs.inputFiles</name>
+  <value>${VARCOMPSEQ}/{part-00000,part-00001,part-00002}</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataScan.smallJobs.numOfMapoutputCompressed</name>
+  <value>24</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>webdataScan.smallJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>webdataScan.mediumJobs.numOfJobs</name>
+  <value>12</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataScan.mediumJobs.inputFiles</name>
+  <value>${VARCOMPSEQ}/{part-000*0,part-000*1,part-000*2}</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataScan.mediumJobs.numOfMapoutputCompressed</name>
+  <value>12</value>
+  <description> </description>
+</property>
+<property>
+  <name>webdataScan.mediumJobs.numOfReduces</name>
+  <value>7</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataScan.mediumJobs.numOfOutputCompressed</name>
+  <value>0</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>webdataScan.largeJobs.numOfJobs</name>
+  <value>2</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataScan.largeJobs.inputFiles</name>
+  <value>${VARCOMPSEQ}</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataScan.largeJobs.numOfMapoutputCompressed</name>
+  <value>3</value>
+  <description> </description>
+</property>
+<property>
+  <name>webdataScan.largeJobs.numOfReduces</name>
+  <value>70</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataScan.largeJobs.numOfOutputCompressed</name>
+  <value>3</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>webdataSort.smallJobs.numOfJobs</name>
+  <value>7</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataSort.smallJobs.inputFiles</name>
+  <value>${VARCOMPSEQ}/{part-00000,part-00001,part-00002}</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataSort.smallJobs.numOfReduces</name>
+  <value>15</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataSort.smallJobs.numOfMapoutputCompressed</name>
+  <value>7</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>webdataSort.smallJobs.numOfOutputCompressed</name>
+  <value>7</value>
+  <description> </description>
+</property>
+
+
+<property>
+  <name>webdataSort.mediumJobs.numOfJobs</name>
+  <value>4</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataSort.mediumJobs.inputFiles</name>
+  <value>${VARCOMPSEQ}/{part-000*0,part-000*1,part-000*2}</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataSort.mediumJobs.numOfReduces</name>
+  <value>170</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataSort.mediumJobs.numOfMapoutputCompressed</name>
+  <value>4</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>webdataSort.mediumJobs.numOfOutputCompressed</name>
+  <value>4</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>webdataSort.largeJobs.numOfJobs</name>
+  <value>1</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataSort.largeJobs.inputFiles</name>
+  <value>${VARCOMPSEQ}</value>
+  <description></description>
+</property>
+<property>
+  <name>webdataSort.largeJobs.numOfReduces</name>
+  <value>800</value>
+  <description></description>
+</property>
+
+<property>
+  <name>webdataSort.largeJobs.numOfMapoutputCompressed</name>
+  <value>1</value>
+  <description> </description>
+</property>
+
+<property>
+  <name>webdataSort.largeJobs.numOfOutputCompressed</name>
+  <value>1</value>
+  <description> </description>
+</property>
+
+</configuration>

+ 37 - 0
src/benchmarks/gridmix2/rungridmix_2

@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+##############################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#####################################################################
+
+## Environment configuration
+
+GRID_DIR=`dirname "$0"`
+GRID_DIR=`cd "$GRID_DIR"; pwd`
+source $GRID_DIR/gridmix-env-2
+
+Date=`date +%F-%H-%M-%S-%N`
+echo $Date >  $1_start.out
+
+export HADOOP_CLASSPATH=${APP_JAR}:${EXAMPLE_JAR}:${STREAMING_JAR}
+export LIBJARS=${APP_JAR},${EXAMPLE_JAR},${STREAMING_JAR}
+${HADOOP_HOME}/bin/hadoop jar  -libjars ${LIBJARS} ./gridmix.jar org.apache.hadoop.mapred.GridMixRunner
+
+Date=`date +%F-%H-%M-%S-%N`
+echo $Date >  $1_end.out
+

+ 70 - 0
src/benchmarks/gridmix2/src/java/org/apache/hadoop/mapred/CombinerJobCreator.java

@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import org.apache.hadoop.examples.WordCount;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+
+public class CombinerJobCreator extends WordCount {
+
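+  /**
+   * Builds a word-count JobConf from command-line style arguments. Recognized
+   * flags (parsed below): -r <numReduceTasks>, -indir <inputPaths>,
+   * -outdir <outputPath>, -mapoutputCompressed <true|false>,
+   * -outputCompressed <true|false>. Returns null if an argument is malformed.
+   */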
+  public JobConf createJob(String[] args) throws Exception {
+    JobConf conf = new JobConf(WordCount.class);
+    conf.setJobName("GridmixCombinerJob");
+
+    // the keys are words (strings)
+    conf.setOutputKeyClass(Text.class);
+    // the values are counts (ints)
+    conf.setOutputValueClass(IntWritable.class);
+
+    conf.setMapperClass(MapClass.class);
+    conf.setCombinerClass(Reduce.class);
+    conf.setReducerClass(Reduce.class);
+    boolean mapoutputCompressed = false;
+    boolean outputCompressed = false;
+    // List<String> other_args = new ArrayList<String>();
+    for (int i = 0; i < args.length; ++i) {
+      try {
+        if ("-r".equals(args[i])) {
+          conf.setNumReduceTasks(Integer.parseInt(args[++i]));
+        } else if ("-indir".equals(args[i])) {
+          FileInputFormat.setInputPaths(conf, args[++i]);
+        } else if ("-outdir".equals(args[i])) {
+          FileOutputFormat.setOutputPath(conf, new Path(args[++i]));
+
+        } else if ("-mapoutputCompressed".equals(args[i])) {
+          mapoutputCompressed = Boolean.valueOf(args[++i]).booleanValue();
+        } else if ("-outputCompressed".equals(args[i])) {
+          outputCompressed = Boolean.valueOf(args[++i]).booleanValue();
+        }
+      } catch (NumberFormatException except) {
+        System.out.println("ERROR: Integer expected instead of " + args[i]);
+        return null;
+      } catch (ArrayIndexOutOfBoundsException except) {
+        System.out.println("ERROR: Required parameter missing from "
+            + args[i - 1]);
+        return null;
+      }
+    }
+    conf.setCompressMapOutput(mapoutputCompressed);
+    conf.setBoolean("mapred.output.compress", outputCompressed);
+    return conf;
+  }
+}

+ 98 - 0
src/benchmarks/gridmix2/src/java/org/apache/hadoop/mapred/GenericMRLoadJobCreator.java

@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.util.Random;
+import java.util.Stack;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.GenericMRLoadGenerator;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+
+public class GenericMRLoadJobCreator extends GenericMRLoadGenerator {
+
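+  /**
+   * Builds a GenericMRLoadGenerator job from the given arguments. If no output
+   * path is set, output is discarded via NullOutputFormat; if no input path is
+   * set, random input is generated; if "mapred.indirect.input.format" is
+   * configured, the list of input files (length, path) is written to a
+   * SequenceFile under the job system directory for that format to consume.
+   */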
+  public JobConf createJob(String[] argv, boolean mapoutputCompressed,
+      boolean outputCompressed) throws Exception {
+
+    JobConf job = new JobConf();
+    job.setJarByClass(GenericMRLoadGenerator.class);
+    job.setMapperClass(SampleMapper.class);
+    job.setReducerClass(SampleReducer.class);
+    if (!parseArgs(argv, job)) {
+      return null;
+    }
+
+    if (null == FileOutputFormat.getOutputPath(job)) {
+      // No output dir? No writes
+      job.setOutputFormat(NullOutputFormat.class);
+    }
+
+    if (0 == FileInputFormat.getInputPaths(job).length) {
+      // No input dir? Generate random data
+      System.err.println("No input path; ignoring InputFormat");
+      confRandom(job);
+    } else if (null != job.getClass("mapred.indirect.input.format", null)) {
+      // specified IndirectInputFormat? Build src list
+      JobClient jClient = new JobClient(job);
+      Path sysdir = jClient.getSystemDir();
+      Random r = new Random();
+      Path indirInputFile = new Path(sysdir, Integer.toString(r
+          .nextInt(Integer.MAX_VALUE), 36)
+          + "_files");
+      job.set("mapred.indirect.input.file", indirInputFile.toString());
+      SequenceFile.Writer writer = SequenceFile.createWriter(sysdir
+          .getFileSystem(job), job, indirInputFile, LongWritable.class,
+          Text.class, SequenceFile.CompressionType.NONE);
+      try {
+        for (Path p : FileInputFormat.getInputPaths(job)) {
+          FileSystem fs = p.getFileSystem(job);
+          Stack<Path> pathstack = new Stack<Path>();
+          pathstack.push(p);
+          while (!pathstack.empty()) {
+            for (FileStatus stat : fs.listStatus(pathstack.pop())) {
+              if (stat.isDir()) {
+                if (!stat.getPath().getName().startsWith("_")) {
+                  pathstack.push(stat.getPath());
+                }
+              } else {
+                writer.sync();
+                writer.append(new LongWritable(stat.getLen()), new Text(stat
+                    .getPath().toUri().toString()));
+              }
+            }
+          }
+        }
+      } finally {
+        writer.close();
+      }
+    }
+
+    job.setCompressMapOutput(mapoutputCompressed);
+    job.setBoolean("mapred.output.compress", outputCompressed);
+    return job;
+
+  }
+
+}

+ 34 - 0
src/benchmarks/gridmix2/src/java/org/apache/hadoop/mapred/GridMixConfig.java

@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+
+public class GridMixConfig extends Configuration {
+
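+  /**
+   * Parses a comma-separated list of integers from the named property, e.g.
+   * "15,70" becomes {15, 70}. Falls back to {defaultValue} when unset.
+   */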
+  public int[] getInts(String name, int defaultValue) {
+    String[] valuesInString = getStrings(name, String.valueOf(defaultValue));
+    int[] results = new int[valuesInString.length];
+    for (int i = 0; i < valuesInString.length; i++) {
+      results[i] = Integer.parseInt(valuesInString[i]);
+    }
+    return results;
+
+  }
+}

+ 1548 - 0
src/benchmarks/gridmix2/src/java/org/apache/hadoop/mapred/GridMixRunner.java

@@ -0,0 +1,1548 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+import java.io.IOException;
+
+import org.apache.hadoop.examples.Sort;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import org.apache.hadoop.mapred.Counters.Group;
+import org.apache.hadoop.mapred.jobcontrol.*;
+import org.apache.hadoop.mapred.lib.IdentityMapper;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.streaming.StreamJob;
+import org.apache.hadoop.mapred.GridMixConfig;
+
+public class GridMixRunner {
+
+  private static int NUM_OF_LARGE_JOBS_PER_CLASS = 0;
+
+  private static int NUM_OF_MEDIUM_JOBS_PER_CLASS = 0;
+
+  private static int NUM_OF_SMALL_JOBS_PER_CLASS = 0;
+
+  private static int NUM_OF_REDUCERS_FOR_LARGE_JOB = 370;
+
+  private static int NUM_OF_REDUCERS_FOR_MEDIUM_JOB = 170;
+
+  private static int NUM_OF_REDUCERS_FOR_SMALL_JOB = 15;
+
+  private static String GRID_MIX_DATA = "/gridmix/data";
+
+  private static String VARCOMPSEQ = GRID_MIX_DATA
+      + "/WebSimulationBlockCompressed";
+
+  private static String FIXCOMPSEQ = GRID_MIX_DATA
+      + "/MonsterQueryBlockCompressed";
+
+  private static String VARINFLTEXT = GRID_MIX_DATA + "/SortUncompressed";
+
+  private JobControl gridmix;
+
+  private FileSystem fs;
+
+  private GridMixConfig config;
+
+  private static final String GRIDMIXCONFIG = "gridmix_config.xml";
+
+  private int numOfJobs = 0;
+
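+  /**
+   * Loads the job mix from the file named by the GRIDMIXCONFIG environment
+   * variable, or from gridmix_config.xml in the current working directory
+   * when the variable is unset.
+   */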
+  private void initConfig() {
+    String configFile = System.getenv("GRIDMIXCONFIG");
+    if (configFile == null) {
+      String configDir = System.getProperty("user.dir");
+      if (configDir == null) {
+        configDir = ".";
+      }
+      configFile = configDir + "/" + GRIDMIXCONFIG;
+    }
+
+    if (config == null) {
+      try {
+        Path fileResource = new Path(configFile);
+        config = new GridMixConfig();
+        config.addResource(fileResource);
+      } catch (Exception e) {
+        System.out.println("Error reading configuration file:" + configFile);
+      }
+    }
+  }
+
+  public GridMixRunner() throws IOException {
+    gridmix = new JobControl("GridMix");
+    Configuration conf = new Configuration();
+    try {
+      fs = FileSystem.get(conf);
+    } catch (IOException ex) {
+      System.out.println("fs initialization error: " + ex.getMessage());
+      throw ex;
+    }
+    initConfig();
+  }
+
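+  /**
+   * Adds one streaming sort job (cat mapper, cat reducer) of the given size
+   * ("small", "medium", or "large") to the JobControl group, reading the
+   * size-appropriate input glob from the gridmix configuration (or the
+   * built-in default).
+   */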
+  private void addStreamSort(int num_of_reducers, boolean mapoutputCompressed,
+      boolean outputCompressed, String size) {
+
+    String defaultIndir = VARINFLTEXT + "/{part-00000,part-00001,part-00002}";
+    String indir = getInputDirsFor("streamSort.smallJobs.inputFiles",
+        defaultIndir);
+    String outdir = addTSSuffix("perf-out/stream-out-dir-small_");
+    if ("medium".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARINFLTEXT + "/{part-000*0,part-000*1,part-000*2}";
+      indir = getInputDirsFor("streamSort.mediumJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/stream-out-dir-medium_");
+    } else if ("large".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARINFLTEXT;
+      indir = getInputDirsFor("streamSort.largeJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/stream-out-dir-large_");
+    }
+
+    StringBuffer sb = new StringBuffer();
+
+    sb.append("-input ").append(indir).append(" ");
+    sb.append("-output ").append(outdir).append(" ");
+    sb.append("-mapper cat ");
+    sb.append("-reducer cat ");
+    sb.append("-numReduceTasks ").append(num_of_reducers);
+
+    String[] args = sb.toString().split(" ");
+
+    clearDir(outdir);
+    try {
+      JobConf jobconf = StreamJob.createJob(args);
+      jobconf.setJobName("GridmixStreamingSorter." + size);
+      jobconf.setCompressMapOutput(mapoutputCompressed);
+      jobconf.setBoolean("mapred.output.compress", outputCompressed);
+
+      Job job = new Job(jobconf);
+      gridmix.addJob(job);
+      numOfJobs++;
+    } catch (Exception ex) {
+      ex.printStackTrace();
+      System.out.println(ex.toString());
+    }
+
+  }
+
+  private String getInputDirsFor(String jobType, String defaultIndir) {
+    String inputFile[] = config.getStrings(jobType, defaultIndir);
+    StringBuffer indirBuffer = new StringBuffer();
+    for (int i = 0; i < inputFile.length; i++) {
+      indirBuffer = indirBuffer.append(inputFile[i]).append(",");
+    }
+    return indirBuffer.substring(0, indirBuffer.length() - 1);
+  }
+
+  private void addStreamSortSmall(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addStreamSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "small");
+  }
+
+  private void addStreamSortMedium(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addStreamSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "medium");
+  }
+
+  private void addStreamSortLarge(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addStreamSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "large");
+  }
+
+  private void clearDir(String dir) {
+    try {
+      Path outfile = new Path(dir);
+      fs.delete(outfile);
+    } catch (IOException ex) {
+      ex.printStackTrace();
+      System.out.println("delete file error:");
+      System.out.println(ex.toString());
+    }
+  }
+
+  private void addJavaSort(int num_of_reducers, boolean mapoutputCompressed,
+      boolean outputCompressed, String size) {
+
+    String defaultIndir = VARINFLTEXT + "/{part-00000,part-00001,part-00002}";
+    String indir = getInputDirsFor("javaSort.smallJobs.inputFiles",
+        defaultIndir);
+    String outdir = addTSSuffix("perf-out/sort-out-dir-small_");
+    if ("medium".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARINFLTEXT + "/{part-000*0,part-000*1,part-000*2}";
+      indir = getInputDirsFor("javaSort.mediumJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/sort-out-dir-medium_");
+    } else if ("large".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARINFLTEXT;
+      indir = getInputDirsFor("javaSort.largeJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/sort-out-dir-large_");
+    }
+
+    clearDir(outdir);
+
+    try {
+      JobConf jobConf = new JobConf();
+      jobConf.setJarByClass(Sort.class);
+      jobConf.setJobName("GridmixJavaSorter." + size);
+      jobConf.setMapperClass(IdentityMapper.class);
+      jobConf.setReducerClass(IdentityReducer.class);
+
+      jobConf.setNumReduceTasks(num_of_reducers);
+      jobConf
+          .setInputFormat(org.apache.hadoop.mapred.KeyValueTextInputFormat.class);
+      jobConf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
+
+      jobConf.setOutputKeyClass(org.apache.hadoop.io.Text.class);
+      jobConf.setOutputValueClass(org.apache.hadoop.io.Text.class);
+      jobConf.setCompressMapOutput(mapoutputCompressed);
+      jobConf.setBoolean("mapred.output.compress", outputCompressed);
+
+      FileInputFormat.addInputPaths(jobConf, indir);
+
+      FileOutputFormat.setOutputPath(jobConf, new Path(outdir));
+
+      Job job = new Job(jobConf);
+
+      gridmix.addJob(job);
+      numOfJobs++;
+
+    } catch (Exception ex) {
+      ex.printStackTrace();
+    }
+
+  }
+
+  private void addJavaSortSmall(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addJavaSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed, "small");
+  }
+
+  private void addJavaSortMedium(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addJavaSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "medium");
+  }
+
+  private void addJavaSortLarge(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addJavaSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed, "large");
+  }
+
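+  /**
+   * Evenly picks approximately 'selected' indices out of 'total': returns true
+   * when 'index' falls on a multiple of (total / selected) within the first
+   * (total - total % selected) indices; always true when selected >= total and
+   * always false when selected <= 0. Used to decide which jobs in a class get
+   * map-output or job-output compression.
+   */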
+  private boolean select(int total, int selected, int index) {
+    int step;
+    if (selected > 0 && selected < total) {
+      step = total / selected;
+    } else if (selected <= 0) {
+      return false;
+    } else {
+      return true;
+    }
+
+    int effectiveTotal = total - total % selected;
+
+    if (index <= effectiveTotal - 1 && (index % step == 0)) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  private void addTextSortJobs() {
+
+    int[] nums_of_small_streamsort_job = config.getInts(
+        "streamSort.smallJobs.numOfJobs", NUM_OF_SMALL_JOBS_PER_CLASS);
+    int[] nums_of_small_javasort_job = config.getInts(
+        "javaSort.smallJobs.numOfJobs", NUM_OF_SMALL_JOBS_PER_CLASS);
+
+    int num_of_small_streamsort_job_mapoutputCompressed = config.getInt(
+        "streamSort.smallJobs.numOfMapoutputCompressed", 0);
+    int num_of_small_javasort_job_mapoutputCompressed = config.getInt(
+        "javaSort.smallJobs.numOfMapoutputCompressed", 0);
+
+    int num_of_small_streamsort_job_outputCompressed = config.getInt(
+        "streamSort.smallJobs.numOfOutputCompressed",
+        NUM_OF_SMALL_JOBS_PER_CLASS);
+    int num_of_small_javasort_job_outputCompressed = config
+        .getInt("javaSort.smallJobs.numOfOutputCompressed",
+            NUM_OF_SMALL_JOBS_PER_CLASS);
+
+    int[] streamsort_smallJobs_numsOfReduces = config.getInts(
+        "streamSort.smallJobs.numOfReduces", NUM_OF_REDUCERS_FOR_SMALL_JOB);
+    int[] javasort_smallJobs_numsOfReduces = config.getInts(
+        "javaSort.smallJobs.numOfReduces", NUM_OF_REDUCERS_FOR_SMALL_JOB);
+
+    int len1, len2;
+
+    len1 = nums_of_small_streamsort_job.length;
+    len2 = streamsort_smallJobs_numsOfReduces.length;
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: "
+              + "streamSort.smallJobs.numOfJobs and streamSort.smallJobs.numOfReduces must have the same number of items");
+
+    }
+    int totalNum = 0;
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_small_streamsort_job[i];
+    }
+    int currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_small_streamsort_job = nums_of_small_streamsort_job[index];
+      int streamsort_smallJobs_numOfReduces = streamsort_smallJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_small_streamsort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_small_streamsort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_small_streamsort_job_outputCompressed, currentIndex);
+        addStreamSortSmall(streamsort_smallJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    len1 = nums_of_small_javasort_job.length;
+    len2 = javasort_smallJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_small_javasort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: javaSort.smallJobs.numOfJobs, "
+              + "javaSort.smallJobs.numOfReduces must have the same number of items");
+
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_small_javasort_job = nums_of_small_javasort_job[index];
+      int javasort_smallJobs_numOfReduces = javasort_smallJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_small_javasort_job; i++) {
+
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_small_javasort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_small_javasort_job_outputCompressed, currentIndex);
+
+        addJavaSortSmall(javasort_smallJobs_numOfReduces, mapoutputCompressed,
+            outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_medium_streamsort_job = config.getInts(
+        "streamSort.mediumJobs.numOfJobs", NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int[] nums_of_medium_javasort_job = config.getInts(
+        "javaSort.mediumJobs.numOfJobs", NUM_OF_MEDIUM_JOBS_PER_CLASS);
+
+    int num_of_medium_streamsort_job_mapoutputCompressed = config.getInt(
+        "streamSort.mediumJobs.numOfMapoutputCompressed", 0);
+    int num_of_medium_javasort_job_mapoutputCompressed = config.getInt(
+        "javaSort.mediumJobs.numOfMapoutputCompressed", 0);
+
+    int num_of_medium_streamsort_job_outputCompressed = config.getInt(
+        "streamSort.mediumJobs.numOfOutputCompressed",
+        NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int num_of_medium_javasort_job_outputCompressed = config.getInt(
+        "javaSort.mediumJobs.numOfOutputCompressed",
+        NUM_OF_MEDIUM_JOBS_PER_CLASS);
+
+    int[] streamsort_mediumJobs_numsOfReduces = config.getInts(
+        "streamSort.mediumJobs.numOfReduces", NUM_OF_REDUCERS_FOR_MEDIUM_JOB);
+    int[] javasort_mediumJobs_numsOfReduces = config.getInts(
+        "javaSort.mediumJobs.numOfReduces", NUM_OF_REDUCERS_FOR_MEDIUM_JOB);
+
+    len1 = nums_of_medium_streamsort_job.length;
+    len2 = streamsort_mediumJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_medium_streamsort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: streamSort.mediumJobs.numOfJobs, "
+              + "streamSort.mediumJobs.numOfReduces must have the same number of items");
+
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_medium_streamsort_job = nums_of_medium_streamsort_job[index];
+      int streamsort_mediumJobs_numOfReduces = streamsort_mediumJobs_numsOfReduces[index];
+
+      for (int i = 0; i < num_of_medium_streamsort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_medium_streamsort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_medium_streamsort_job_outputCompressed, currentIndex);
+
+        addStreamSortMedium(streamsort_mediumJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+
+      }
+    }
+
+    len1 = nums_of_medium_javasort_job.length;
+    len2 = javasort_mediumJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_medium_javasort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: javaSort.mediumJobs.numOfJobs, "
+              + "javaSort.mediumJobs.numOfReduces must have the same number of items");
+
+    }
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_medium_javasort_job = nums_of_medium_javasort_job[index];
+      int javasort_mediumJobs_numOfReduces = javasort_mediumJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_medium_javasort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_medium_javasort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_medium_javasort_job_outputCompressed, currentIndex);
+
+        addJavaSortMedium(javasort_mediumJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_large_streamsort_job = config.getInts(
+        "streamSort.largeJobs.numOfJobs", NUM_OF_LARGE_JOBS_PER_CLASS);
+    int[] nums_of_large_javasort_job = config.getInts(
+        "javaSort.largeJobs.numOfJobs", NUM_OF_LARGE_JOBS_PER_CLASS);
+
+    int num_of_large_streamsort_job_mapoutputCompressed = config.getInt(
+        "streamSort.largeJobs.numOfMapoutputCompressed", 0);
+    int num_of_large_javasort_job_mapoutputCompressed = config.getInt(
+        "javaSort.largeJobs.numOfMapoutputCompressed", 0);
+
+    int num_of_large_streamsort_job_outputCompressed = config.getInt(
+        "streamSort.largeJobs.numOfOutputCompressed",
+        NUM_OF_LARGE_JOBS_PER_CLASS);
+    int num_of_large_javasort_job_outputCompressed = config
+        .getInt("javaSort.largeJobs.numOfOutputCompressed",
+            NUM_OF_LARGE_JOBS_PER_CLASS);
+
+    int[] streamsort_largeJobs_numsOfReduces = config.getInts(
+        "streamSort.largeJobs.numOfReduces", NUM_OF_REDUCERS_FOR_LARGE_JOB);
+    int[] javasort_largeJobs_numsOfReduces = config.getInts(
+        "javaSort.largeJobs.numOfReduces", NUM_OF_REDUCERS_FOR_LARGE_JOB);
+
+    len1 = nums_of_large_streamsort_job.length;
+    len2 = streamsort_largeJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_large_streamsort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: streamSort.largeJobs.numOfJobs, "
+              + "streamSort.largeJobs.numOfReduces must have the same number of items");
+
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_large_streamsort_job = nums_of_large_streamsort_job[index];
+      int streamsort_largeJobs_numOfReduces = streamsort_largeJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_large_streamsort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_large_streamsort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_large_streamsort_job_outputCompressed, currentIndex);
+        addStreamSortLarge(streamsort_largeJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    len1 = nums_of_large_javasort_job.length;
+    len2 = javasort_largeJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_large_javasort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: javaSort.largeJobs.numOfJobs, "
+              + "javaSort.largeJobs.numOfReduces must have the same number of items");
+
+    }
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_large_javasort_job = nums_of_large_javasort_job[index];
+      int javasort_largeJobs_numOfReduces = javasort_largeJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_large_javasort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_large_javasort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_large_javasort_job_outputCompressed, currentIndex);
+
+        addJavaSortLarge(javasort_largeJobs_numOfReduces, mapoutputCompressed,
+            outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+  }
+
+  private void addWebdataScan(int num_of_reducers, boolean mapoutputCompressed,
+      boolean outputCompressed, String size) {
+    String defaultIndir = VARCOMPSEQ + "/{part-00000,part-00001,part-00002}";
+    String indir = getInputDirsFor("webdataScan.smallJobs.inputFiles",
+        defaultIndir);
+    String outdir = addTSSuffix("perf-out/webdata-scan-out-dir-small_");
+    if ("medium".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARCOMPSEQ + "/{part-000*0,part-000*1,part-000*2}";
+      indir = getInputDirsFor("webdataScan.mediumJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/webdata-scan-out-dir-medium_");
+    } else if ("large".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARCOMPSEQ;
+      indir = getInputDirsFor("webdataScan.largeJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/webdata-scan-dir-large_");
+    }
+
+    GenericMRLoadJobCreator jobcreator = new GenericMRLoadJobCreator();
+    StringBuffer sb = new StringBuffer();
+    sb.append("-keepmap 0.2 ");
+    sb.append("-keepred 5 ");
+    sb.append("-inFormat org.apache.hadoop.mapred.SequenceFileInputFormat ");
+    sb.append("-outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat ");
+    sb.append("-outKey org.apache.hadoop.io.Text ");
+    sb.append("-outValue org.apache.hadoop.io.Text ");
+    sb.append("-indir ").append(indir).append(" ");
+    sb.append("-outdir ").append(outdir).append(" ");
+    sb.append("-r ").append(num_of_reducers);
+
+    String[] args = sb.toString().split(" ");
+    clearDir(outdir);
+    try {
+      JobConf jobconf = jobcreator.createJob(args, mapoutputCompressed,
+          outputCompressed);
+      jobconf.setJobName("GridmixWebdatascan." + size);
+      Job job = new Job(jobconf);
+      gridmix.addJob(job);
+      numOfJobs++;
+    } catch (Exception ex) {
+      ex.printStackTrace();
+    }
+
+  }
+
+  private void addWebdataScanSmall(int num_of_reducers,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addWebdataScan(num_of_reducers, mapoutputCompressed, outputCompressed,
+        "small");
+  }
+
+  private void addWebdataScanMedium(int num_of_reducers,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addWebdataScan(num_of_reducers, mapoutputCompressed, outputCompressed,
+        "medium");
+  }
+
+  private void addWebdataScanLarge(int num_of_reducers,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addWebdataScan(num_of_reducers, mapoutputCompressed, outputCompressed,
+        "large");
+  }
+
+  private void addWebdataScanJobs() {
+
+    int[] nums_of_small_webdatascan_job = config.getInts(
+        "webdataScan.smallJobs.numOfJobs", NUM_OF_SMALL_JOBS_PER_CLASS);
+    int num_of_small_webdatascan_job_mapoutputCompressed = config.getInt(
+        "webdataScan.smallJobs.numOfMapoutputCompressed", 0);
+    int num_of_small_webdatascan_job_outputCompressed = config.getInt(
+        "webdataScan.smallJobs.numOfOutputCompressed",
+        NUM_OF_REDUCERS_FOR_SMALL_JOB);
+
+    int[] webdatascan_smallJobs_numsOfReduces = config.getInts(
+        "webdataScan.smallJobs.numOfReduces", NUM_OF_REDUCERS_FOR_SMALL_JOB);
+    int len1, len2, totalNum, currentIndex;
+
+    len1 = nums_of_small_webdatascan_job.length;
+    len2 = webdatascan_smallJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_small_webdatascan_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: webdataScan.smallJobs.numOfJobs, "
+              + "webdataScan.smallJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_small_webdatascan_job = nums_of_small_webdatascan_job[index];
+      int webdatascan_smallJobs_numOfReduces = webdatascan_smallJobs_numsOfReduces[index];
+
+      for (int i = 0; i < num_of_small_webdatascan_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_small_webdatascan_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_small_webdatascan_job_outputCompressed, currentIndex);
+        addWebdataScanSmall(webdatascan_smallJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_medium_webdatascan_job = config.getInts(
+        "webdataScan.mediumJobs.numOfJobs", NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int num_of_medium_webdatascan_job_mapoutputCompressed = config.getInt(
+        "webdataScan.mediumJobs.numOfMapoutputCompressed", 0);
+    int num_of_medium_webdatascan_job_outputCompressed = config.getInt(
+        "webdataScan.mediumJobs.numOfOutputCompressed",
+        NUM_OF_REDUCERS_FOR_MEDIUM_JOB);
+
+    int[] webdatascan_mediumJobs_numsOfReduces = config.getInts(
+        "webdataScan.mediumJobs.numOfReduces", NUM_OF_REDUCERS_FOR_MEDIUM_JOB);
+
+    len1 = nums_of_medium_webdatascan_job.length;
+    len2 = webdatascan_mediumJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_medium_webdatascan_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: webdataScan.mediumJobs.numOfJobs, "
+              + "webdataScan.mediumJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_medium_webdatascan_job = nums_of_medium_webdatascan_job[index];
+      int webdatascan_mediumJobs_numOfReduces = webdatascan_mediumJobs_numsOfReduces[index];
+
+      for (int i = 0; i < num_of_medium_webdatascan_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_medium_webdatascan_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_medium_webdatascan_job_outputCompressed, currentIndex);
+        addWebdataScanMedium(webdatascan_mediumJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_large_webdatascan_job = config.getInts(
+        "webdataScan.largeJobs.numOfJobs", NUM_OF_LARGE_JOBS_PER_CLASS);
+    int num_of_large_webdatascan_job_mapoutputCompressed = config.getInt(
+        "webdataScan.largeJobs.numOfMapoutputCompressed", 0);
+    int num_of_large_webdatascan_job_outputCompressed = config.getInt(
+        "webdataScan.largeJobs.numOfOutputCompressed",
+        NUM_OF_REDUCERS_FOR_LARGE_JOB);
+
+    int[] webdatascan_largeJobs_numsOfReduces = config.getInts(
+        "webdataScan.largeJobs.numOfReduces", NUM_OF_REDUCERS_FOR_LARGE_JOB);
+
+    len1 = nums_of_large_webdatascan_job.length;
+    len2 = webdatascan_largeJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_large_webdatascan_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: webdataScan.largeJobs.numOfJobs, "
+              + "webdataScan.largeJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_large_webdatascan_job = nums_of_large_webdatascan_job[index];
+      int webdatascan_largeJobs_numOfReduces = webdatascan_largeJobs_numsOfReduces[index];
+
+      for (int i = 0; i < num_of_large_webdatascan_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_large_webdatascan_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_large_webdatascan_job_outputCompressed, currentIndex);
+        addWebdataScanLarge(webdatascan_largeJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+  }
+
+  private void addCombiner(int num_of_reducers, boolean mapoutputCompressed,
+      boolean outputCompressed, String size) {
+
+    String defaultIndir = VARCOMPSEQ + "/{part-00000,part-00001,part-00002}";
+    String indir = getInputDirsFor("combiner.smallJobs.inputFiles",
+        defaultIndir);
+    String outdir = addTSSuffix("perf-out/combiner-out-dir-small_");
+    if ("medium".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARCOMPSEQ + "/{part-000*0,part-000*1,part-000*2}";
+      indir = getInputDirsFor("combiner.mediumJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/combiner-out-dir-medium_");
+    } else if ("large".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARCOMPSEQ;
+      indir = getInputDirsFor("combiner.largeJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/combiner-dir-large_");
+    }
+
+    CombinerJobCreator jobcreator = new CombinerJobCreator();
+    StringBuffer sb = new StringBuffer();
+    sb.append("-r ").append(num_of_reducers).append(" ");
+    sb.append("-indir ").append(indir).append(" ");
+    sb.append("-outdir ").append(outdir);
+    sb.append("-mapoutputCompressed ").append(mapoutputCompressed).append(" ");
+    sb.append("-outputCompressed ").append(outputCompressed);
+
+    String[] args = sb.toString().split(" ");
+    clearDir(outdir);
+    try {
+      JobConf jobconf = jobcreator.createJob(args);
+      jobconf.setJobName("GridmixCombinerJob." + size);
+      Job job = new Job(jobconf);
+      gridmix.addJob(job);
+      numOfJobs++;
+    } catch (Exception ex) {
+      ex.printStackTrace();
+    }
+
+  }
+
+  private void addCombinerSmall(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addCombiner(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed, "small");
+  }
+
+  private void addCombinerMedium(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addCombiner(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed, "medium");
+  }
+
+  private void addCombinerLarge(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addCombiner(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed, "large");
+  }
+
+  private void addCombinerJobs() {
+    int[] nums_of_small_combiner_job = config.getInts(
+        "combiner.smallJobs.numOfJobs", NUM_OF_SMALL_JOBS_PER_CLASS);
+    int num_of_small_combiner_job_mapoutputCompressed = config.getInt(
+        "combiner.smallJobs.numOfMapoutputCompressed", 0);
+    int num_of_small_combiner_job_outputCompressed = config
+        .getInt("combiner.smallJobs.numOfOutputCompressed",
+            NUM_OF_SMALL_JOBS_PER_CLASS);
+    int[] combiner_smallJobs_numsOfReduces = config.getInts(
+        "combiner.smallJobs.numOfReduces", NUM_OF_REDUCERS_FOR_SMALL_JOB);
+    int len1, len2, totalNum, currentIndex;
+
+    len1 = nums_of_small_combiner_job.length;
+    len2 = combiner_smallJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_small_combiner_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: combiner.smallJobs.numOfJobs, "
+              + "combiner.smallJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_small_combiner_job = nums_of_small_combiner_job[index];
+      int combiner_smallJobs_numOfReduces = combiner_smallJobs_numsOfReduces[index];
+
+      for (int i = 0; i < num_of_small_combiner_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_small_combiner_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_small_combiner_job_outputCompressed, currentIndex);
+        addCombinerSmall(combiner_smallJobs_numOfReduces, mapoutputCompressed,
+            outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_medium_combiner_job = config.getInts(
+        "combiner.mediumJobs.numOfJobs", NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int num_of_medium_combiner_job_mapoutputCompressed = config.getInt(
+        "combiner.mediumJobs.numOfMapoutputCompressed", 0);
+    int num_of_medium_combiner_job_outputCompressed = config.getInt(
+        "combiner.mediumJobs.numOfOutputCompressed",
+        NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int[] combiner_mediumJobs_numsOfReduces = config.getInts(
+        "combiner.mediumJobs.numOfReduces", NUM_OF_REDUCERS_FOR_MEDIUM_JOB);
+
+    len1 = nums_of_medium_combiner_job.length;
+    len2 = combiner_mediumJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_medium_combiner_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: combiner.mediumJobs.numOfJobs, "
+              + "combiner.mediumJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_medium_combiner_job = nums_of_medium_combiner_job[index];
+      int combiner_mediumJobs_numOfReduces = combiner_mediumJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_medium_combiner_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_medium_combiner_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_medium_combiner_job_outputCompressed, currentIndex);
+
+        addCombinerMedium(combiner_mediumJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_large_combiner_job = config.getInts(
+        "combiner.largeJobs.numOfJobs", NUM_OF_LARGE_JOBS_PER_CLASS);
+    int num_of_large_combiner_job_mapoutputCompressed = config.getInt(
+        "combiner.largeJobs.numOfMapoutputCompressed", 0);
+    int num_of_large_combiner_job_outputCompressed = config
+        .getInt("combiner.largeJobs.numOfOutputCompressed",
+            NUM_OF_LARGE_JOBS_PER_CLASS);
+    int[] combiner_largeJobs_numsOfReduces = config.getInts(
+        "combiner.largeJobs.numOfReduces", NUM_OF_REDUCERS_FOR_LARGE_JOB);
+
+    len1 = nums_of_large_combiner_job.length;
+    len2 = combiner_largeJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_large_combiner_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: combiner.largeJobs.numOfJobs, "
+              + "combiner.largeJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_large_combiner_job = nums_of_large_combiner_job[index];
+      int combiner_largeJobs_numOfReduces = combiner_largeJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_large_combiner_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_large_combiner_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_large_combiner_job_outputCompressed, currentIndex);
+
+        addCombinerLarge(combiner_largeJobs_numOfReduces, mapoutputCompressed,
+            outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+  }
+
+  private void addMonsterQuery(int num_of_reducer, boolean mapoutputCompressed,
+      boolean outputCompressed, String size) {
+    GenericMRLoadJobCreator jobcreator = new GenericMRLoadJobCreator();
+    String defaultIndir = FIXCOMPSEQ + "/{part-00000,part-00001,part-00002}";
+    String indir = getInputDirsFor("monsterQuery.smallJobs.inputFiles",
+        defaultIndir);
+    String outdir = addTSSuffix("perf-out/mq-out-dir-small_");
+
+    if ("medium".compareToIgnoreCase(size) == 0) {
+      defaultIndir = FIXCOMPSEQ + "/{part-000*0,part-000*1,part-000*2}";
+      indir = getInputDirsFor("monsterQuery.mediumJobs.inputFiles",
+          defaultIndir);
+      outdir = addTSSuffix("perf-out/mq-out-dir-medium_");
+    } else if ("large".compareToIgnoreCase(size) == 0) {
+      defaultIndir = FIXCOMPSEQ;
+      indir = getInputDirsFor("monsterQuery.largeJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/mq-out-dir-large_");
+    }
+
+    int iter = 3;
+    try {
+
+      Job pjob = null;
+      Job job = null;
+      for (int i = 0; i < iter; i++) {
+        String outdirfull = outdir + "." + i;
+        String indirfull;
+        if (i == 0) {
+          indirfull = indir;
+        } else {
+          indirfull = outdir + "." + (i - 1);
+        }
+        Path outfile = new Path(outdirfull);
+
+        StringBuffer sb = new StringBuffer();
+
+        sb.append("-keepmap 10 ");
+        sb.append("-keepred 40 ");
+        sb.append("-inFormat org.apache.hadoop.mapred.SequenceFileInputFormat ");
+        sb.append("-outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat ");
+        sb.append("-outKey org.apache.hadoop.io.Text ");
+        sb.append("-outValue org.apache.hadoop.io.Text ");
+
+        sb.append("-indir ").append(indirfull).append(" ");
+
+        sb.append("-outdir ").append(outdirfull).append(" ");
+        sb.append("-r ").append(num_of_reducer);
+
+        String[] args = sb.toString().split(" ");
+
+        try {
+          fs.delete(outfile);
+        } catch (IOException ex) {
+          System.out.println(ex.toString());
+        }
+
+        JobConf jobconf = jobcreator.createJob(args, mapoutputCompressed,
+            outputCompressed);
+        jobconf.setJobName("GridmixMonsterQuery." + size);
+        job = new Job(jobconf);
+        if (pjob != null) {
+          job.addDependingJob(pjob);
+        }
+        gridmix.addJob(job);
+        numOfJobs++;
+        pjob = job;
+
+      }
+
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+  private void addMonsterQuerySmall(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addMonsterQuery(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "small");
+  }
+
+  private void addMonsterQueryMedium(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addMonsterQuery(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "medium");
+  }
+
+  private void addMonsterQueryLarge(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addMonsterQuery(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "large");
+  }
+
+  private void addMonsterQueryJobs() {
+    int[] nums_of_small_monsterquery_job = config.getInts(
+        "monsterQuery.smallJobs.numOfJobs", NUM_OF_SMALL_JOBS_PER_CLASS);
+    int num_of_small_monsterquery_job_mapoutputCompressed = config.getInt(
+        "monsterQuery.smallJobs.numOfMapoutputCompressed", 0);
+    int num_of_small_monsterquery_job_outputCompressed = config.getInt(
+        "monsterQuery.smallJobs.numOfOutputCompressed",
+        NUM_OF_SMALL_JOBS_PER_CLASS);
+    int[] monsterquery_smallJobs_numsOfReduces = config.getInts(
+        "monsterQuery.smallJobs.numOfReduces", NUM_OF_REDUCERS_FOR_SMALL_JOB);
+    int len1, len2, totalNum, currentIndex;
+
+    len1 = nums_of_small_monsterquery_job.length;
+    len2 = monsterquery_smallJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_small_monsterquery_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: monseterquery.smallJobs.numOfJobs, "
+              + "monsterquery.smallJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_small_monsterquery_job = nums_of_small_monsterquery_job[index];
+      int monsterquery_smallJobs_numOfReduces = monsterquery_smallJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_small_monsterquery_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_small_monsterquery_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_small_monsterquery_job_outputCompressed, currentIndex);
+
+        addMonsterQuerySmall(monsterquery_smallJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_medium_monsterquery_job = config.getInts(
+        "monsterQuery.mediumJobs.numOfJobs", NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int num_of_medium_monsterquery_job_mapoutputCompressed = config.getInt(
+        "monsterQuery.mediumJobs.numOfMapoutputCompressed", 0);
+    int num_of_medium_monsterquery_job_outputCompressed = config.getInt(
+        "monsterQuery.mediumJobs.numOfOutputCompressed",
+        NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int[] monsterquery_mediumJobs_numsOfReduces = config.getInts(
+        "monsterQuery.mediumJobs.numOfReduces", NUM_OF_REDUCERS_FOR_MEDIUM_JOB);
+    len1 = nums_of_medium_monsterquery_job.length;
+    len2 = monsterquery_mediumJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_medium_monsterquery_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: monseterquery.mediumJobs.numOfJobs, "
+              + "monsterquery.mediumJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_medium_monsterquery_job = nums_of_medium_monsterquery_job[index];
+      int monsterquery_mediumJobs_numOfReduces = monsterquery_mediumJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_medium_monsterquery_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_medium_monsterquery_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_medium_monsterquery_job_outputCompressed, currentIndex);
+
+        addMonsterQueryMedium(monsterquery_mediumJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_large_monsterquery_job = config.getInts(
+        "monsterQuery.largeJobs.numOfJobs", NUM_OF_LARGE_JOBS_PER_CLASS);
+    int num_of_large_monsterquery_job_mapoutputCompressed = config.getInt(
+        "monsterQuery.largeJobs.numOfMapoutputCompressed", 0);
+    int num_of_large_monsterquery_job_outputCompressed = config.getInt(
+        "monsterQuery.largeJobs.numOfOutputCompressed",
+        NUM_OF_LARGE_JOBS_PER_CLASS);
+    int[] monsterquery_largeJobs_numsOfReduces = config.getInts(
+        "monsterQuery.largeJobs.numOfReduces", NUM_OF_REDUCERS_FOR_LARGE_JOB);
+
+    len1 = nums_of_large_monsterquery_job.length;
+    len2 = monsterquery_largeJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_large_monsterquery_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: monseterquery.largeJobs.numOfJobs, "
+              + "monsterquery.largeJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_large_monsterquery_job = nums_of_large_monsterquery_job[index];
+      int monsterquery_largeJobs_numOfReduces = monsterquery_largeJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_large_monsterquery_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_large_monsterquery_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_large_monsterquery_job_outputCompressed, currentIndex);
+
+        addMonsterQueryLarge(monsterquery_largeJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+  }
+
+  private String addTSSuffix(String s) {
+    Date date = Calendar.getInstance().getTime();
+    String ts = String.valueOf(date.getTime());
+    return s + ts;
+  }
+
+  private void addWebdataSort(int num_of_reducers, boolean mapoutputCompressed,
+      boolean outputCompressed, String size) {
+    String defaultIndir = VARCOMPSEQ + "/{part-00000,part-00001,part-00002}";
+    String indir = getInputDirsFor("webdataSort.smallJobs.inputFiles",
+        defaultIndir);
+
+    String outdir = addTSSuffix("perf-out/webdata-sort-out-dir-small_");
+    if ("medium".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARCOMPSEQ + "/{part-000*0,part-000*1,part-000*2}";
+      indir = getInputDirsFor("webdataSort.mediumJobs.inputFiles", defaultIndir);
+      outdir = addTSSuffix("perf-out/webdata-sort-out-dir-medium_");
+    } else if ("large".compareToIgnoreCase(size) == 0) {
+      defaultIndir = VARCOMPSEQ;
+      indir = getInputDirsFor("webdataSort.largeJobs.inputFiles", defaultIndir);
+
+      outdir = addTSSuffix("perf-out/webdata-sort-dir-large_");
+    }
+    GenericMRLoadJobCreator jobcreator = new GenericMRLoadJobCreator();
+    StringBuffer sb = new StringBuffer();
+    sb.append("-keepmap 100 ");
+    sb.append("-keepred 100 ");
+    sb.append("-inFormat org.apache.hadoop.mapred.SequenceFileInputFormat ");
+    sb.append("-outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat ");
+    sb.append("-outKey org.apache.hadoop.io.Text ");
+    sb.append("-outValue org.apache.hadoop.io.Text ");
+    sb.append("-indir ").append(indir).append(" ");
+    sb.append("-outdir ").append(outdir).append(" ");
+    sb.append("-r ").append(num_of_reducers);
+
+    String[] args = sb.toString().split(" ");
+    clearDir(outdir);
+    try {
+      JobConf jobconf = jobcreator.createJob(args, mapoutputCompressed,
+          outputCompressed);
+      jobconf.setJobName("GridmixWebdataSort." + size);
+      Job job = new Job(jobconf);
+      gridmix.addJob(job);
+      numOfJobs++;
+    } catch (Exception ex) {
+      ex.printStackTrace();
+    }
+
+  }
+
+  private void addWebdataSortSmall(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+    addWebdataSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "small");
+  }
+
+  private void addWebdataSortMedium(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+
+    addWebdataSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "medium");
+  }
+
+  private void addWebdataSortLarge(int NUM_OF_REDUCERS,
+      boolean mapoutputCompressed, boolean outputCompressed) {
+
+    addWebdataSort(NUM_OF_REDUCERS, mapoutputCompressed, outputCompressed,
+        "large");
+  }
+
+  private void addWebdataSortJobs() {
+    int[] nums_of_small_webdatasort_job = config.getInts(
+        "webdataSort.smallJobs.numOfJobs", NUM_OF_SMALL_JOBS_PER_CLASS);
+    int num_of_small_webdatasort_job_mapoutputCompressed = config.getInt(
+        "webdataSort.smallJobs.numOfMapoutputCompressed", 0);
+    int num_of_small_webdatasort_job_outputCompressed = config.getInt(
+        "webdataSort.smallJobs.numOfOutputCompressed",
+        NUM_OF_SMALL_JOBS_PER_CLASS);
+    int[] webdatasort_smallJobs_numsOfReduces = config.getInts(
+        "webdataSort.smallJobs.numOfReduces", NUM_OF_REDUCERS_FOR_SMALL_JOB);
+
+    int len1, len2, totalNum, currentIndex;
+
+    len1 = nums_of_small_webdatasort_job.length;
+    len2 = webdatasort_smallJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_small_webdatasort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: webdatasort.smallJobs.numOfJobs, "
+              + "webdatasort.smallJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_small_webdatasort_job = nums_of_small_webdatasort_job[index];
+      int webdatasort_smallJobs_numOfReduces = webdatasort_smallJobs_numsOfReduces[index];
+
+      for (int i = 0; i < num_of_small_webdatasort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_small_webdatasort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_small_webdatasort_job_outputCompressed, currentIndex);
+
+        addWebdataSortSmall(webdatasort_smallJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_medium_webdatasort_job = config.getInts(
+        "webdataSort.mediumJobs.numOfJobs", NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int num_of_medium_webdatasort_job_mapoutputCompressed = config.getInt(
+        "webdataSort.mediumJobs.numOfMapoutputCompressed", 0);
+    int num_of_medium_webdatasort_job_outputCompressed = config.getInt(
+        "webdataSort.mediumJobs.numOfOutputCompressed",
+        NUM_OF_MEDIUM_JOBS_PER_CLASS);
+    int[] webdatasort_mediumJobs_numsOfReduces = config.getInts(
+        "webdataSort.mediumJobs.numOfReduces", NUM_OF_REDUCERS_FOR_MEDIUM_JOB);
+
+    len1 = nums_of_medium_webdatasort_job.length;
+    len2 = webdatasort_mediumJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_medium_webdatasort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: webdatasort.mediumJobs.numOfJobs, "
+              + "webdatasort.mediumJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_medium_webdatasort_job = nums_of_medium_webdatasort_job[index];
+      int webdatasort_mediumJobs_numOfReduces = webdatasort_mediumJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_medium_webdatasort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_medium_webdatasort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_medium_webdatasort_job_outputCompressed, currentIndex);
+
+        addWebdataSortMedium(webdatasort_mediumJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+    int[] nums_of_large_webdatasort_job = config.getInts(
+        "webdataSort.largeJobs.numOfJobs", NUM_OF_LARGE_JOBS_PER_CLASS);
+    int num_of_large_webdatasort_job_mapoutputCompressed = config.getInt(
+        "webdataSort.largeJobs.numOfMapoutputCompressed", 0);
+    int num_of_large_webdatasort_job_outputCompressed = config.getInt(
+        "webdataSort.largeJobs.numOfOutputCompressed",
+        NUM_OF_LARGE_JOBS_PER_CLASS);
+    int[] webdatasort_largeJobs_numsOfReduces = config.getInts(
+        "webdataSort.largeJobs.numOfReduces", NUM_OF_REDUCERS_FOR_LARGE_JOB);
+
+    len1 = nums_of_large_webdatasort_job.length;
+    len2 = webdatasort_largeJobs_numsOfReduces.length;
+    totalNum = 0;
+
+    for (int i = 0; i < len1; i++) {
+      totalNum = totalNum + nums_of_large_webdatasort_job[i];
+    }
+
+    if (len1 != len2) {
+      System.out
+          .println(" Configuration error: webdatasort.largeJobs.numOfJobs, "
+              + "webdatasort.largeJobs.numOfReduces must have the same number of items");
+    }
+
+    currentIndex = 0;
+    for (int index = 0; index < len1; index++) {
+      int num_of_large_webdatasort_job = nums_of_large_webdatasort_job[index];
+      int webdatasort_largeJobs_numOfReduces = webdatasort_largeJobs_numsOfReduces[index];
+      for (int i = 0; i < num_of_large_webdatasort_job; i++) {
+        boolean mapoutputCompressed = select(totalNum,
+            num_of_large_webdatasort_job_mapoutputCompressed, currentIndex);
+        boolean outputCompressed = select(totalNum,
+            num_of_large_webdatasort_job_outputCompressed, currentIndex);
+
+        addWebdataSortLarge(webdatasort_largeJobs_numOfReduces,
+            mapoutputCompressed, outputCompressed);
+        currentIndex = currentIndex + 1;
+      }
+    }
+
+  }
+
+  public void addjobs() {
+
+    addTextSortJobs();
+
+    addCombinerJobs();
+
+    addMonsterQueryJobs();
+
+    addWebdataScanJobs();
+
+    addWebdataSortJobs();
+
+    System.out.println("total " + gridmix.getWaitingJobs().size() + " jobs");
+  }
+
+  class SimpleStats {
+    long minValue;
+
+    long maxValue;
+
+    long averageValue;
+
+    long medianValue;
+
+    int n;
+
+    SimpleStats(long[] data) {
+      Arrays.sort(data);
+      n = data.length;
+      minValue = data[0];
+      maxValue = data[n - 1];
+      medianValue = data[n / 2];
+      long total = 0;
+      for (int i = 0; i < n; i++) {
+        total += data[i];
+      }
+      averageValue = total / n;
+    }
+  }
+
+  class TaskExecutionStats {
+    TreeMap<String, SimpleStats> theStats;
+
+    void computeStats(String name, long[] data) {
+      SimpleStats v = new SimpleStats(data);
+      theStats.put(name, v);
+    }
+
+    TaskExecutionStats() {
+      theStats = new TreeMap<String, SimpleStats>();
+    }
+  }
+
+  private TreeMap<String, String> getStatForJob(Job job) {
+    TreeMap<String, String> retv = new TreeMap<String, String>();
+    String mapreduceID = job.getAssignedJobID().toString();
+    JobClient jc = job.getJobClient();
+    JobConf jobconf = job.getJobConf();
+    String jobName = jobconf.getJobName();
+    retv.put("JobId", mapreduceID);
+    retv.put("JobName", jobName);
+
+    TaskExecutionStats theTaskExecutionStats = new TaskExecutionStats();
+
+    try {
+      RunningJob running = jc.getJob(JobID.forName(mapreduceID));
+      Counters jobCounters = running.getCounters();
+      Iterator<Group> groups = jobCounters.iterator();
+      while (groups.hasNext()) {
+        Group g = groups.next();
+        String gn = g.getName();
+        Iterator<Counters.Counter> cs = g.iterator();
+        while (cs.hasNext()) {
+          Counters.Counter c = cs.next();
+          String n = c.getName();
+          long v = c.getCounter();
+          retv.put(mapreduceID + "." + jobName + "." + gn + "." + n, "" + v);
+        }
+      }
+      TaskReport[] maps = jc.getMapTaskReports(JobID.forName(mapreduceID));
+      TaskReport[] reduces = jc
+          .getReduceTaskReports(JobID.forName(mapreduceID));
+      retv.put(mapreduceID + "." + jobName + "." + "numOfMapTasks", ""
+          + maps.length);
+      retv.put(mapreduceID + "." + jobName + "." + "numOfReduceTasks", ""
+          + reduces.length);
+      long[] mapExecutionTimes = new long[maps.length];
+      long[] reduceExecutionTimes = new long[reduces.length];
+      Date date = Calendar.getInstance().getTime();
+      long startTime = date.getTime();
+      long finishTime = 0;
+      for (int j = 0; j < maps.length; j++) {
+        TaskReport map = maps[j];
+        long thisStartTime = map.getStartTime();
+        long thisFinishTime = map.getFinishTime();
+        if (thisStartTime > 0 && thisFinishTime > 0) {
+          mapExecutionTimes[j] = thisFinishTime - thisStartTime;
+        }
+        if (startTime > thisStartTime) {
+          startTime = thisStartTime;
+        }
+        if (finishTime < thisFinishTime) {
+          finishTime = thisFinishTime;
+        }
+      }
+
+      theTaskExecutionStats.computeStats("mapExecutionTimeStats",
+          mapExecutionTimes);
+
+      retv.put(mapreduceID + "." + jobName + "." + "mapStartTime", ""
+          + startTime);
+      retv.put(mapreduceID + "." + jobName + "." + "mapEndTime", ""
+          + finishTime);
+      for (int j = 0; j < reduces.length; j++) {
+        TaskReport reduce = reduces[j];
+        long thisStartTime = reduce.getStartTime();
+        long thisFinishTime = reduce.getFinishTime();
+        if (thisStartTime > 0 && thisFinishTime > 0) {
+          reduceExecutionTimes[j] = thisFinishTime - thisStartTime;
+        }
+        if (startTime > thisStartTime) {
+          startTime = thisStartTime;
+        }
+        if (finishTime < thisFinishTime) {
+          finishTime = thisFinishTime;
+        }
+      }
+
+      theTaskExecutionStats.computeStats("reduceExecutionTimeStats",
+          reduceExecutionTimes);
+
+      retv.put(mapreduceID + "." + jobName + "." + "reduceStartTime", ""
+          + startTime);
+      retv.put(mapreduceID + "." + jobName + "." + "reduceEndTime", ""
+          + finishTime);
+      if (job.getState() == Job.SUCCESS) {
+        retv.put(mapreduceID + "." + "jobStatus", "successful");
+      } else if (job.getState() == Job.FAILED) {
+        retv.put(mapreduceID + "." + jobName + "." + "jobStatus", "failed");
+      } else {
+        retv.put(mapreduceID + "." + jobName + "." + "jobStatus", "unknown");
+      }
+      Iterator<Entry<String, SimpleStats>> entries = theTaskExecutionStats.theStats
+          .entrySet().iterator();
+      while (entries.hasNext()) {
+        Entry<String, SimpleStats> e = entries.next();
+        SimpleStats v = e.getValue();
+        retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "min",
+            "" + v.minValue);
+        retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "max",
+            "" + v.maxValue);
+        retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "."
+            + "medium", "" + v.mediumValue);
+        retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "." + "avg",
+            "" + v.averageValue);
+        retv.put(mapreduceID + "." + jobName + "." + e.getKey() + "."
+            + "numOfItems", "" + v.n);
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+    return retv;
+  }
+
+  private void printJobStat(TreeMap<String, String> stat) {
+    Iterator<Entry<String, String>> entries = stat.entrySet().iterator();
+    while (entries.hasNext()) {
+      Entry<String, String> e = entries.next();
+      System.out.println(e.getKey() + "\t" + e.getValue());
+    }
+  }
+
+  private void printStatsForJobs(ArrayList<Job> jobs) {
+    for (int i = 0; i < jobs.size(); i++) {
+      printJobStat(getStatForJob(jobs.get(i)));
+    }
+  }
+
+  public void run() {
+
+    Thread theGridmixRunner = new Thread(gridmix);
+    theGridmixRunner.start();
+    long startTime = System.currentTimeMillis();
+    while (!gridmix.allFinished()) {
+      System.out.println("Jobs in waiting state: "
+          + gridmix.getWaitingJobs().size());
+      System.out.println("Jobs in ready state: "
+          + gridmix.getReadyJobs().size());
+      System.out.println("Jobs in running state: "
+          + gridmix.getRunningJobs().size());
+      System.out.println("Jobs in success state: "
+          + gridmix.getSuccessfulJobs().size());
+      System.out.println("Jobs in failed state: "
+          + gridmix.getFailedJobs().size());
+      System.out.println("\n");
+
+      try {
+        Thread.sleep(10 * 1000);
+      } catch (Exception e) {
+        // ignore interruptions and keep polling job states
+      }
+    }
+    long endTime = System.currentTimeMillis();
+    ArrayList<Job> fail = gridmix.getFailedJobs();
+    ArrayList<Job> succeed = gridmix.getSuccessfulJobs();
+    int numOfSuccessfulJob = succeed.size();
+    if (numOfSuccessfulJob > 0) {
+      System.out.println(numOfSuccessfulJob + " jobs succeeded");
+      printStatsForJobs(succeed);
+
+    }
+    int numOfFailedjob = fail.size();
+    if (numOfFailedjob > 0) {
+      System.out.println("------------------------------- ");
+      System.out.println(numOfFailedjob + " jobs failed");
+      printStatsForJobs(fail);
+    }
+    System.out.println("GridMix results:");
+    System.out.println("Total num of Jobs: " + numOfJobs);
+    System.out.println("ExecutionTime: " + ((endTime-startTime)/1000));
+    gridmix.stop();
+  }
+
+  public static void main(String argv[]) {
+
+    try {
+      GridMixRunner gridmixRunner = new GridMixRunner();
+      gridmixRunner.addjobs();
+      gridmixRunner.run();
+    } catch (Exception ex) {
+      System.out.println(ex.getMessage());
+    }
+  }
+
+}

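Note on the configuration pattern used throughout GridMixRunner above: for every job type and size class there are four parallel properties, <type>.<size>Jobs.numOfJobs and <type>.<size>Jobs.numOfReduces (lists that must have the same length, one entry per input group), plus numOfMapoutputCompressed and numOfOutputCompressed, which say how many of those jobs should compress their map output and final output. A minimal sketch of supplying such values programmatically is shown below; only the property names come from the code above, while the plain Configuration class, the comma-separated list encoding and the values themselves are assumptions for illustration.

import org.apache.hadoop.conf.Configuration;

public class GridmixConfigSketch {
  public static void main(String[] args) {
    // Illustrative values only; the property names are the ones read by GridMixRunner.
    Configuration conf = new Configuration();
    conf.set("webdataScan.smallJobs.numOfJobs", "10,5");      // two input groups (assumed encoding)
    conf.set("webdataScan.smallJobs.numOfReduces", "15,15");  // must match numOfJobs in length
    conf.setInt("webdataScan.smallJobs.numOfMapoutputCompressed", 5);
    conf.setInt("webdataScan.smallJobs.numOfOutputCompressed", 5);
    System.out.println(conf.get("webdataScan.smallJobs.numOfJobs"));
  }
}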
+ 121 - 0
src/benchmarks/gridmix2/src/java/org/apache/hadoop/mapred/SortJobCreator.java

@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.examples.Sort;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.lib.IdentityMapper;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.hadoop.conf.Configured;
+
+public class SortJobCreator extends Configured {
+
+  public JobConf createJob(String[] args) throws Exception {
+
+    // JobConf jobConf = new JobConf(getConf(), Sort.class);
+    JobConf jobConf = new JobConf();
+    jobConf.setJarByClass(Sort.class);
+    jobConf.setJobName("GridmixJavaSorter");
+
+    jobConf.setMapperClass(IdentityMapper.class);
+    jobConf.setReducerClass(IdentityReducer.class);
+
+    JobClient client = new JobClient(jobConf);
+    ClusterStatus cluster = client.getClusterStatus();
+    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
+    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
+    if (sort_reduces != null) {
+      num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
+    }
+    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
+    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
+    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
+    Class<? extends Writable> outputValueClass = BytesWritable.class;
+    boolean mapoutputCompressed = false;
+    boolean outputCompressed = false;
+    List<String> otherArgs = new ArrayList<String>();
+    for (int i = 0; i < args.length; ++i) {
+      try {
+        if ("-m".equals(args[i])) {
+          jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
+
+        } else if ("-r".equals(args[i])) {
+          num_reduces = Integer.parseInt(args[++i]);
+        } else if ("-inFormat".equals(args[i])) {
+          inputFormatClass = Class.forName(args[++i]).asSubclass(
+              InputFormat.class);
+        } else if ("-outFormat".equals(args[i])) {
+          outputFormatClass = Class.forName(args[++i]).asSubclass(
+              OutputFormat.class);
+        } else if ("-outKey".equals(args[i])) {
+          outputKeyClass = Class.forName(args[++i]).asSubclass(
+              WritableComparable.class);
+        } else if ("-outValue".equals(args[i])) {
+          outputValueClass = Class.forName(args[++i])
+              .asSubclass(Writable.class);
+        } else if ("-mapoutputCompressed".equals(args[i])) {
+          mapoutputCompressed = Boolean.valueOf(args[++i]).booleanValue();
+        } else if ("-outputCompressed".equals(args[i])) {
+          outputCompressed = Boolean.valueOf(args[++i]).booleanValue();
+        } else {
+          otherArgs.add(args[i]);
+        }
+      } catch (NumberFormatException except) {
+        System.out.println("ERROR: Integer expected instead of " + args[i]);
+        return null;
+      } catch (ArrayIndexOutOfBoundsException except) {
+        System.out.println("ERROR: Required parameter missing from "
+            + args[i - 1]);
+        return null; // exits
+      }
+    }
+
+    // Set user-supplied (possibly default) job configs
+    jobConf.setNumReduceTasks(num_reduces);
+
+    jobConf.setInputFormat(inputFormatClass);
+    jobConf.setOutputFormat(outputFormatClass);
+
+    jobConf.setOutputKeyClass(outputKeyClass);
+    jobConf.setOutputValueClass(outputValueClass);
+    jobConf.setCompressMapOutput(mapoutputCompressed);
+    jobConf.setBoolean("mapred.output.compress", outputCompressed);
+
+    // Make sure there are exactly 2 parameters left.
+    if (otherArgs.size() != 2) {
+      System.out.println("ERROR: Wrong number of parameters: "
+          + otherArgs.size() + " instead of 2.");
+      return null;
+    }
+
+    FileInputFormat.addInputPaths(jobConf, otherArgs.get(0));
+
+    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));
+
+    return jobConf;
+  }
+
+}

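SortJobCreator.createJob above turns sort-style command line arguments into a fully configured JobConf (identity map and reduce over SequenceFiles) and returns null on bad arguments. A minimal, hypothetical usage sketch follows; the paths, reducer count, and submitting directly through JobClient rather than JobControl are assumptions for illustration, not taken from this patch.

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SortJobCreator;

public class JavaSortSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical arguments: 4 reducers, compressed map output, made-up in/out paths.
    String[] sortArgs = {
        "-r", "4",
        "-mapoutputCompressed", "true",
        "/gridmix/data/in", "/gridmix/out/java-sort-sketch"
    };
    JobConf conf = new SortJobCreator().createJob(sortArgs);
    if (conf != null) {
      JobClient.runJob(conf);  // blocks until the sort job completes
    }
  }
}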
+ 20 - 0
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java

@@ -106,6 +106,7 @@ public class StreamJob implements Tool {
   
   public StreamJob() {
     setupOptions();
+    this.config_ = new Configuration();
   }
   
   @Override
@@ -136,6 +137,25 @@ public class StreamJob implements Tool {
     }
   }
   
+  /**
+   * This method creates a streaming job from the given argument list.
+   * The created object can be used and/or submitted to a jobtracker for 
+   * execution by a job agent such as JobControl.
+   * @param argv the argument list for creating the streaming job
+   * @return the created JobConf object 
+   * @throws IOException
+   */
+  static public JobConf createJob(String[] argv) throws IOException {
+    StreamJob job = new StreamJob();
+    job.argv_ = argv;
+    job.init();
+    job.preProcessArgs();
+    job.parseArgv();
+    job.postProcessArgs();
+    job.setJobConf();
+    return job.jobConf_;
+  }
+
   /**
    * This is the method that actually 
    * initializes the job conf and submits the job