Bläddra i källkod

HADOOP-3755. Update gridmix to work with HOD 0.4 Contributed by Runping Qi.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@677375 13f79535-47bb-0310-9956-ffa450edef68
Christopher Douglas 17 år sedan
förälder
incheckning
5416c3bd0f

+ 2 - 0
CHANGES.txt

@@ -120,6 +120,8 @@ Trunk (unreleased changes)
     HADOOP-3721. Refactor CompositeRecordReader and related mapred.join classes
     to make them clearer. (cdouglas)
 
+    HADOOP-3755. Update gridmix to work with HOD 0.4. (Runping Qi via cdouglas)
+
 Release 0.18.0 - Unreleased
 
   INCOMPATIBLE CHANGES

+ 5 - 4
src/benchmarks/gridmix/generateData.sh

@@ -13,6 +13,7 @@ INDIRECT_DATA_BYTES=58720256
 if [ -z ${NUM_MAPS} ] ; then
   NUM_MAPS=100
 fi
+
 INDIRECT_DATA_FILES=200
 
 # If the env var USE_REAL_DATASET is set, then use the params to generate the bigger (real) dataset.
@@ -37,7 +38,7 @@ ${HADOOP_HOME}/bin/hadoop jar \
   -D mapred.output.compress=true \
   -D mapred.map.output.compression.type=BLOCK \
   -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat \
-  ${VARCOMPSEQ}
+  ${VARCOMPSEQ} &
 
 ${HADOOP_HOME}/bin/hadoop jar \
   ${EXAMPLE_JAR} randomtextwriter \
@@ -50,7 +51,7 @@ ${HADOOP_HOME}/bin/hadoop jar \
   -D mapred.output.compress=true \
   -D mapred.map.output.compression.type=BLOCK \
   -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat \
-  ${FIXCOMPSEQ}
+  ${FIXCOMPSEQ} &
 
 ${HADOOP_HOME}/bin/hadoop jar \
   ${EXAMPLE_JAR} randomtextwriter \
@@ -62,7 +63,7 @@ ${HADOOP_HOME}/bin/hadoop jar \
   -D test.randomtextwrite.max_words_value=200 \
   -D mapred.output.compress=false \
   -outFormat org.apache.hadoop.mapred.TextOutputFormat \
-  ${VARINFLTEXT}
+  ${VARINFLTEXT} &
 
 ${HADOOP_HOME}/bin/hadoop jar \
   ${EXAMPLE_JAR} randomtextwriter \
@@ -75,4 +76,4 @@ ${HADOOP_HOME}/bin/hadoop jar \
   -D mapred.output.compress=true \
   -D mapred.map.output.compression.type=BLOCK \
   -outFormat org.apache.hadoop.mapred.TextOutputFormat \
-  ${FIXCOMPTEXT}
+  ${FIXCOMPTEXT} &

+ 13 - 9
src/benchmarks/gridmix/gridmix-env

@@ -15,17 +15,17 @@ fi
 # Hadoop example jar (the quoted value is a glob pattern, stored unexpanded)
 # set var only if it has not already been set externally
 if [ -z "${EXAMPLE_JAR}" ] ; then
-  export EXAMPLE_JAR="${HADOOP_HOME}/hadoop-*-examples.jar"
+  export EXAMPLE_JAR="${HADOOP_HOME}/hadoop-*examples.jar"
 fi
 # Hadoop test jar (the quoted value is a glob pattern, stored unexpanded)
 # set var only if it has not already been set externally
 if [ -z "${APP_JAR}" ] ; then
-  export APP_JAR="${HADOOP_HOME}/hadoop-*-test.jar"
+  export APP_JAR="${HADOOP_HOME}/hadoop-*test.jar"
 fi
 # Hadoop streaming jar (glob pattern, stored unexpanded; under contrib/streaming/)
 # set var only if it has not already been set externally
 if [ -z "${STREAM_JAR}" ] ; then
-  export STREAM_JAR="${HADOOP_HOME}/contrib/hadoop-*-streaming.jar"
+  export STREAM_JAR="${HADOOP_HOME}/contrib/streaming/hadoop-*streaming.jar"
 fi
 # Location on default filesystem for writing gridmix data (usually HDFS)
 # Default: /gridmix/data
@@ -64,9 +64,13 @@ export NUM_OF_REDUCERS_FOR_SMALL_JOB=15
 export INTERVAL_BETWEEN_SUBMITION=20
 
 ## Hod
-#export HOD_OPTIONS="--ringmaster.hadoop-tar-ball=/path/to/hadoop-0.15.0-dev.tar.gz"
-#export HOD_CONFIG=
-#export ALL_HOD_OPTIONS="-c ${HOD_CONFIG} ${HOD_OPTIONS}"
-#export SMALL_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -m 5"
-#export MEDIUM_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -m 50"
-#export LARGE_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -m 100"
+#export HOD_OPTIONS=""
+
+export CLUSTER_DIR_BASE=$GRID_MIX_HOME/CLUSTER_DIR_BASE
+export HOD_CONFIG=  # left empty here; the value is passed to hod after -c below
+export ALL_HOD_OPTIONS="-c ${HOD_CONFIG} ${HOD_OPTIONS}"
+export SMALL_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 5"
+export MEDIUM_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 50"
+export LARGE_JOB_HOD_OPTIONS="$ALL_HOD_OPTIONS -n 100"
+export USE_REAL_DATASET=false  # NOTE(review): generateData.sh's comment says it keys on this var being *set*; confirm "false" does not enable the real dataset
+

+ 5 - 1
src/benchmarks/gridmix/submissionScripts/maxentHod

@@ -3,9 +3,13 @@
 GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
+
 for ((i=0; i < $NUM_OF_LARGE_JOBS_FOR_ENTROPY_CLASS; i++))
 do
     echo $i
-    hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/maxent/maxent.large  2>&1 > maxent.large.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/maxent.large.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    #hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/maxent/maxent.large  2>&1 > maxent.large.$i.out &
+    hod script -d "$CLUSTER_DIR" $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/maxent/maxent.large > maxent.large.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy 
 done

+ 10 - 3
src/benchmarks/gridmix/submissionScripts/monsterQueriesHod

@@ -7,20 +7,27 @@ source $GRID_DIR/../gridmix-env
 for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $SMALL_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/monsterQuery/monster_query.small  2>&1 > monster_query.small.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.small.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+
+    hod script -d "$CLUSTER_DIR" $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.small > monster_query.small.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
 for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $MEDIUM_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/monsterQuery/monster_query.medium  2>&1 > monster_query.medium.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.medium.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    hod script -d "$CLUSTER_DIR" $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.medium > monster_query.medium.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
 
 for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/monsterQuery/monster_query.large  2>&1 > monster_query.large.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/monster_query.large.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    hod script -d "$CLUSTER_DIR" $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/monsterQuery/monster_query.large > monster_query.large.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done

+ 34 - 9
src/benchmarks/gridmix/submissionScripts/textSortHod

@@ -7,33 +7,58 @@ source $GRID_DIR/../gridmix-env
 for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $SMALL_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/pipesort/text-sort.small  2>&1 > pipesort.small.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.small.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+
+    hod script -d "$CLUSTER_DIR" $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.small > pipesort.small.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
-    hod $SMALL_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/streamsort/text-sort.small  2>&1 > streamsort.small.$i.out &
+
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.small.$i
+    mkdir -p "$CLUSTER_DIR"
+    hod script -d "$CLUSTER_DIR" $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.small > streamsort.small.$i.out 2>&1 &
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
-    hod $SMALL_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/javasort/text-sort.small  2>&1 > javasort.small.$i.out  &
+
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.small.$i
+    mkdir -p "$CLUSTER_DIR"
+    hod script -d "$CLUSTER_DIR" $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/javasort/text-sort.small > javasort.small.$i.out 2>&1 &
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
 for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $MEDIUM_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/pipesort/text-sort.medium  2>&1 > pipesort.medium.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.medium.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    hod script -d "$CLUSTER_DIR" $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.medium > pipesort.medium.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
-    hod $MEDIUM_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/streamsort/text-sort.medium  2>&1 > streamsort.medium.$i.out &
+
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.medium.$i
+    mkdir -p "$CLUSTER_DIR"
+    hod script -d "$CLUSTER_DIR" $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.medium > streamsort.medium.$i.out 2>&1 &
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
-    hod $MEDIUM_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/javasort/text-sort.medium  2>&1 > javasort.medium.$i.out  &
+
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.medium.$i
+    mkdir -p "$CLUSTER_DIR"
+    hod script -d "$CLUSTER_DIR" $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/javasort/text-sort.medium > javasort.medium.$i.out 2>&1 &  # -s, matching the other hod script invocations in this loop
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
 
 for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/pipesort/text-sort.large  2>&1 >  pipesort.large.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/pipesort.large.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    hod script -d "$CLUSTER_DIR" $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/pipesort/text-sort.large > pipesort.large.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
-    hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/streamsort/text-sort.large  2>&1 > streamsort.large.$i.out &
+
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/streamsort.large.$i
+    mkdir -p "$CLUSTER_DIR"
+    hod script -d "$CLUSTER_DIR" $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/streamsort/text-sort.large > streamsort.large.$i.out 2>&1 &
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
-    hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/javasort/text-sort.large  2>&1 > javasort.large.$i.out &
+
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/javasort.large.$i
+    mkdir -p "$CLUSTER_DIR"
+    hod script -d "$CLUSTER_DIR" $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/javasort/text-sort.large > javasort.large.$i.out 2>&1 &
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     

+ 9 - 3
src/benchmarks/gridmix/submissionScripts/webdataScanHod

@@ -7,7 +7,9 @@ source $GRID_DIR/../gridmix-env
 for ((i=0; i < $NUM_OF_SMALL_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $SMALL_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/webdatascan/webdata_scan.small  2>&1 > webdata_scan.small.$i.out&
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.small.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    hod script -d "$CLUSTER_DIR" $SMALL_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.small > webdata_scan.small.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
@@ -15,14 +17,18 @@ done
 for ((i=0; i < $NUM_OF_MEDIUM_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $MEDIUM_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/webdatascan/webdata_scan.medium  2>&1 > webdata_scan.medium.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.medium.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    hod script -d "$CLUSTER_DIR" $MEDIUM_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.medium > webdata_scan.medium.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     
 for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/webdatascan/webdata_scan.large  2>&1 > webdata_scan.large.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_scan.large.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    hod script -d "$CLUSTER_DIR" $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatascan/webdata_scan.large > webdata_scan.large.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done
     

+ 5 - 2
src/benchmarks/gridmix/submissionScripts/webdataSortHod

@@ -4,11 +4,14 @@ GRID_DIR=`dirname "$0"`
 GRID_DIR=`cd "$GRID_DIR"; pwd`
 source $GRID_DIR/../gridmix-env
 
-
 for ((i=0; i < $NUM_OF_LARGE_JOBS_PER_CLASS; i++))
 do
     echo $i
-    hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/webdatasort/webdata_sort.large  2>&1 > webdata_sort.large.$i.out &
+    CLUSTER_DIR=$CLUSTER_DIR_BASE/webdata_sort.large.$i
+    mkdir -p "$CLUSTER_DIR"  # -p: creates CLUSTER_DIR_BASE if missing and tolerates reruns
+    #hod $LARGE_JOB_HOD_OPTIONS --hod.script=$GRID_MIX_HOME/webdatasort/webdata_sort.large  2>&1 > webdata_sort.large.$i.out &
+    echo "hod script -d $CLUSTER_DIR $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatasort/webdata_sort.large  "
+    hod script -d "$CLUSTER_DIR" $LARGE_JOB_HOD_OPTIONS -s $GRID_MIX_HOME/webdatasort/webdata_sort.large > webdata_sort.large.$i.out 2>&1 &  # stdout redirect first so 2>&1 also lands in the .out file
     $GRID_MIX_HOME/submissionScripts/sleep_if_too_busy
 done