@@ -80,13 +80,13 @@ public class StreamJob implements Tool {
 
   protected static final Log LOG = LogFactory.getLog(StreamJob.class.getName());
   final static String REDUCE_NONE = "NONE";
-
+
   /** -----------Streaming CLI Implementation **/
-  private CommandLineParser parser = new BasicParser();
+  private CommandLineParser parser = new BasicParser();
   private Options allOptions;
-  /**@deprecated use StreamJob() with ToolRunner or set the
-   * Configuration using {@link #setConf(Configuration)} and
-   * run with {@link #run(String[])}.
+  /**@deprecated use StreamJob() with ToolRunner or set the
+   * Configuration using {@link #setConf(Configuration)} and
+   * run with {@link #run(String[])}.
    */
   @Deprecated
   public StreamJob(String[] argv, boolean mayExit) {
@@ -94,12 +94,12 @@ public class StreamJob implements Tool {
     argv_ = argv;
     this.config_ = new Configuration();
   }
-
+
   public StreamJob() {
     setupOptions();
     this.config_ = new Configuration();
   }
-
+
   @Override
   public Configuration getConf() {
     return config_;
@@ -109,13 +109,13 @@ public class StreamJob implements Tool {
   public void setConf(Configuration conf) {
     this.config_ = conf;
   }
-
+
   @Override
   public int run(String[] args) throws Exception {
     try {
       this.argv_ = args;
      init();
-
+
      preProcessArgs();
      parseArgv();
      if (printUsage) {
@@ -123,7 +123,7 @@ public class StreamJob implements Tool {
        return 0;
      }
      postProcessArgs();
-
+
      setJobConf();
    } catch (IllegalArgumentException ex) {
      //ignore, since log will already be printed
@@ -133,13 +133,13 @@ public class StreamJob implements Tool {
    }
    return submitAndMonitorJob();
  }
-
+
  /**
   * This method creates a streaming job from the given argument list.
-   * The created object can be used and/or submitted to a jobtracker for
+   * The created object can be used and/or submitted to a jobtracker for
   * execution by a job agent such as JobControl
   * @param argv the list args for creating a streaming job
-   * @return the created JobConf object
+   * @return the created JobConf object
   * @throws IOException
   */
  static public JobConf createJob(String[] argv) throws IOException {
@@ -154,7 +154,7 @@ public class StreamJob implements Tool {
  }
 
  /**
-   * This is the method that actually
+   * This is the method that actually
   * intializes the job conf and submits the job
   * to the jobtracker
   * @throws IOException
@@ -169,7 +169,7 @@ public class StreamJob implements Tool {
      throw new IOException(ex.getMessage());
    }
  }
-
+
  protected void init() {
    try {
      env_ = new Environment();
@@ -186,7 +186,7 @@ public class StreamJob implements Tool {
  }
 
  void postProcessArgs() throws IOException {
-
+
    if (inputSpecs_.size() == 0) {
      fail("Required argument: -input <name>");
    }
@@ -253,7 +253,7 @@ public class StreamJob implements Tool {
      LOG.error(oe.getMessage());
      exitUsage(argv_.length > 0 && "-info".equals(argv_[0]));
    }
-
+
    if (cmdLine != null) {
      detailedUsage_ = cmdLine.hasOption("info");
      if (cmdLine.hasOption("help") || detailedUsage_) {
@@ -263,21 +263,21 @@ public class StreamJob implements Tool {
      verbose_ = cmdLine.hasOption("verbose");
      background_ = cmdLine.hasOption("background");
      debug_ = cmdLine.hasOption("debug")? debug_ + 1 : debug_;
-
+
      String[] values = cmdLine.getOptionValues("input");
      if (values != null && values.length > 0) {
        for (String input : values) {
          inputSpecs_.add(input);
        }
      }
-      output_ = cmdLine.getOptionValue("output");
-
-      mapCmd_ = cmdLine.getOptionValue("mapper");
-      comCmd_ = cmdLine.getOptionValue("combiner");
-      redCmd_ = cmdLine.getOptionValue("reducer");
-
+      output_ = cmdLine.getOptionValue("output");
+
+      mapCmd_ = cmdLine.getOptionValue("mapper");
+      comCmd_ = cmdLine.getOptionValue("combiner");
+      redCmd_ = cmdLine.getOptionValue("reducer");
+
      lazyOutput_ = cmdLine.hasOption("lazyOutput");
-
+
      values = cmdLine.getOptionValues("file");
      if (values != null && values.length > 0) {
        LOG.warn("-file option is deprecated, please use generic option" +
@@ -306,34 +306,34 @@ public class StreamJob implements Tool {
        LOG.warn("-dfs option is deprecated, please use -fs instead.");
        config_.set("fs.default.name", fsName);
      }
-
-      additionalConfSpec_ = cmdLine.getOptionValue("additionalconfspec");
-      inputFormatSpec_ = cmdLine.getOptionValue("inputformat");
+
+      additionalConfSpec_ = cmdLine.getOptionValue("additionalconfspec");
+      inputFormatSpec_ = cmdLine.getOptionValue("inputformat");
      outputFormatSpec_ = cmdLine.getOptionValue("outputformat");
-      numReduceTasksSpec_ = cmdLine.getOptionValue("numReduceTasks");
+      numReduceTasksSpec_ = cmdLine.getOptionValue("numReduceTasks");
      partitionerSpec_ = cmdLine.getOptionValue("partitioner");
-      inReaderSpec_ = cmdLine.getOptionValue("inputreader");
-      mapDebugSpec_ = cmdLine.getOptionValue("mapdebug");
+      inReaderSpec_ = cmdLine.getOptionValue("inputreader");
+      mapDebugSpec_ = cmdLine.getOptionValue("mapdebug");
      reduceDebugSpec_ = cmdLine.getOptionValue("reducedebug");
      ioSpec_ = cmdLine.getOptionValue("io");
-
-      String[] car = cmdLine.getOptionValues("cacheArchive");
+
+      String[] car = cmdLine.getOptionValues("cacheArchive");
      if (null != car && car.length > 0){
        LOG.warn("-cacheArchive option is deprecated, please use -archives instead.");
        for(String s : car){
-          cacheArchives = (cacheArchives == null)?s :cacheArchives + "," + s;
+          cacheArchives = (cacheArchives == null)?s :cacheArchives + "," + s;
        }
      }
 
-      String[] caf = cmdLine.getOptionValues("cacheFile");
+      String[] caf = cmdLine.getOptionValues("cacheFile");
      if (null != caf && caf.length > 0){
        LOG.warn("-cacheFile option is deprecated, please use -files instead.");
        for(String s : caf){
-          cacheFiles = (cacheFiles == null)?s :cacheFiles + "," + s;
+          cacheFiles = (cacheFiles == null)?s :cacheFiles + "," + s;
        }
      }
-
-      String[] jobconf = cmdLine.getOptionValues("jobconf");
+
+      String[] jobconf = cmdLine.getOptionValues("jobconf");
      if (null != jobconf && jobconf.length > 0){
        LOG.warn("-jobconf option is deprecated, please use -D instead.");
        for(String s : jobconf){
@@ -341,8 +341,8 @@ public class StreamJob implements Tool {
          config_.set(parts[0], parts[1]);
        }
      }
-
-      String[] cmd = cmdLine.getOptionValues("cmdenv");
+
+      String[] cmd = cmdLine.getOptionValues("cmdenv");
      if (null != cmd && cmd.length > 0){
        for(String s : cmd) {
          if (addTaskEnvironment_.length() > 0) {
@@ -361,8 +361,8 @@ public class StreamJob implements Tool {
      System.out.println("STREAM: " + msg);
    }
  }
-
-  private Option createOption(String name, String desc,
+
+  private Option createOption(String name, String desc,
                              String argName, int max, boolean required){
    return OptionBuilder
      .withArgName(argName)
@@ -371,87 +371,87 @@ public class StreamJob implements Tool {
|
|
|
.isRequired(required)
|
|
|
.create(name);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private Option createBoolOption(String name, String desc){
|
|
|
return OptionBuilder.withDescription(desc).create(name);
|
|
|
}
|
|
|
-
|
|
|
- private void validate(final List<String> values)
|
|
|
+
|
|
|
+ private void validate(final List<String> values)
|
|
|
throws IllegalArgumentException {
|
|
|
for (String file : values) {
|
|
|
- File f = new File(file);
|
|
|
+ File f = new File(file);
|
|
|
if (!f.canRead()) {
|
|
|
- fail("File: " + f.getAbsolutePath()
|
|
|
- + " does not exist, or is not readable.");
|
|
|
+ fail("File: " + f.getAbsolutePath()
|
|
|
+ + " does not exist, or is not readable.");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private void setupOptions(){
|
|
|
|
|
|
// input and output are not required for -info and -help options,
|
|
|
// though they are required for streaming job to be run.
|
|
|
- Option input = createOption("input",
|
|
|
- "DFS input file(s) for the Map step",
|
|
|
- "path",
|
|
|
- Integer.MAX_VALUE,
|
|
|
- false);
|
|
|
-
|
|
|
- Option output = createOption("output",
|
|
|
- "DFS output directory for the Reduce step",
|
|
|
- "path", 1, false);
|
|
|
- Option mapper = createOption("mapper",
|
|
|
+ Option input = createOption("input",
|
|
|
+ "DFS input file(s) for the Map step",
|
|
|
+ "path",
|
|
|
+ Integer.MAX_VALUE,
|
|
|
+ false);
|
|
|
+
|
|
|
+ Option output = createOption("output",
|
|
|
+ "DFS output directory for the Reduce step",
|
|
|
+ "path", 1, false);
|
|
|
+ Option mapper = createOption("mapper",
|
|
|
"The streaming command to run", "cmd", 1, false);
|
|
|
- Option combiner = createOption("combiner",
|
|
|
+ Option combiner = createOption("combiner",
|
|
|
"The streaming command to run", "cmd", 1, false);
|
|
|
- // reducer could be NONE
|
|
|
- Option reducer = createOption("reducer",
|
|
|
- "The streaming command to run", "cmd", 1, false);
|
|
|
- Option file = createOption("file",
|
|
|
- "File to be shipped in the Job jar file",
|
|
|
- "file", Integer.MAX_VALUE, false);
|
|
|
- Option dfs = createOption("dfs",
|
|
|
- "Optional. Override DFS configuration", "<h:p>|local", 1, false);
|
|
|
- Option additionalconfspec = createOption("additionalconfspec",
|
|
|
+ // reducer could be NONE
|
|
|
+ Option reducer = createOption("reducer",
|
|
|
+ "The streaming command to run", "cmd", 1, false);
|
|
|
+ Option file = createOption("file",
|
|
|
+ "File to be shipped in the Job jar file",
|
|
|
+ "file", Integer.MAX_VALUE, false);
|
|
|
+ Option dfs = createOption("dfs",
|
|
|
+ "Optional. Override DFS configuration", "<h:p>|local", 1, false);
|
|
|
+ Option additionalconfspec = createOption("additionalconfspec",
|
|
|
"Optional.", "spec", 1, false);
|
|
|
- Option inputformat = createOption("inputformat",
|
|
|
+ Option inputformat = createOption("inputformat",
|
|
|
"Optional.", "spec", 1, false);
|
|
|
- Option outputformat = createOption("outputformat",
|
|
|
+ Option outputformat = createOption("outputformat",
|
|
|
"Optional.", "spec", 1, false);
|
|
|
- Option partitioner = createOption("partitioner",
|
|
|
+ Option partitioner = createOption("partitioner",
|
|
|
"Optional.", "spec", 1, false);
|
|
|
- Option numReduceTasks = createOption("numReduceTasks",
|
|
|
+ Option numReduceTasks = createOption("numReduceTasks",
|
|
|
"Optional.", "spec",1, false );
|
|
|
- Option inputreader = createOption("inputreader",
|
|
|
+ Option inputreader = createOption("inputreader",
|
|
|
"Optional.", "spec", 1, false);
|
|
|
Option mapDebug = createOption("mapdebug",
|
|
|
"Optional.", "spec", 1, false);
|
|
|
Option reduceDebug = createOption("reducedebug",
|
|
|
"Optional", "spec",1, false);
|
|
|
- Option jobconf =
|
|
|
- createOption("jobconf",
|
|
|
- "(n=v) Optional. Add or override a JobConf property.",
|
|
|
+ Option jobconf =
|
|
|
+ createOption("jobconf",
|
|
|
+ "(n=v) Optional. Add or override a JobConf property.",
|
|
|
"spec", 1, false);
|
|
|
-
|
|
|
- Option cmdenv =
|
|
|
- createOption("cmdenv", "(n=v) Pass env.var to streaming commands.",
|
|
|
+
|
|
|
+ Option cmdenv =
|
|
|
+ createOption("cmdenv", "(n=v) Pass env.var to streaming commands.",
|
|
|
"spec", 1, false);
|
|
|
- Option cacheFile = createOption("cacheFile",
|
|
|
+ Option cacheFile = createOption("cacheFile",
|
|
|
"File name URI", "fileNameURI", Integer.MAX_VALUE, false);
|
|
|
- Option cacheArchive = createOption("cacheArchive",
|
|
|
+ Option cacheArchive = createOption("cacheArchive",
|
|
|
"File name URI", "fileNameURI", Integer.MAX_VALUE, false);
|
|
|
Option io = createOption("io",
|
|
|
"Optional.", "spec", 1, false);
|
|
|
-
|
|
|
+
|
|
|
// boolean properties
|
|
|
-
|
|
|
- Option background = createBoolOption("background", "Submit the job and don't wait till it completes.");
|
|
|
- Option verbose = createBoolOption("verbose", "print verbose output");
|
|
|
- Option info = createBoolOption("info", "print verbose output");
|
|
|
- Option help = createBoolOption("help", "print this help message");
|
|
|
- Option debug = createBoolOption("debug", "print debug output");
|
|
|
+
|
|
|
+ Option background = createBoolOption("background", "Submit the job and don't wait till it completes.");
|
|
|
+ Option verbose = createBoolOption("verbose", "print verbose output");
|
|
|
+ Option info = createBoolOption("info", "print verbose output");
|
|
|
+ Option help = createBoolOption("help", "print this help message");
|
|
|
+ Option debug = createBoolOption("debug", "print debug output");
|
|
|
Option lazyOutput = createBoolOption("lazyOutput", "create outputs lazily");
|
|
|
-
|
|
|
+
|
|
|
allOptions = new Options().
|
|
|
addOption(input).
|
|
|
addOption(output).
|
|
@@ -490,9 +490,9 @@ public class StreamJob implements Tool {
    System.out.println("Usage: $HADOOP_PREFIX/bin/hadoop jar hadoop-streaming.jar"
        + " [options]");
    System.out.println("Options:");
-    System.out.println(" -input <path> DFS input file(s) for the Map"
+    System.out.println(" -input <path> DFS input file(s) for the Map"
        + " step.");
-    System.out.println(" -output <path> DFS output directory for the"
+    System.out.println(" -output <path> DFS output directory for the"
        + " Reduce step.");
    System.out.println(" -mapper <cmd|JavaClassName> Optional. Command"
        + " to be run as mapper.");
@@ -501,7 +501,7 @@ public class StreamJob implements Tool {
    System.out.println(" -reducer <cmd|JavaClassName> Optional. Command"
        + " to be run as reducer.");
    System.out.println(" -file <file> Optional. File/dir to be "
-        + "shipped in the Job jar file.\n" +
+        + "shipped in the Job jar file.\n" +
        " Deprecated. Use generic option \"-files\" instead.");
    System.out.println(" -inputformat <TextInputFormat(default)"
        + "|SequenceFileAsTextInputFormat|JavaClassName>\n"
@@ -533,7 +533,7 @@ public class StreamJob implements Tool {
    GenericOptionsParser.printGenericCommandUsage(System.out);
 
    if (!detailed) {
-      System.out.println();
+      System.out.println();
      System.out.println("For more details about these options:");
      System.out.println("Use " +
          "$HADOOP_PREFIX/bin/hadoop jar hadoop-streaming.jar -info");
@@ -592,7 +592,7 @@ public class StreamJob implements Tool {
    System.out.println(" -D " + MRConfig.LOCAL_DIR + "=/tmp/local");
    System.out.println(" -D " + JTConfig.JT_SYSTEM_DIR + "=/tmp/system");
    System.out.println(" -D " + MRConfig.TEMP_DIR + "=/tmp/temp");
-    System.out.println("To treat tasks with non-zero exit status as SUCCEDED:");
+    System.out.println("To treat tasks with non-zero exit status as SUCCEDED:");
    System.out.println(" -D stream.non.zero.exit.is.failure=false");
    System.out.println("Use a custom hadoop streaming build along with standard"
        + " hadoop install:");
@@ -621,7 +621,7 @@ public class StreamJob implements Tool {
    System.out.println(" daily logs for days in month 2006-04");
  }
 
-  public void fail(String message) {
+  public void fail(String message) {
    System.err.println(message);
    System.err.println("Try -help for more information");
    throw new IllegalArgumentException(message);
@@ -659,7 +659,7 @@ public class StreamJob implements Tool {
    // $HADOOP_PREFIX/bin/hadoop jar /not/first/on/classpath/custom-hadoop-streaming.jar
    // where findInClasspath() would find the version of hadoop-streaming.jar in $HADOOP_PREFIX
    String runtimeClasses = config_.get("stream.shipped.hadoopstreaming"); // jar or class dir
-
+
    if (runtimeClasses == null) {
      runtimeClasses = StreamUtil.findInClasspath(StreamJob.class.getName());
    }
@@ -700,7 +700,7 @@ public class StreamJob implements Tool {
    builder.merge(packageFiles_, unjarFiles, jobJarName);
    return jobJarName;
  }
-
+
  /**
   * get the uris of all the files/caches
   */
@@ -710,7 +710,7 @@ public class StreamJob implements Tool {
    fileURIs = StringUtils.stringToURI(files);
    archiveURIs = StringUtils.stringToURI(archives);
  }
-
+
  protected void setJobConf() throws IOException {
    if (additionalConfSpec_ != null) {
      LOG.warn("-additionalconfspec option is deprecated, please use -conf instead.");
@@ -719,15 +719,15 @@ public class StreamJob implements Tool {
 
    // general MapRed job properties
    jobConf_ = new JobConf(config_, StreamJob.class);
-
+
    // All streaming jobs get the task timeout value
    // from the configuration settings.
 
    // The correct FS must be set before this is called!
-    // (to resolve local vs. dfs drive letter differences)
+    // (to resolve local vs. dfs drive letter differences)
    // (mapreduce.job.working.dir will be lazily initialized ONCE and depends on FS)
    for (int i = 0; i < inputSpecs_.size(); i++) {
-      FileInputFormat.addInputPaths(jobConf_,
+      FileInputFormat.addInputPaths(jobConf_,
                                    (String) inputSpecs_.get(i));
    }
 
@@ -773,7 +773,7 @@ public class StreamJob implements Tool {
        fail("-inputformat : class not found : " + inputFormatSpec_);
      }
    }
-    }
+    }
    if (fmt == null) {
      fmt = StreamInputFormat.class;
    }
@@ -786,20 +786,20 @@ public class StreamJob implements Tool {
      jobConf_.set("stream.reduce.input", ioSpec_);
      jobConf_.set("stream.reduce.output", ioSpec_);
    }
-
-    Class<? extends IdentifierResolver> idResolverClass =
+
+    Class<? extends IdentifierResolver> idResolverClass =
      jobConf_.getClass("stream.io.identifier.resolver.class",
        IdentifierResolver.class, IdentifierResolver.class);
    IdentifierResolver idResolver = ReflectionUtils.newInstance(idResolverClass, jobConf_);
-
+
    idResolver.resolve(jobConf_.get("stream.map.input", IdentifierResolver.TEXT_ID));
    jobConf_.setClass("stream.map.input.writer.class",
      idResolver.getInputWriterClass(), InputWriter.class);
-
+
    idResolver.resolve(jobConf_.get("stream.reduce.input", IdentifierResolver.TEXT_ID));
    jobConf_.setClass("stream.reduce.input.writer.class",
      idResolver.getInputWriterClass(), InputWriter.class);
-
+
    jobConf_.set("stream.addenvironment", addTaskEnvironment_);
 
    boolean isMapperACommand = false;
@@ -811,7 +811,7 @@ public class StreamJob implements Tool {
        isMapperACommand = true;
        jobConf_.setMapperClass(PipeMapper.class);
        jobConf_.setMapRunnerClass(PipeMapRunner.class);
-        jobConf_.set("stream.map.streamprocessor",
+        jobConf_.set("stream.map.streamprocessor",
                     URLEncoder.encode(mapCmd_, "UTF-8"));
      }
    }
@@ -900,7 +900,7 @@ public class StreamJob implements Tool {
        jobConf_.set(k, v);
      }
    }
-
+
    FileOutputFormat.setOutputPath(jobConf_, new Path(output_));
    fmt = null;
    if (outputFormatSpec_!= null) {
@@ -928,7 +928,7 @@ public class StreamJob implements Tool {
        fail("-partitioner : class not found : " + partitionerSpec_);
      }
    }
-
+
    if(mapDebugSpec_ != null){
      jobConf_.setMapDebugScript(mapDebugSpec_);
    }
@@ -942,7 +942,7 @@ public class StreamJob implements Tool {
    if (jar_ != null) {
      jobConf_.setJar(jar_);
    }
-
+
    if ((cacheArchives != null) || (cacheFiles != null)){
      getURIs(cacheArchives, cacheFiles);
      boolean b = DistributedCache.checkURIs(fileURIs, archiveURIs);
@@ -955,11 +955,11 @@ public class StreamJob implements Tool {
      DistributedCache.setCacheArchives(archiveURIs, jobConf_);
    if (cacheFiles != null)
      DistributedCache.setCacheFiles(fileURIs, jobConf_);
-
+
    if (verbose_) {
      listJobConfProperties();
    }
-
+
    msg("submitting to jobconf: " + getJobTrackerHostPort());
  }
 
@@ -1013,7 +1013,7 @@ public class StreamJob implements Tool {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
-      LOG.error("Error launching job , Output path already exists : "
+      LOG.error("Error launching job , Output path already exists : "
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
@@ -1047,9 +1047,9 @@ public class StreamJob implements Tool {
  protected ArrayList<String> inputSpecs_ = new ArrayList<String>();
  protected TreeSet<String> seenPrimary_ = new TreeSet<String>();
  protected boolean hasSimpleInputSpecs_;
-  protected ArrayList<String> packageFiles_ = new ArrayList<String>();
+  protected ArrayList<String> packageFiles_ = new ArrayList<String>();
  protected ArrayList<String> shippedCanonFiles_ = new ArrayList<String>();
-  //protected TreeMap<String, String> userJobConfProps_ = new TreeMap<String, String>();
+  //protected TreeMap<String, String> userJobConfProps_ = new TreeMap<String, String>();
  protected String output_;
  protected String mapCmd_;
  protected String comCmd_;