123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614 |
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.mapred;
- import java.io.IOException;
- import java.io.File;
- import java.util.StringTokenizer;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Enumeration;
- import java.net.URL;
- import java.net.URLDecoder;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.io.*;
- import org.apache.hadoop.io.compress.CompressionCodec;
- import org.apache.hadoop.mapred.lib.IdentityMapper;
- import org.apache.hadoop.mapred.lib.IdentityReducer;
- import org.apache.hadoop.mapred.lib.HashPartitioner;
- import org.apache.hadoop.util.ReflectionUtils;
- /** A map/reduce job configuration. This names the {@link Mapper}, combiner
- * (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat}, and
- * {@link OutputFormat} implementations to be used. It also indicates the set
- * of input files, and where the output files should be written. */
- public class JobConf extends Configuration {
- private void initialize() {
- addDefaultResource("mapred-default.xml");
- }
-
- /**
- * Construct a map/reduce job configuration.
- */
- public JobConf() {
- initialize();
- }
- /**
- * Construct a map/reduce job configuration.
- * @param exampleClass a class whose containing jar is used as the job's jar.
- */
- public JobConf(Class exampleClass) {
- initialize();
- setJarByClass(exampleClass);
- }
-
- /**
- * Construct a map/reduce job configuration.
- *
- * @param conf a Configuration whose settings will be inherited.
- */
- public JobConf(Configuration conf) {
- super(conf);
- initialize();
- }
- /** Construct a map/reduce job configuration.
- *
- * @param conf a Configuration whose settings will be inherited.
- * @param exampleClass a class whose containing jar is used as the job's jar.
- */
- public JobConf(Configuration conf, Class exampleClass) {
- this(conf);
- initialize();
- setJarByClass(exampleClass);
- }
- /** Construct a map/reduce configuration.
- *
- * @param config a Configuration-format XML job description file
- */
- public JobConf(String config) {
- this(new Path(config));
- }
- /** Construct a map/reduce configuration.
- *
- * @param config a Configuration-format XML job description file
- */
- public JobConf(Path config) {
- super();
- addDefaultResource("mapred-default.xml");
- addDefaultResource(config);
- }
- public String getJar() { return get("mapred.jar"); }
- public void setJar(String jar) { set("mapred.jar", jar); }
-
- /**
- * Set the job's jar file by finding an example class location.
- * @param cls the example class
- */
- public void setJarByClass(Class cls) {
- String jar = findContainingJar(cls);
- if (jar != null) {
- setJar(jar);
- }
- }
- public Path getSystemDir() {
- return new Path(get("mapred.system.dir", "/tmp/hadoop/mapred/system"));
- }
- public String[] getLocalDirs() throws IOException {
- return getStrings("mapred.local.dir");
- }
- public void deleteLocalFiles() throws IOException {
- String[] localDirs = getLocalDirs();
- for (int i = 0; i < localDirs.length; i++) {
- FileSystem.getLocal(this).delete(new Path(localDirs[i]));
- }
- }
- public void deleteLocalFiles(String subdir) throws IOException {
- String[] localDirs = getLocalDirs();
- for (int i = 0; i < localDirs.length; i++) {
- FileSystem.getLocal(this).delete(new Path(localDirs[i], subdir));
- }
- }
- /** Constructs a local file name. Files are distributed among configured
- * local directories.*/
- public Path getLocalPath(String pathString) throws IOException {
- return getLocalPath("mapred.local.dir", pathString);
- }
- public void setInputPath(Path dir) {
- dir = new Path(getWorkingDirectory(), dir);
- set("mapred.input.dir", dir);
- }
- public void addInputPath(Path dir) {
- dir = new Path(getWorkingDirectory(), dir);
- String dirs = get("mapred.input.dir");
- set("mapred.input.dir", dirs == null ? dir.toString() : dirs + "," + dir);
- }
- public Path[] getInputPaths() {
- String dirs = get("mapred.input.dir", "");
- ArrayList list = Collections.list(new StringTokenizer(dirs, ","));
- Path[] result = new Path[list.size()];
- for (int i = 0; i < list.size(); i++) {
- result[i] = new Path((String)list.get(i));
- }
- return result;
- }
- /**
- * Get the reported username for this job.
- * @return the username
- */
- public String getUser() {
- return get("user.name");
- }
-
- /**
- * Set the reported username for this job.
- * @param user the username
- */
- public void setUser(String user) {
- set("user.name", user);
- }
-
- /**
- * Set whether the framework should keep the intermediate files for
- * failed tasks.
- */
- public void setKeepFailedTaskFiles(boolean keep) {
- setBoolean("keep.failed.task.files", keep);
- }
-
- /**
- * Should the temporary files for failed tasks be kept?
- * @return should the files be kept?
- */
- public boolean getKeepFailedTaskFiles() {
- return getBoolean("keep.failed.task.files", false);
- }
-
- /**
- * Set a regular expression for task names that should be kept.
- * The regular expression ".*_m_000123_0" would keep the files
- * for the first instance of map 123 that ran.
- * @param pattern the java.util.regex.Pattern to match against the
- * task names.
- */
- public void setKeepTaskFilesPattern(String pattern) {
- set("keep.task.files.pattern", pattern);
- }
-
- /**
- * Get the regular expression that is matched against the task names
- * to see if we need to keep the files.
- * @return the pattern as a string, if it was set, othewise null
- */
- public String getKeepTaskFilesPattern() {
- return get("keep.task.files.pattern");
- }
-
- /**
- * Set the current working directory for the default file system
- * @param dir the new current working directory
- */
- public void setWorkingDirectory(Path dir) {
- dir = new Path(getWorkingDirectory(), dir);
- set("mapred.working.dir", dir.toString());
- }
-
- /**
- * Get the current working directory for the default file system.
- * @return the directory name
- */
- public Path getWorkingDirectory() {
- String name = get("mapred.working.dir");
- if (name != null) {
- return new Path(name);
- } else {
- try {
- Path dir = FileSystem.get(this).getWorkingDirectory();
- set("mapred.working.dir", dir.toString());
- return dir;
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
- }
-
- public Path getOutputPath() {
- String name = get("mapred.output.dir");
- return name == null ? null: new Path(name);
- }
- public void setOutputPath(Path dir) {
- dir = new Path(getWorkingDirectory(), dir);
- set("mapred.output.dir", dir);
- }
- public InputFormat getInputFormat() {
- return (InputFormat)ReflectionUtils.newInstance(getClass("mapred.input.format.class",
- TextInputFormat.class,
- InputFormat.class),
- this);
- }
- public void setInputFormat(Class theClass) {
- setClass("mapred.input.format.class", theClass, InputFormat.class);
- }
- public OutputFormat getOutputFormat() {
- return (OutputFormat)ReflectionUtils.newInstance(getClass("mapred.output.format.class",
- TextOutputFormat.class,
- OutputFormat.class),
- this);
- }
- public void setOutputFormat(Class theClass) {
- setClass("mapred.output.format.class", theClass, OutputFormat.class);
- }
- /** @deprecated Call {@link RecordReader#createKey()}. */
- public Class getInputKeyClass() {
- return getClass("mapred.input.key.class",
- LongWritable.class, WritableComparable.class);
- }
- /** @deprecated Not used */
- public void setInputKeyClass(Class theClass) {
- setClass("mapred.input.key.class", theClass, WritableComparable.class);
- }
- /** @deprecated Call {@link RecordReader#createValue()}. */
- public Class getInputValueClass() {
- return getClass("mapred.input.value.class", Text.class, Writable.class);
- }
- /** @deprecated Not used */
- public void setInputValueClass(Class theClass) {
- setClass("mapred.input.value.class", theClass, Writable.class);
- }
-
- /**
- * Should the map outputs be compressed before transfer?
- * Uses the SequenceFile compression.
- */
- public void setCompressMapOutput(boolean compress) {
- setBoolean("mapred.compress.map.output", compress);
- }
-
- /**
- * Are the outputs of the maps be compressed?
- * @return are they compressed?
- */
- public boolean getCompressMapOutput() {
- return getBoolean("mapred.compress.map.output", false);
- }
- /**
- * Set the compression type for the map outputs.
- * @param style NONE, RECORD, or BLOCK to control how the map outputs are
- * compressed
- */
- public void setMapOutputCompressionType(SequenceFile.CompressionType style) {
- set("map.output.compression.type", style.toString());
- }
-
- /**
- * Get the compression type for the map outputs.
- * @return the compression type, defaulting to job output compression type
- */
- public SequenceFile.CompressionType getMapOutputCompressionType() {
- String val = get("map.output.compression.type", "RECORD");
- return SequenceFile.CompressionType.valueOf(val);
- }
-
- /**
- * Set the given class as the compression codec for the map outputs.
- * @param codecClass the CompressionCodec class that will compress the
- * map outputs
- */
- public void setMapOutputCompressorClass(Class codecClass) {
- setCompressMapOutput(true);
- setClass("mapred.output.compression.codec", codecClass,
- CompressionCodec.class);
- }
-
- /**
- * Get the codec for compressing the map outputs
- * @param defaultValue the value to return if it is not set
- * @return the CompressionCodec class that should be used to compress the
- * map outputs
- * @throws IllegalArgumentException if the class was specified, but not found
- */
- public Class getMapOutputCompressorClass(Class defaultValue) {
- String name = get("mapred.output.compression.codec");
- if (name == null) {
- return defaultValue;
- } else {
- try {
- return getClassByName(name);
- } catch (ClassNotFoundException e) {
- throw new IllegalArgumentException("Compression codec " + name +
- " was not found.", e);
- }
- }
- }
-
- /**
- * Get the key class for the map output data. If it is not set, use the
- * (final) output ket class This allows the map output key class to be
- * different than the final output key class
- *
- * @return map output key class
- */
- public Class getMapOutputKeyClass() {
- Class retv = getClass("mapred.mapoutput.key.class", null,
- WritableComparable.class);
- if (retv == null) {
- retv = getOutputKeyClass();
- }
- return retv;
- }
-
- /**
- * Set the key class for the map output data. This allows the user to
- * specify the map output key class to be different than the final output
- * value class
- */
- public void setMapOutputKeyClass(Class theClass) {
- setClass("mapred.mapoutput.key.class", theClass,
- WritableComparable.class);
- }
-
- /**
- * Get the value class for the map output data. If it is not set, use the
- * (final) output value class This allows the map output value class to be
- * different than the final output value class
- *
- * @return map output value class
- */
- public Class getMapOutputValueClass() {
- Class retv = getClass("mapred.mapoutput.value.class", null,
- Writable.class);
- if (retv == null) {
- retv = getOutputValueClass();
- }
- return retv;
- }
-
- /**
- * Set the value class for the map output data. This allows the user to
- * specify the map output value class to be different than the final output
- * value class
- */
- public void setMapOutputValueClass(Class theClass) {
- setClass("mapred.mapoutput.value.class", theClass, Writable.class);
- }
-
- public Class getOutputKeyClass() {
- return getClass("mapred.output.key.class",
- LongWritable.class, WritableComparable.class);
- }
-
- public void setOutputKeyClass(Class theClass) {
- setClass("mapred.output.key.class", theClass, WritableComparable.class);
- }
- public WritableComparator getOutputKeyComparator() {
- Class theClass = getClass("mapred.output.key.comparator.class", null,
- WritableComparator.class);
- if (theClass != null)
- return (WritableComparator)ReflectionUtils.newInstance(theClass, this);
- return WritableComparator.get(getMapOutputKeyClass());
- }
- public void setOutputKeyComparatorClass(Class theClass) {
- setClass("mapred.output.key.comparator.class",
- theClass, WritableComparator.class);
- }
- public Class getOutputValueClass() {
- return getClass("mapred.output.value.class", Text.class, Writable.class);
- }
- public void setOutputValueClass(Class theClass) {
- setClass("mapred.output.value.class", theClass, Writable.class);
- }
- public Class getMapperClass() {
- return getClass("mapred.mapper.class", IdentityMapper.class, Mapper.class);
- }
- public void setMapperClass(Class theClass) {
- setClass("mapred.mapper.class", theClass, Mapper.class);
- }
- public Class getMapRunnerClass() {
- return getClass("mapred.map.runner.class",
- MapRunner.class, MapRunnable.class);
- }
- public void setMapRunnerClass(Class theClass) {
- setClass("mapred.map.runner.class", theClass, MapRunnable.class);
- }
- public Class getPartitionerClass() {
- return getClass("mapred.partitioner.class",
- HashPartitioner.class, Partitioner.class);
- }
- public void setPartitionerClass(Class theClass) {
- setClass("mapred.partitioner.class", theClass, Partitioner.class);
- }
- public Class getReducerClass() {
- return getClass("mapred.reducer.class",
- IdentityReducer.class, Reducer.class);
- }
- public void setReducerClass(Class theClass) {
- setClass("mapred.reducer.class", theClass, Reducer.class);
- }
- public Class getCombinerClass() {
- return getClass("mapred.combiner.class", null, Reducer.class);
- }
- public void setCombinerClass(Class theClass) {
- setClass("mapred.combiner.class", theClass, Reducer.class);
- }
-
- /**
- * Should speculative execution be used for this job?
- * @return Defaults to true
- */
- public boolean getSpeculativeExecution() {
- return getBoolean("mapred.speculative.execution", true);
- }
-
- /**
- * Turn on or off speculative execution for this job.
- * In general, it should be turned off for map jobs that have side effects.
- */
- public void setSpeculativeExecution(boolean new_val) {
- setBoolean("mapred.speculative.execution", new_val);
- }
-
- public int getNumMapTasks() { return getInt("mapred.map.tasks", 1); }
- public void setNumMapTasks(int n) { setInt("mapred.map.tasks", n); }
- public int getNumReduceTasks() { return getInt("mapred.reduce.tasks", 1); }
- public void setNumReduceTasks(int n) { setInt("mapred.reduce.tasks", n); }
-
- /** Get the configured number of maximum attempts that will be made to run a
- * map task, as specified by the <code>mapred.map.max.attempts</code>
- * property. If this property is not already set, the default is 4 attempts
- * @return the max number of attempts
- */
- public int getMaxMapAttempts() {
- return getInt("mapred.map.max.attempts", 4);
- }
- /** Expert: Set the number of maximum attempts that will be made to run a
- * map task
- * @param n the number of attempts
- *
- */
- public void setMaxMapAttempts(int n) {
- setInt("mapred.map.max.attempts", n);
- }
- /** Get the configured number of maximum attempts that will be made to run a
- * reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
- * property. If this property is not already set, the default is 4 attempts
- * @return the max number of attempts
- */
- public int getMaxReduceAttempts() {
- return getInt("mapred.reduce.max.attempts", 4);
- }
- /** Expert: Set the number of maximum attempts that will be made to run a
- * reduce task
- * @param n the number of attempts
- *
- */
- public void setMaxReduceAttempts(int n) {
- setInt("mapred.reduce.max.attempts", n);
- }
-
- /**
- * Get the user-specified job name. This is only used to identify the
- * job to the user.
- * @return the job's name, defaulting to ""
- */
- public String getJobName() {
- return get("mapred.job.name", "");
- }
-
- /**
- * Set the user-specified job name.
- * @param name the job's new name
- */
- public void setJobName(String name) {
- set("mapred.job.name", name);
- }
-
- /**
- * Set the maximum no. of failures of a given job per tasktracker.
- *
- * @param noFailures maximum no. of failures of a given job per tasktracker.
- */
- public void setMaxTaskFailuresPerTracker(int noFailures) {
- setInt("mapred.max.tracker.failures", noFailures);
- }
-
- /**
- * Get the maximum no. of failures of a given job per tasktracker.
- *
- * @return the maximum no. of failures of a given job per tasktracker.
- */
- public int getMaxTaskFailuresPerTracker() {
- return getInt("mapred.max.tracker.failures", 4);
- }
-
- /** Find a jar that contains a class of the same name, if any.
- * It will return a jar file, even if that is not the first thing
- * on the class path that has a class with the same name.
- * @author Owen O'Malley
- * @param my_class the class to find
- * @return a jar file that contains the class, or null
- * @throws IOException
- */
- private static String findContainingJar(Class my_class) {
- ClassLoader loader = my_class.getClassLoader();
- String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
- try {
- for(Enumeration itr = loader.getResources(class_file);
- itr.hasMoreElements();) {
- URL url = (URL) itr.nextElement();
- if ("jar".equals(url.getProtocol())) {
- String toReturn = url.getPath();
- if (toReturn.startsWith("file:")) {
- toReturn = toReturn.substring("file:".length());
- }
- toReturn = URLDecoder.decode(toReturn, "UTF-8");
- return toReturn.replaceAll("!.*$", "");
- }
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- return null;
- }
- }
|