JobConf.java 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.mapred;
  19. import java.io.IOException;
  20. import java.io.File;
  21. import java.util.StringTokenizer;
  22. import java.util.ArrayList;
  23. import java.util.Collections;
  24. import java.util.Enumeration;
  25. import java.net.URL;
  26. import java.net.URLDecoder;
  27. import org.apache.hadoop.fs.FileSystem;
  28. import org.apache.hadoop.fs.Path;
  29. import org.apache.hadoop.conf.Configuration;
  30. import org.apache.hadoop.io.*;
  31. import org.apache.hadoop.io.compress.CompressionCodec;
  32. import org.apache.hadoop.mapred.lib.IdentityMapper;
  33. import org.apache.hadoop.mapred.lib.IdentityReducer;
  34. import org.apache.hadoop.mapred.lib.HashPartitioner;
  35. import org.apache.hadoop.util.ReflectionUtils;
  36. /** A map/reduce job configuration. This names the {@link Mapper}, combiner
  37. * (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat}, and
  38. * {@link OutputFormat} implementations to be used. It also indicates the set
  39. * of input files, and where the output files should be written. */
  40. public class JobConf extends Configuration {
  41. private void initialize() {
  42. addDefaultResource("mapred-default.xml");
  43. }
  44. /**
  45. * Construct a map/reduce job configuration.
  46. */
  47. public JobConf() {
  48. initialize();
  49. }
  50. /**
  51. * Construct a map/reduce job configuration.
  52. * @param exampleClass a class whose containing jar is used as the job's jar.
  53. */
  54. public JobConf(Class exampleClass) {
  55. initialize();
  56. setJarByClass(exampleClass);
  57. }
  58. /**
  59. * Construct a map/reduce job configuration.
  60. *
  61. * @param conf a Configuration whose settings will be inherited.
  62. */
  63. public JobConf(Configuration conf) {
  64. super(conf);
  65. initialize();
  66. }
  67. /** Construct a map/reduce job configuration.
  68. *
  69. * @param conf a Configuration whose settings will be inherited.
  70. * @param exampleClass a class whose containing jar is used as the job's jar.
  71. */
  72. public JobConf(Configuration conf, Class exampleClass) {
  73. this(conf);
  74. initialize();
  75. setJarByClass(exampleClass);
  76. }
  77. /** Construct a map/reduce configuration.
  78. *
  79. * @param config a Configuration-format XML job description file
  80. */
  81. public JobConf(String config) {
  82. this(new Path(config));
  83. }
  84. /** Construct a map/reduce configuration.
  85. *
  86. * @param config a Configuration-format XML job description file
  87. */
  88. public JobConf(Path config) {
  89. super();
  90. addDefaultResource("mapred-default.xml");
  91. addDefaultResource(config);
  92. }
  93. public String getJar() { return get("mapred.jar"); }
  94. public void setJar(String jar) { set("mapred.jar", jar); }
  95. /**
  96. * Set the job's jar file by finding an example class location.
  97. * @param cls the example class
  98. */
  99. public void setJarByClass(Class cls) {
  100. String jar = findContainingJar(cls);
  101. if (jar != null) {
  102. setJar(jar);
  103. }
  104. }
  105. public Path getSystemDir() {
  106. return new Path(get("mapred.system.dir", "/tmp/hadoop/mapred/system"));
  107. }
  108. public String[] getLocalDirs() throws IOException {
  109. return getStrings("mapred.local.dir");
  110. }
  111. public void deleteLocalFiles() throws IOException {
  112. String[] localDirs = getLocalDirs();
  113. for (int i = 0; i < localDirs.length; i++) {
  114. FileSystem.getLocal(this).delete(new Path(localDirs[i]));
  115. }
  116. }
  117. public void deleteLocalFiles(String subdir) throws IOException {
  118. String[] localDirs = getLocalDirs();
  119. for (int i = 0; i < localDirs.length; i++) {
  120. FileSystem.getLocal(this).delete(new Path(localDirs[i], subdir));
  121. }
  122. }
  123. /** Constructs a local file name. Files are distributed among configured
  124. * local directories.*/
  125. public Path getLocalPath(String pathString) throws IOException {
  126. return getLocalPath("mapred.local.dir", pathString);
  127. }
  128. public void setInputPath(Path dir) {
  129. dir = new Path(getWorkingDirectory(), dir);
  130. set("mapred.input.dir", dir);
  131. }
  132. public void addInputPath(Path dir) {
  133. dir = new Path(getWorkingDirectory(), dir);
  134. String dirs = get("mapred.input.dir");
  135. set("mapred.input.dir", dirs == null ? dir.toString() : dirs + "," + dir);
  136. }
  137. public Path[] getInputPaths() {
  138. String dirs = get("mapred.input.dir", "");
  139. ArrayList list = Collections.list(new StringTokenizer(dirs, ","));
  140. Path[] result = new Path[list.size()];
  141. for (int i = 0; i < list.size(); i++) {
  142. result[i] = new Path((String)list.get(i));
  143. }
  144. return result;
  145. }
  146. /**
  147. * Get the reported username for this job.
  148. * @return the username
  149. */
  150. public String getUser() {
  151. return get("user.name");
  152. }
  153. /**
  154. * Set the reported username for this job.
  155. * @param user the username
  156. */
  157. public void setUser(String user) {
  158. set("user.name", user);
  159. }
  160. /**
  161. * Set whether the framework should keep the intermediate files for
  162. * failed tasks.
  163. */
  164. public void setKeepFailedTaskFiles(boolean keep) {
  165. setBoolean("keep.failed.task.files", keep);
  166. }
  167. /**
  168. * Should the temporary files for failed tasks be kept?
  169. * @return should the files be kept?
  170. */
  171. public boolean getKeepFailedTaskFiles() {
  172. return getBoolean("keep.failed.task.files", false);
  173. }
  174. /**
  175. * Set a regular expression for task names that should be kept.
  176. * The regular expression ".*_m_000123_0" would keep the files
  177. * for the first instance of map 123 that ran.
  178. * @param pattern the java.util.regex.Pattern to match against the
  179. * task names.
  180. */
  181. public void setKeepTaskFilesPattern(String pattern) {
  182. set("keep.task.files.pattern", pattern);
  183. }
  184. /**
  185. * Get the regular expression that is matched against the task names
  186. * to see if we need to keep the files.
  187. * @return the pattern as a string, if it was set, othewise null
  188. */
  189. public String getKeepTaskFilesPattern() {
  190. return get("keep.task.files.pattern");
  191. }
  192. /**
  193. * Set the current working directory for the default file system
  194. * @param dir the new current working directory
  195. */
  196. public void setWorkingDirectory(Path dir) {
  197. dir = new Path(getWorkingDirectory(), dir);
  198. set("mapred.working.dir", dir.toString());
  199. }
  200. /**
  201. * Get the current working directory for the default file system.
  202. * @return the directory name
  203. */
  204. public Path getWorkingDirectory() {
  205. String name = get("mapred.working.dir");
  206. if (name != null) {
  207. return new Path(name);
  208. } else {
  209. try {
  210. Path dir = FileSystem.get(this).getWorkingDirectory();
  211. set("mapred.working.dir", dir.toString());
  212. return dir;
  213. } catch (IOException e) {
  214. throw new RuntimeException(e);
  215. }
  216. }
  217. }
  218. public Path getOutputPath() {
  219. String name = get("mapred.output.dir");
  220. return name == null ? null: new Path(name);
  221. }
  222. public void setOutputPath(Path dir) {
  223. dir = new Path(getWorkingDirectory(), dir);
  224. set("mapred.output.dir", dir);
  225. }
  226. public InputFormat getInputFormat() {
  227. return (InputFormat)ReflectionUtils.newInstance(getClass("mapred.input.format.class",
  228. TextInputFormat.class,
  229. InputFormat.class),
  230. this);
  231. }
  232. public void setInputFormat(Class theClass) {
  233. setClass("mapred.input.format.class", theClass, InputFormat.class);
  234. }
  235. public OutputFormat getOutputFormat() {
  236. return (OutputFormat)ReflectionUtils.newInstance(getClass("mapred.output.format.class",
  237. TextOutputFormat.class,
  238. OutputFormat.class),
  239. this);
  240. }
  241. public void setOutputFormat(Class theClass) {
  242. setClass("mapred.output.format.class", theClass, OutputFormat.class);
  243. }
  244. /** @deprecated Call {@link RecordReader#createKey()}. */
  245. public Class getInputKeyClass() {
  246. return getClass("mapred.input.key.class",
  247. LongWritable.class, WritableComparable.class);
  248. }
  249. /** @deprecated Not used */
  250. public void setInputKeyClass(Class theClass) {
  251. setClass("mapred.input.key.class", theClass, WritableComparable.class);
  252. }
  253. /** @deprecated Call {@link RecordReader#createValue()}. */
  254. public Class getInputValueClass() {
  255. return getClass("mapred.input.value.class", Text.class, Writable.class);
  256. }
  257. /** @deprecated Not used */
  258. public void setInputValueClass(Class theClass) {
  259. setClass("mapred.input.value.class", theClass, Writable.class);
  260. }
  261. /**
  262. * Should the map outputs be compressed before transfer?
  263. * Uses the SequenceFile compression.
  264. */
  265. public void setCompressMapOutput(boolean compress) {
  266. setBoolean("mapred.compress.map.output", compress);
  267. }
  268. /**
  269. * Are the outputs of the maps be compressed?
  270. * @return are they compressed?
  271. */
  272. public boolean getCompressMapOutput() {
  273. return getBoolean("mapred.compress.map.output", false);
  274. }
  275. /**
  276. * Set the compression type for the map outputs.
  277. * @param style NONE, RECORD, or BLOCK to control how the map outputs are
  278. * compressed
  279. */
  280. public void setMapOutputCompressionType(SequenceFile.CompressionType style) {
  281. set("map.output.compression.type", style.toString());
  282. }
  283. /**
  284. * Get the compression type for the map outputs.
  285. * @return the compression type, defaulting to job output compression type
  286. */
  287. public SequenceFile.CompressionType getMapOutputCompressionType() {
  288. String val = get("map.output.compression.type", "RECORD");
  289. return SequenceFile.CompressionType.valueOf(val);
  290. }
  291. /**
  292. * Set the given class as the compression codec for the map outputs.
  293. * @param codecClass the CompressionCodec class that will compress the
  294. * map outputs
  295. */
  296. public void setMapOutputCompressorClass(Class codecClass) {
  297. setCompressMapOutput(true);
  298. setClass("mapred.output.compression.codec", codecClass,
  299. CompressionCodec.class);
  300. }
  301. /**
  302. * Get the codec for compressing the map outputs
  303. * @param defaultValue the value to return if it is not set
  304. * @return the CompressionCodec class that should be used to compress the
  305. * map outputs
  306. * @throws IllegalArgumentException if the class was specified, but not found
  307. */
  308. public Class getMapOutputCompressorClass(Class defaultValue) {
  309. String name = get("mapred.output.compression.codec");
  310. if (name == null) {
  311. return defaultValue;
  312. } else {
  313. try {
  314. return getClassByName(name);
  315. } catch (ClassNotFoundException e) {
  316. throw new IllegalArgumentException("Compression codec " + name +
  317. " was not found.", e);
  318. }
  319. }
  320. }
  321. /**
  322. * Get the key class for the map output data. If it is not set, use the
  323. * (final) output ket class This allows the map output key class to be
  324. * different than the final output key class
  325. *
  326. * @return map output key class
  327. */
  328. public Class getMapOutputKeyClass() {
  329. Class retv = getClass("mapred.mapoutput.key.class", null,
  330. WritableComparable.class);
  331. if (retv == null) {
  332. retv = getOutputKeyClass();
  333. }
  334. return retv;
  335. }
  336. /**
  337. * Set the key class for the map output data. This allows the user to
  338. * specify the map output key class to be different than the final output
  339. * value class
  340. */
  341. public void setMapOutputKeyClass(Class theClass) {
  342. setClass("mapred.mapoutput.key.class", theClass,
  343. WritableComparable.class);
  344. }
  345. /**
  346. * Get the value class for the map output data. If it is not set, use the
  347. * (final) output value class This allows the map output value class to be
  348. * different than the final output value class
  349. *
  350. * @return map output value class
  351. */
  352. public Class getMapOutputValueClass() {
  353. Class retv = getClass("mapred.mapoutput.value.class", null,
  354. Writable.class);
  355. if (retv == null) {
  356. retv = getOutputValueClass();
  357. }
  358. return retv;
  359. }
  360. /**
  361. * Set the value class for the map output data. This allows the user to
  362. * specify the map output value class to be different than the final output
  363. * value class
  364. */
  365. public void setMapOutputValueClass(Class theClass) {
  366. setClass("mapred.mapoutput.value.class", theClass, Writable.class);
  367. }
  368. public Class getOutputKeyClass() {
  369. return getClass("mapred.output.key.class",
  370. LongWritable.class, WritableComparable.class);
  371. }
  372. public void setOutputKeyClass(Class theClass) {
  373. setClass("mapred.output.key.class", theClass, WritableComparable.class);
  374. }
  375. public WritableComparator getOutputKeyComparator() {
  376. Class theClass = getClass("mapred.output.key.comparator.class", null,
  377. WritableComparator.class);
  378. if (theClass != null)
  379. return (WritableComparator)ReflectionUtils.newInstance(theClass, this);
  380. return WritableComparator.get(getMapOutputKeyClass());
  381. }
  382. public void setOutputKeyComparatorClass(Class theClass) {
  383. setClass("mapred.output.key.comparator.class",
  384. theClass, WritableComparator.class);
  385. }
  386. public Class getOutputValueClass() {
  387. return getClass("mapred.output.value.class", Text.class, Writable.class);
  388. }
  389. public void setOutputValueClass(Class theClass) {
  390. setClass("mapred.output.value.class", theClass, Writable.class);
  391. }
  392. public Class getMapperClass() {
  393. return getClass("mapred.mapper.class", IdentityMapper.class, Mapper.class);
  394. }
  395. public void setMapperClass(Class theClass) {
  396. setClass("mapred.mapper.class", theClass, Mapper.class);
  397. }
  398. public Class getMapRunnerClass() {
  399. return getClass("mapred.map.runner.class",
  400. MapRunner.class, MapRunnable.class);
  401. }
  402. public void setMapRunnerClass(Class theClass) {
  403. setClass("mapred.map.runner.class", theClass, MapRunnable.class);
  404. }
  405. public Class getPartitionerClass() {
  406. return getClass("mapred.partitioner.class",
  407. HashPartitioner.class, Partitioner.class);
  408. }
  409. public void setPartitionerClass(Class theClass) {
  410. setClass("mapred.partitioner.class", theClass, Partitioner.class);
  411. }
  412. public Class getReducerClass() {
  413. return getClass("mapred.reducer.class",
  414. IdentityReducer.class, Reducer.class);
  415. }
  416. public void setReducerClass(Class theClass) {
  417. setClass("mapred.reducer.class", theClass, Reducer.class);
  418. }
  419. public Class getCombinerClass() {
  420. return getClass("mapred.combiner.class", null, Reducer.class);
  421. }
  422. public void setCombinerClass(Class theClass) {
  423. setClass("mapred.combiner.class", theClass, Reducer.class);
  424. }
  425. /**
  426. * Should speculative execution be used for this job?
  427. * @return Defaults to true
  428. */
  429. public boolean getSpeculativeExecution() {
  430. return getBoolean("mapred.speculative.execution", true);
  431. }
  432. /**
  433. * Turn on or off speculative execution for this job.
  434. * In general, it should be turned off for map jobs that have side effects.
  435. */
  436. public void setSpeculativeExecution(boolean new_val) {
  437. setBoolean("mapred.speculative.execution", new_val);
  438. }
  439. public int getNumMapTasks() { return getInt("mapred.map.tasks", 1); }
  440. public void setNumMapTasks(int n) { setInt("mapred.map.tasks", n); }
  441. public int getNumReduceTasks() { return getInt("mapred.reduce.tasks", 1); }
  442. public void setNumReduceTasks(int n) { setInt("mapred.reduce.tasks", n); }
  443. /** Get the configured number of maximum attempts that will be made to run a
  444. * map task, as specified by the <code>mapred.map.max.attempts</code>
  445. * property. If this property is not already set, the default is 4 attempts
  446. * @return the max number of attempts
  447. */
  448. public int getMaxMapAttempts() {
  449. return getInt("mapred.map.max.attempts", 4);
  450. }
  451. /** Expert: Set the number of maximum attempts that will be made to run a
  452. * map task
  453. * @param n the number of attempts
  454. *
  455. */
  456. public void setMaxMapAttempts(int n) {
  457. setInt("mapred.map.max.attempts", n);
  458. }
  459. /** Get the configured number of maximum attempts that will be made to run a
  460. * reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
  461. * property. If this property is not already set, the default is 4 attempts
  462. * @return the max number of attempts
  463. */
  464. public int getMaxReduceAttempts() {
  465. return getInt("mapred.reduce.max.attempts", 4);
  466. }
  467. /** Expert: Set the number of maximum attempts that will be made to run a
  468. * reduce task
  469. * @param n the number of attempts
  470. *
  471. */
  472. public void setMaxReduceAttempts(int n) {
  473. setInt("mapred.reduce.max.attempts", n);
  474. }
  475. /**
  476. * Get the user-specified job name. This is only used to identify the
  477. * job to the user.
  478. * @return the job's name, defaulting to ""
  479. */
  480. public String getJobName() {
  481. return get("mapred.job.name", "");
  482. }
  483. /**
  484. * Set the user-specified job name.
  485. * @param name the job's new name
  486. */
  487. public void setJobName(String name) {
  488. set("mapred.job.name", name);
  489. }
  490. /**
  491. * Set the maximum no. of failures of a given job per tasktracker.
  492. *
  493. * @param noFailures maximum no. of failures of a given job per tasktracker.
  494. */
  495. public void setMaxTaskFailuresPerTracker(int noFailures) {
  496. setInt("mapred.max.tracker.failures", noFailures);
  497. }
  498. /**
  499. * Get the maximum no. of failures of a given job per tasktracker.
  500. *
  501. * @return the maximum no. of failures of a given job per tasktracker.
  502. */
  503. public int getMaxTaskFailuresPerTracker() {
  504. return getInt("mapred.max.tracker.failures", 4);
  505. }
  506. /** Find a jar that contains a class of the same name, if any.
  507. * It will return a jar file, even if that is not the first thing
  508. * on the class path that has a class with the same name.
  509. * @author Owen O'Malley
  510. * @param my_class the class to find
  511. * @return a jar file that contains the class, or null
  512. * @throws IOException
  513. */
  514. private static String findContainingJar(Class my_class) {
  515. ClassLoader loader = my_class.getClassLoader();
  516. String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
  517. try {
  518. for(Enumeration itr = loader.getResources(class_file);
  519. itr.hasMoreElements();) {
  520. URL url = (URL) itr.nextElement();
  521. if ("jar".equals(url.getProtocol())) {
  522. String toReturn = url.getPath();
  523. if (toReturn.startsWith("file:")) {
  524. toReturn = toReturn.substring("file:".length());
  525. }
  526. toReturn = URLDecoder.decode(toReturn, "UTF-8");
  527. return toReturn.replaceAll("!.*$", "");
  528. }
  529. }
  530. } catch (IOException e) {
  531. throw new RuntimeException(e);
  532. }
  533. return null;
  534. }
  535. }