hod_config_guide.html 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2. <html>
  3. <head>
  4. <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5. <meta content="Apache Forrest" name="Generator">
  6. <meta name="Forrest-version" content="0.8">
  7. <meta name="Forrest-skin-name" content="pelt">
  8. <title>
  9. Hadoop On Demand: Configuration Guide
  10. </title>
  11. <link type="text/css" href="skin/basic.css" rel="stylesheet">
  12. <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
  13. <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
  14. <link type="text/css" href="skin/profile.css" rel="stylesheet">
  15. <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
  16. <link rel="shortcut icon" href="images/favicon.ico">
  17. </head>
  18. <body onload="init()">
  19. <script type="text/javascript">ndeSetTextSize();</script>
  20. <div id="top">
  21. <!--+
  22. |breadtrail
  23. +-->
  24. <div class="breadtrail">
  25. <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
  26. </div>
  27. <!--+
  28. |header
  29. +-->
  30. <div class="header">
  31. <!--+
  32. |start group logo
  33. +-->
  34. <div class="grouplogo">
  35. <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
  36. </div>
  37. <!--+
  38. |end group logo
  39. +-->
  40. <!--+
  41. |start Project Logo
  42. +-->
  43. <div class="projectlogo">
  44. <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
  45. </div>
  46. <!--+
  47. |end Project Logo
  48. +-->
  49. <!--+
  50. |start Search
  51. +-->
  52. <div class="searchbox">
  53. <form action="http://www.google.com/search" method="get" class="roundtopsmall">
  54. <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
  55. <input name="Search" value="Search" type="submit">
  56. </form>
  57. </div>
  58. <!--+
  59. |end search
  60. +-->
  61. <!--+
  62. |start Tabs
  63. +-->
  64. <ul id="tabs">
  65. <li>
  66. <a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
  67. </li>
  68. <li>
  69. <a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
  70. </li>
  71. <li class="current">
  72. <a class="selected" href="index.html">Hadoop 0.19 Documentation</a>
  73. </li>
  74. </ul>
  75. <!--+
  76. |end Tabs
  77. +-->
  78. </div>
  79. </div>
  80. <div id="main">
  81. <div id="publishedStrip">
  82. <!--+
  83. |start Subtabs
  84. +-->
  85. <div id="level2tabs"></div>
  86. <!--+
  87. |end Subtabs
  88. +-->
  89. <script type="text/javascript"><!--
  90. document.write("Last Published: " + document.lastModified);
  91. // --></script>
  92. </div>
  93. <!--+
  94. |breadtrail
  95. +-->
  96. <div class="breadtrail">
  97. &nbsp;
  98. </div>
  99. <!--+
  100. |start Menu, mainarea
  101. +-->
  102. <!--+
  103. |start Menu
  104. +-->
  105. <div id="menu">
  106. <div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
  107. <div id="menu_1.1" class="menuitemgroup">
  108. <div class="menuitem">
  109. <a href="index.html">Overview</a>
  110. </div>
  111. <div class="menuitem">
  112. <a href="quickstart.html">Quickstart</a>
  113. </div>
  114. <div class="menuitem">
  115. <a href="cluster_setup.html">Cluster Setup</a>
  116. </div>
  117. <div class="menuitem">
  118. <a href="hdfs_design.html">HDFS Architecture</a>
  119. </div>
  120. <div class="menuitem">
  121. <a href="hdfs_user_guide.html">HDFS User Guide</a>
  122. </div>
  123. <div class="menuitem">
  124. <a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
  125. </div>
  126. <div class="menuitem">
  127. <a href="hdfs_quota_admin_guide.html">HDFS Quotas Administrator Guide</a>
  128. </div>
  129. <div class="menuitem">
  130. <a href="commands_manual.html">Commands Manual</a>
  131. </div>
  132. <div class="menuitem">
  133. <a href="hdfs_shell.html">FS Shell Guide</a>
  134. </div>
  135. <div class="menuitem">
  136. <a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
  137. </div>
  138. <div class="menuitem">
  139. <a href="distcp.html">DistCp Guide</a>
  140. </div>
  141. <div class="menuitem">
  142. <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
  143. </div>
  144. <div class="menuitem">
  145. <a href="native_libraries.html">Native Hadoop Libraries</a>
  146. </div>
  147. <div class="menuitem">
  148. <a href="streaming.html">Streaming</a>
  149. </div>
  150. <div class="menuitem">
  151. <a href="hadoop_archives.html">Hadoop Archives</a>
  152. </div>
  153. <div class="menuitem">
  154. <a href="hod.html">Hadoop On Demand</a>
  155. </div>
  156. <div class="menuitem">
  157. <a href="capacity_scheduler.html">Capacity Scheduler</a>
  158. </div>
  159. <div class="menuitem">
  160. <a href="api/index.html">API Docs</a>
  161. </div>
  162. <div class="menuitem">
  163. <a href="jdiff/changes.html">API Changes</a>
  164. </div>
  165. <div class="menuitem">
  166. <a href="http://wiki.apache.org/hadoop/">Wiki</a>
  167. </div>
  168. <div class="menuitem">
  169. <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
  170. </div>
  171. <div class="menuitem">
  172. <a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
  173. </div>
  174. <div class="menuitem">
  175. <a href="releasenotes.html">Release Notes</a>
  176. </div>
  177. <div class="menuitem">
  178. <a href="changes.html">All Changes</a>
  179. </div>
  180. </div>
  181. <div id="credit"></div>
  182. <div id="roundbottom">
  183. <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
  184. <!--+
  185. |alternative credits
  186. +-->
  187. <div id="credit2"></div>
  188. </div>
  189. <!--+
  190. |end Menu
  191. +-->
  192. <!--+
  193. |start content
  194. +-->
  195. <div id="content">
  196. <div title="Portable Document Format" class="pdflink">
  197. <a class="dida" href="hod_config_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
  198. PDF</a>
  199. </div>
  200. <h1>
  201. Hadoop On Demand: Configuration Guide
  202. </h1>
  203. <div id="minitoc-area">
  204. <ul class="minitoc">
  205. <li>
  206. <a href="#1.+Introduction">1. Introduction</a>
  207. </li>
  208. <li>
  209. <a href="#2.+Sections">2. Sections</a>
  210. </li>
  211. <li>
  212. <a href="#3.+HOD+Configuration+Options">3. HOD Configuration Options</a>
  213. <ul class="minitoc">
  214. <li>
  215. <a href="#3.1+Common+configuration+options">3.1 Common configuration options</a>
  216. </li>
  217. <li>
  218. <a href="#3.2+hod+options">3.2 hod options</a>
  219. </li>
  220. <li>
  221. <a href="#3.3+resource_manager+options">3.3 resource_manager options</a>
  222. </li>
  223. <li>
  224. <a href="#3.4+ringmaster+options">3.4 ringmaster options</a>
  225. </li>
  226. <li>
  227. <a href="#3.5+gridservice-hdfs+options">3.5 gridservice-hdfs options</a>
  228. </li>
  229. <li>
  230. <a href="#3.6+gridservice-mapred+options">3.6 gridservice-mapred options</a>
  231. </li>
  232. <li>
  233. <a href="#3.7+hodring+options">3.7 hodring options</a>
  234. </li>
  235. </ul>
  236. </li>
  237. </ul>
  238. </div>
  239. <a name="N1000C"></a><a name="1.+Introduction"></a>
  240. <h2 class="h3">1. Introduction</h2>
  241. <div class="section">
  242. <p>This document explains some of the most important and commonly used
  243. Hadoop On Demand (HOD) configuration options. Configuration options
  244. can be specified in two ways: a configuration file
  245. in the INI format, and as command line options to the HOD shell,
  246. specified in the format --section.option[=value]. If the same option is
  247. specified in both places, the value specified on the command line
  248. overrides the value in the configuration file.</p>
  249. <p>
  250. To get a simple description of all configuration options, type:
  251. </p>
  252. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  253. <tr>
  254. <td colspan="1" rowspan="1"><span class="codefrag">$ hod --verbose-help</span></td>
  255. </tr>
  256. </table>
  257. </div>
  258. <a name="N10021"></a><a name="2.+Sections"></a>
  259. <h2 class="h3">2. Sections</h2>
  260. <div class="section">
  261. <p>HOD organizes configuration options into these sections:</p>
  262. <ul>
  263. <li> hod: Options for the HOD client</li>
  264. <li> resource_manager: Options for specifying which resource manager
  265. to use, and other parameters for using that resource manager</li>
  266. <li> ringmaster: Options for the RingMaster process</li>
  267. <li> hodring: Options for the HodRing processes</li>
  268. <li> gridservice-mapred: Options for the Map/Reduce daemons</li>
  269. <li> gridservice-hdfs: Options for the HDFS daemons.</li>
  270. </ul>
  271. </div>
  272. <a name="N10040"></a><a name="3.+HOD+Configuration+Options"></a>
  273. <h2 class="h3">3. HOD Configuration Options</h2>
  274. <div class="section">
  275. <p>The following section describes configuration options common to most
  276. HOD sections followed by sections that describe configuration options
  277. specific to each HOD section.</p>
  278. <a name="N10049"></a><a name="3.1+Common+configuration+options"></a>
  279. <h3 class="h4">3.1 Common configuration options</h3>
  280. <p>Certain configuration options are defined in most of the sections of
  281. the HOD configuration. Options defined in a section are used by the
  282. process to which that section applies. These options have the same
  283. meaning, but can have different values in each section.
  284. </p>
  285. <ul>
  286. <li>temp-dir: Temporary directory for usage by the HOD processes. Make
  287. sure that the users who will run hod have rights to create
  288. directories under the directory specified here. If you
  289. wish to make this directory vary across allocations,
  290. you can make use of the environmental variables which will
  291. be made available by the resource manager to the HOD
  292. processes. For example, in a Torque setup, having
  293. --ringmaster.temp-dir=/tmp/hod-temp-dir.$PBS_JOBID would
  294. let ringmaster use a different temp-dir for each
  295. allocation; Torque expands this variable before starting
  296. the ringmaster.</li>
  297. <li>debug: Numeric value from 1-4. 4 produces the most log information,
  298. and 1 the least.</li>
  299. <li>log-dir: Directory where log files are stored. By default, this is
  300. &lt;install-location&gt;/logs/. The restrictions and notes for the
  301. temp-dir variable apply here too.
  302. </li>
  303. <li>xrs-port-range: Range of ports, among which an available port shall
  304. be picked for use to run an XML-RPC server.</li>
  305. <li>http-port-range: Range of ports, among which an available port shall
  306. be picked for use to run an HTTP server.</li>
  307. <li>java-home: Location of Java to be used by Hadoop.</li>
  308. <li>syslog-address: Address to which a syslog daemon is bound. The format
  309. of the value is host:port. If configured, HOD log messages
  310. will be logged to syslog using this value.</li>
  311. </ul>
  312. <a name="N1006B"></a><a name="3.2+hod+options"></a>
  313. <h3 class="h4">3.2 hod options</h3>
  314. <ul>
  315. <li>cluster: Descriptive name given to the cluster. For Torque, this is
  316. specified as a 'Node property' for every node in the cluster.
  317. HOD uses this value to compute the number of available nodes.</li>
  318. <li>client-params: Comma-separated list of hadoop config parameters
  319. specified as key-value pairs. These will be used to
  320. generate a hadoop-site.xml on the submit node that
  321. should be used for running Map/Reduce jobs.</li>
  322. <li>job-feasibility-attr: Regular expression string that specifies
  323. whether and how to check job feasibility - resource
  324. manager or scheduler limits. The current
  325. implementation corresponds to the torque job
  326. attribute 'comment' and by default is disabled.
  327. When set, HOD uses it to decide what type
  328. of limit violation is triggered and either
  329. deallocates the cluster or stays in queued state,
  330. depending on whether the request is beyond maximum limits or
  331. the cumulative usage has crossed maximum limits, respectively.
  332. The torque comment attribute may be updated
  333. periodically by an external mechanism. For example,
  334. comment attribute can be updated by running <a href="hod_admin_guide.html#checklimits.sh+-+Tool+to+update+torque+comment+field+reflecting+resource+limits">
  335. checklimits.sh</a> script in hod/support directory,
  336. and then setting job-feasibility-attr equal to the
  337. value TORQUE_USER_LIMITS_COMMENT_FIELD,
  338. "User-limits exceeded. Requested:([0-9]*)
  339. Used:([0-9]*) MaxLimit:([0-9]*)", will make HOD
  340. behave accordingly.
  341. </li>
  342. </ul>
  343. <a name="N10082"></a><a name="3.3+resource_manager+options"></a>
  344. <h3 class="h4">3.3 resource_manager options</h3>
  345. <ul>
  346. <li>queue: Name of the queue configured in the resource manager to which
  347. jobs are to be submitted.</li>
  348. <li>batch-home: Install directory to which 'bin' is appended and under
  349. which the executables of the resource manager can be
  350. found.</li>
  351. <li>env-vars: Comma-separated list of key-value pairs,
  352. expressed as key=value, which would be passed to the jobs
  353. launched on the compute nodes.
  354. For example, if the python installation is
  355. in a non-standard location, one can set the environment
  356. variable 'HOD_PYTHON_HOME' to the path to the python
  357. executable. The HOD processes launched on the compute nodes
  358. can then use this variable.</li>
  359. <li>options: Comma-separated list of key-value pairs,
  360. expressed as
  361. &lt;option&gt;:&lt;sub-option&gt;=&lt;value&gt;. When
  362. passing to the job submission program, these are expanded
  363. as -&lt;option&gt; &lt;sub-option&gt;=&lt;value&gt;. These
  364. are generally used for specifying additional resource
  365. constraints for scheduling. For instance, with a Torque
  366. setup, one can specify
  367. --resource_manager.options='l:arch=x86_64' for
  368. constraining the nodes being allocated to a particular
  369. architecture; this option will be passed to Torque's qsub
  370. command as "-l arch=x86_64".</li>
  371. </ul>
  372. <a name="N10098"></a><a name="3.4+ringmaster+options"></a>
  373. <h3 class="h4">3.4 ringmaster options</h3>
  374. <ul>
  375. <li>work-dirs: Comma-separated list of paths that will serve
  376. as the root for directories that HOD generates and passes
  377. to Hadoop for use to store DFS and Map/Reduce data. For
  378. example,
  379. this is where DFS data blocks will be stored. Typically,
  380. as many paths are specified as there are disks available
  381. to ensure all disks are being utilized. The restrictions
  382. and notes for the temp-dir variable apply here too.</li>
  383. <li>max-master-failures: Number of times a hadoop master
  384. daemon can fail to launch, beyond which HOD will fail
  385. the cluster allocation altogether. In HOD clusters,
  386. sometimes there might be one or a few "bad" nodes due
  387. to issues like missing java, missing or incorrect version
  388. of Hadoop etc. When this configuration variable is set
  389. to a positive integer, the RingMaster returns an error
  390. to the client only when the number of times a hadoop
  391. master (JobTracker or NameNode) fails to start on these
  392. bad nodes because of above issues, exceeds the specified
  393. value. If the number is not exceeded, the next HodRing
  394. which requests a command to launch is given the same
  395. hadoop master again. This way, HOD tries its best for a
  396. successful allocation even in the presence of a few bad
  397. nodes in the cluster.
  398. </li>
  399. <li>workers_per_ring: Number of workers per service per HodRing.
  400. By default this is set to 1. If this configuration
  401. variable is set to a value 'n', the HodRing will run
  402. 'n' instances of the workers (TaskTrackers or DataNodes)
  403. on each node acting as a slave. This can be used to run
  404. multiple workers per HodRing, so that the total number of
  405. workers in a HOD cluster is not limited by the total
  406. number of nodes requested during allocation. However, note
  407. that this will mean each worker should be configured to use
  408. only a proportional fraction of the capacity of the
  409. resources on the node. In general, this feature is only
  410. useful for testing and simulation purposes, and not for
  411. production use.</li>
  412. </ul>
  413. <a name="N100AB"></a><a name="3.5+gridservice-hdfs+options"></a>
  414. <h3 class="h4">3.5 gridservice-hdfs options</h3>
  415. <ul>
  416. <li>external: If false, indicates that an HDFS cluster must be
  417. brought up by the HOD system, on the nodes which it
  418. allocates via the allocate command. Note that in that case,
  419. when the cluster is de-allocated, it will bring down the
  420. HDFS cluster, and all the data will be lost.
  421. If true, it will try and connect to an externally configured
  422. HDFS system.
  423. Typically, because input for jobs is placed into HDFS
  424. before jobs are run, and also the output from jobs in HDFS
  425. is required to be persistent, an internal HDFS cluster is
  426. of little value in a production system. However, it allows
  427. for quick testing.</li>
  428. <li>host: Hostname of the externally configured NameNode, if any</li>
  429. <li>fs_port: Port to which NameNode RPC server is bound.</li>
  430. <li>info_port: Port to which the NameNode web UI server is bound.</li>
  431. <li>pkgs: Installation directory, under which bin/hadoop executable is
  432. located. This can be used to use a pre-installed version of
  433. Hadoop on the cluster.</li>
  434. <li>server-params: Comma-separated list of hadoop config parameters
  435. specified as key-value pairs. These will be used to
  436. generate a hadoop-site.xml that will be used by the
  437. NameNode and DataNodes.</li>
  438. <li>final-server-params: Same as above, except they will be marked final.</li>
  439. </ul>
  440. <a name="N100CA"></a><a name="3.6+gridservice-mapred+options"></a>
  441. <h3 class="h4">3.6 gridservice-mapred options</h3>
  442. <ul>
  443. <li>external: If false, indicates that a Map/Reduce cluster must be
  444. brought up by the HOD system on the nodes which it allocates
  445. via the allocate command.
  446. If true, it will try and connect to an externally
  447. configured Map/Reduce system.</li>
  448. <li>host: Hostname of the externally configured JobTracker, if any</li>
  449. <li>tracker_port: Port to which the JobTracker RPC server is bound</li>
  450. <li>info_port: Port to which the JobTracker web UI server is bound.</li>
  451. <li>pkgs: Installation directory, under which bin/hadoop executable is
  452. located</li>
  453. <li>server-params: Comma-separated list of hadoop config parameters
  454. specified as key-value pairs. These will be used to
  455. generate a hadoop-site.xml that will be used by the
  456. JobTracker and TaskTrackers</li>
  457. <li>final-server-params: Same as above, except they will be marked final.</li>
  458. </ul>
  459. <a name="N100E9"></a><a name="3.7+hodring+options"></a>
  460. <h3 class="h4">3.7 hodring options</h3>
  461. <ul>
  462. <li>mapred-system-dir-root: Directory in the DFS under which HOD will
  463. generate sub-directory names and pass the full path
  464. as the value of the 'mapred.system.dir' configuration
  465. parameter to Hadoop daemons. The format of the full
  466. path will be value-of-this-option/userid/mapredsystem/cluster-id.
  467. Note that the directory specified here should be such
  468. that all users can create directories under this, if
  469. permissions are enabled in HDFS. Setting the value of
  470. this option to /user will make HOD use the user's
  471. home directory to generate the mapred.system.dir value.</li>
  472. <li>log-destination-uri: URL describing a path in an external, static DFS or the
  473. cluster node's local file system where HOD will upload
  474. Hadoop logs when a cluster is deallocated. To specify a
  475. DFS path, use the format 'hdfs://path'. To specify a
  476. cluster node's local file path, use the format 'file://path'.
  477. When clusters are deallocated by HOD, the hadoop logs will
  478. be deleted as part of HOD's cleanup process. To ensure these
  479. logs persist, you can use this configuration option.
  480. The format of the path is
  481. value-of-this-option/userid/hod-logs/cluster-id
  482. Note that the directory you specify here must be such that all
  483. users can create sub-directories under this. Setting this value
  484. to hdfs://user will place the logs in the user's home directory
  485. in DFS.</li>
  486. <li>pkgs: Installation directory, under which bin/hadoop executable is located. This will
  487. be used by HOD to upload logs if an HDFS URL is specified in log-destination-uri
  488. option. Note that this is useful if the users are using a tarball whose version
  489. may differ from the external, static HDFS version.</li>
  490. </ul>
  491. </div>
  492. </div>
  493. <!--+
  494. |end content
  495. +-->
  496. <div class="clearboth">&nbsp;</div>
  497. </div>
  498. <div id="footer">
  499. <!--+
  500. |start bottomstrip
  501. +-->
  502. <div class="lastmodified">
  503. <script type="text/javascript"><!--
  504. document.write("Last Published: " + document.lastModified);
  505. // --></script>
  506. </div>
  507. <div class="copyright">
  508. Copyright &copy;
  509. 2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
  510. </div>
  511. <!--+
  512. |end bottomstrip
  513. +-->
  514. </div>
  515. </body>
  516. </html>