hod_config_guide.html 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2. <html>
  3. <head>
  4. <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5. <meta content="Apache Forrest" name="Generator">
  6. <meta name="Forrest-version" content="0.8">
  7. <meta name="Forrest-skin-name" content="pelt">
  8. <title>
  9. Hadoop On Demand: Configuration Guide
  10. </title>
  11. <link type="text/css" href="skin/basic.css" rel="stylesheet">
  12. <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
  13. <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
  14. <link type="text/css" href="skin/profile.css" rel="stylesheet">
  15. <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
  16. <link rel="shortcut icon" href="images/favicon.ico">
  17. </head>
  18. <body onload="init()">
  19. <script type="text/javascript">ndeSetTextSize();</script>
  20. <div id="top">
  21. <!--+
  22. |breadtrail
  23. +-->
  24. <div class="breadtrail">
  25. <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
  26. </div>
  27. <!--+
  28. |header
  29. +-->
  30. <div class="header">
  31. <!--+
  32. |start group logo
  33. +-->
  34. <div class="grouplogo">
  35. <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
  36. </div>
  37. <!--+
  38. |end group logo
  39. +-->
  40. <!--+
  41. |start Project Logo
  42. +-->
  43. <div class="projectlogo">
  44. <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.jpg" title="Scalable Computing Platform"></a>
  45. </div>
  46. <!--+
  47. |end Project Logo
  48. +-->
  49. <!--+
  50. |start Search
  51. +-->
  52. <div class="searchbox">
  53. <form action="http://www.google.com/search" method="get" class="roundtopsmall">
  54. <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
  55. <input name="Search" value="Search" type="submit">
  56. </form>
  57. </div>
  58. <!--+
  59. |end search
  60. +-->
  61. <!--+
  62. |start Tabs
  63. +-->
  64. <ul id="tabs">
  65. <li>
  66. <a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
  67. </li>
  68. <li>
  69. <a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
  70. </li>
  71. <li class="current">
  72. <a class="selected" href="index.html">Hadoop 0.16 Documentation</a>
  73. </li>
  74. </ul>
  75. <!--+
  76. |end Tabs
  77. +-->
  78. </div>
  79. </div>
  80. <div id="main">
  81. <div id="publishedStrip">
  82. <!--+
  83. |start Subtabs
  84. +-->
  85. <div id="level2tabs"></div>
  86. <!--+
  87. |end Endtabs
  88. +-->
  89. <script type="text/javascript"><!--
  90. document.write("Last Published: " + document.lastModified);
  91. // --></script>
  92. </div>
  93. <!--+
  94. |breadtrail
  95. +-->
  96. <div class="breadtrail">
  97. &nbsp;
  98. </div>
  99. <!--+
  100. |start Menu, mainarea
  101. +-->
  102. <!--+
  103. |start Menu
  104. +-->
  105. <div id="menu">
  106. <div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
  107. <div id="menu_1.1" class="menuitemgroup">
  108. <div class="menuitem">
  109. <a href="index.html">Overview</a>
  110. </div>
  111. <div class="menuitem">
  112. <a href="quickstart.html">Quickstart</a>
  113. </div>
  114. <div class="menuitem">
  115. <a href="cluster_setup.html">Cluster Setup</a>
  116. </div>
  117. <div class="menuitem">
  118. <a href="hdfs_design.html">HDFS Architecture</a>
  119. </div>
  120. <div class="menuitem">
  121. <a href="hdfs_user_guide.html">HDFS User Guide</a>
  122. </div>
  123. <div class="menuitem">
  124. <a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
  125. </div>
  126. <div class="menuitem">
  127. <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
  128. </div>
  129. <div class="menuitem">
  130. <a href="native_libraries.html">Native Hadoop Libraries</a>
  131. </div>
  132. <div class="menuitem">
  133. <a href="streaming.html">Streaming</a>
  134. </div>
  135. <div class="menuitem">
  136. <a href="hod.html">Hadoop On Demand</a>
  137. </div>
  138. <div class="menuitem">
  139. <a href="api/index.html">API Docs</a>
  140. </div>
  141. <div class="menuitem">
  142. <a href="http://wiki.apache.org/hadoop/">Wiki</a>
  143. </div>
  144. <div class="menuitem">
  145. <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
  146. </div>
  147. <div class="menuitem">
  148. <a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
  149. </div>
  150. </div>
  151. <div id="credit"></div>
  152. <div id="roundbottom">
  153. <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
  154. <!--+
  155. |alternative credits
  156. +-->
  157. <div id="credit2"></div>
  158. </div>
  159. <!--+
  160. |end Menu
  161. +-->
  162. <!--+
  163. |start content
  164. +-->
  165. <div id="content">
  166. <div title="Portable Document Format" class="pdflink">
  167. <a class="dida" href="hod_config_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
  168. PDF</a>
  169. </div>
  170. <h1>
  171. Hadoop On Demand: Configuration Guide
  172. </h1>
  173. <div id="minitoc-area">
  174. <ul class="minitoc">
  175. <li>
  176. <a href="#1.+Introduction">1. Introduction</a>
  177. </li>
  178. <li>
  179. <a href="#2.+Sections">2. Sections</a>
  180. </li>
  181. <li>
  182. <a href="#3.+Important+%2F+Commonly+Used+Configuration+Options">3. Important / Commonly Used Configuration Options</a>
  183. <ul class="minitoc">
  184. <li>
  185. <a href="#3.1+Common+configuration+options">3.1 Common configuration options</a>
  186. </li>
  187. <li>
  188. <a href="#3.2+hod+options">3.2 hod options</a>
  189. </li>
  190. <li>
  191. <a href="#3.3+resource_manager+options">3.3 resource_manager options</a>
  192. </li>
  193. <li>
  194. <a href="#3.4+ringmaster+options">3.4 ringmaster options</a>
  195. </li>
  196. <li>
  197. <a href="#3.5+gridservice-hdfs+options">3.5 gridservice-hdfs options</a>
  198. </li>
  199. <li>
  200. <a href="#3.6+gridservice-mapred+options">3.6 gridservice-mapred options</a>
  201. </li>
  202. </ul>
  203. </li>
  204. </ul>
  205. </div>
  206. <a name="N1000C"></a><a name="1.+Introduction"></a>
  207. <h2 class="h3">1. Introduction</h2>
  208. <div class="section">
  209. <p>Configuration options for HOD are organized as sections and options
  210. within them. They can be specified in two ways: a configuration file
  211. in the INI format, and as command line options to the HOD shell,
  212. specified in the format --section.option[=value]. If the same option is
  213. specified in both places, the value specified on the command line
  214. overrides the value in the configuration file.</p>
  215. <p>
  216. To get a simple description of all configuration options, you can type
  217. </p>
  218. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  219. <tr>
  220. <td colspan="1" rowspan="1"><span class="codefrag">$ hod --verbose-help</span></td>
  221. </tr>
  222. </table>
  223. <p>This document explains some of the most important or commonly used
  224. configuration options in some more detail.</p>
  225. </div>
  226. <a name="N10024"></a><a name="2.+Sections"></a>
  227. <h2 class="h3">2. Sections</h2>
  228. <div class="section">
  229. <p>The following are the various sections in the HOD configuration:</p>
  230. <ul>
  231. <li> hod: Options for the HOD client</li>
  232. <li> resource_manager: Options for specifying which resource manager
  233. to use, and other parameters for using that resource manager</li>
  234. <li> ringmaster: Options for the RingMaster process</li>
  235. <li> hodring: Options for the HodRing processes</li>
  236. <li> gridservice-mapred: Options for the MapReduce daemons</li>
  237. <li> gridservice-hdfs: Options for the HDFS daemons.</li>
  238. </ul>
  239. <p>The next section deals with some of the important options in the HOD
  240. configuration.</p>
  241. </div>
  242. <a name="N10046"></a><a name="3.+Important+%2F+Commonly+Used+Configuration+Options"></a>
  243. <h2 class="h3">3. Important / Commonly Used Configuration Options</h2>
  244. <div class="section">
  245. <a name="N1004C"></a><a name="3.1+Common+configuration+options"></a>
  246. <h3 class="h4">3.1 Common configuration options</h3>
  247. <p>Certain configuration options are defined in most of the sections of
  248. the HOD configuration. Options defined in a section are used by the
  249. process to which that section applies. These options have the same
  250. meaning, but can have different values in each section.
  251. </p>
  252. <ul>
  253. <li>temp-dir: Temporary directory for usage by the HOD processes. Make
  254. sure that the users who will run hod have rights to create
  255. directories under the directory specified here.</li>
  256. <li>debug: A numeric value from 1-4. 4 produces the most log information,
  257. and 1 the least.</li>
  258. <li>log-dir: Directory where log files are stored. By default, this is
  259. &lt;install-location&gt;/logs/. The restrictions and notes for the
  260. temp-dir variable apply here too.
  261. </li>
  262. <li>xrs-port-range: A range of ports, among which an available port shall
  263. be picked for use to run an XML-RPC server.</li>
  264. <li>http-port-range: A range of ports, among which an available port shall
  265. be picked for use to run an HTTP server.</li>
  266. <li>java-home: Location of Java to be used by Hadoop.</li>
  267. </ul>
  268. <a name="N1006B"></a><a name="3.2+hod+options"></a>
  269. <h3 class="h4">3.2 hod options</h3>
  270. <ul>
  271. <li>cluster: A descriptive name given to the cluster. For Torque, this is
  272. specified as a 'Node property' for every node in the cluster.
  273. HOD uses this value to compute the number of available nodes.</li>
  274. <li>client-params: A comma-separated list of hadoop config parameters
  275. specified as key-value pairs. These will be used to
  276. generate a hadoop-site.xml on the submit node that
  277. should be used for running MapReduce jobs.</li>
  278. </ul>
  279. <a name="N1007B"></a><a name="3.3+resource_manager+options"></a>
  280. <h3 class="h4">3.3 resource_manager options</h3>
  281. <ul>
  282. <li>queue: Name of the queue configured in the resource manager to which
  283. jobs are to be submitted.</li>
  284. <li>batch-home: Install directory to which 'bin' is appended and under
  285. which the executables of the resource manager can be
  286. found.</li>
  287. <li>env-vars: This is a comma-separated list of key-value pairs,
  288. expressed as key=value, which would be passed to the jobs
  289. launched on the compute nodes.
  290. For example, if the python installation is
  291. in a non-standard location, one can set the environment
  292. variable 'HOD_PYTHON_HOME' to the path to the python
  293. executable. The HOD processes launched on the compute nodes
  294. can then use this variable.</li>
  295. </ul>
  296. <a name="N1008E"></a><a name="3.4+ringmaster+options"></a>
  297. <h3 class="h4">3.4 ringmaster options</h3>
  298. <ul>
  299. <li>work-dirs: A comma-separated list of paths that will serve
  300. as the root for directories that HOD generates and passes
  301. to Hadoop for use to store DFS / MapReduce data. For example,
  302. this is where DFS data blocks will be stored. Typically,
  303. as many paths are specified as there are disks available
  304. to ensure all disks are being utilized. The restrictions
  305. and notes for the temp-dir variable apply here too.</li>
  306. </ul>
  307. <a name="N1009B"></a><a name="3.5+gridservice-hdfs+options"></a>
  308. <h3 class="h4">3.5 gridservice-hdfs options</h3>
  309. <ul>
  310. <li>external: If false, this indicates that an HDFS cluster must be
  311. brought up by the HOD system, on the nodes which it
  312. allocates via the allocate command. Note that in that case,
  313. when the cluster is de-allocated, it will bring down the
  314. HDFS cluster, and all the data will be lost.
  315. If true, it will try and connect to an externally configured
  316. HDFS system.
  317. Typically, because input for jobs is placed into HDFS
  318. before jobs are run, and also the output from jobs in HDFS
  319. is required to be persistent, an internal HDFS cluster is
  320. of little value in a production system. However, it allows
  321. for quick testing.</li>
  322. <li>host: Hostname of the externally configured NameNode, if any</li>
  323. <li>fs_port: Port to which NameNode RPC server is bound.</li>
  324. <li>info_port: Port to which the NameNode web UI server is bound.</li>
  325. <li>pkgs: Installation directory, under which bin/hadoop executable is
  326. located. This can be used to use a pre-installed version of
  327. Hadoop on the cluster.</li>
  328. <li>server-params: A comma-separated list of hadoop config parameters
  329. specified as key-value pairs. These will be used to
  330. generate a hadoop-site.xml that will be used by the
  331. NameNode and DataNodes.</li>
  332. <li>final-server-params: Same as above, except they will be marked final.</li>
  333. </ul>
  334. <a name="N100BA"></a><a name="3.6+gridservice-mapred+options"></a>
  335. <h3 class="h4">3.6 gridservice-mapred options</h3>
  336. <ul>
  337. <li>external: If false, this indicates that a MapReduce cluster must be
  338. brought up by the HOD system on the nodes which it allocates
  339. via the allocate command.
  340. If true, it will try and connect to an externally
  341. configured MapReduce system.</li>
  342. <li>host: Hostname of the externally configured JobTracker, if any</li>
  343. <li>tracker_port: Port to which the JobTracker RPC server is bound</li>
  344. <li>info_port: Port to which the JobTracker web UI server is bound.</li>
  345. <li>pkgs: Installation directory, under which bin/hadoop executable is
  346. located</li>
  347. <li>server-params: A comma-separated list of hadoop config parameters
  348. specified as key-value pairs. These will be used to
  349. generate a hadoop-site.xml that will be used by the
  350. JobTracker and TaskTrackers</li>
  351. <li>final-server-params: Same as above, except they will be marked final.</li>
  352. </ul>
  353. </div>
  354. </div>
  355. <!--+
  356. |end content
  357. +-->
  358. <div class="clearboth">&nbsp;</div>
  359. </div>
  360. <div id="footer">
  361. <!--+
  362. |start bottomstrip
  363. +-->
  364. <div class="lastmodified">
  365. <script type="text/javascript"><!--
  366. document.write("Last Published: " + document.lastModified);
  367. // --></script>
  368. </div>
  369. <div class="copyright">
  370. Copyright &copy;
  371. 2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
  372. </div>
  373. <!--+
  374. |end bottomstrip
  375. +-->
  376. </div>
  377. </body>
  378. </html>