hod_admin_guide.html 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2. <html>
  3. <head>
  4. <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5. <meta content="Apache Forrest" name="Generator">
  6. <meta name="Forrest-version" content="0.8">
  7. <meta name="Forrest-skin-name" content="pelt">
  8. <title>
  9. Hadoop On Demand
  10. </title>
  11. <link type="text/css" href="skin/basic.css" rel="stylesheet">
  12. <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
  13. <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
  14. <link type="text/css" href="skin/profile.css" rel="stylesheet">
  15. <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
  16. <link rel="shortcut icon" href="images/favicon.ico">
  17. </head>
  18. <body onload="init()">
  19. <script type="text/javascript">ndeSetTextSize();</script>
  20. <div id="top">
  21. <!--+
  22. |breadtrail
  23. +-->
  24. <div class="breadtrail">
  25. <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
  26. </div>
  27. <!--+
  28. |header
  29. +-->
  30. <div class="header">
  31. <!--+
  32. |start group logo
  33. +-->
  34. <div class="grouplogo">
  35. <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
  36. </div>
  37. <!--+
  38. |end group logo
  39. +-->
  40. <!--+
  41. |start Project Logo
  42. +-->
  43. <div class="projectlogo">
  44. <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
  45. </div>
  46. <!--+
  47. |end Project Logo
  48. +-->
  49. <!--+
  50. |start Search
  51. +-->
  52. <div class="searchbox">
  53. <form action="http://www.google.com/search" method="get" class="roundtopsmall">
  54. <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
  55. <input name="Search" value="Search" type="submit">
  56. </form>
  57. </div>
  58. <!--+
  59. |end search
  60. +-->
  61. <!--+
  62. |start Tabs
  63. +-->
  64. <ul id="tabs">
  65. <li>
  66. <a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
  67. </li>
  68. <li>
  69. <a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
  70. </li>
  71. <li class="current">
  72. <a class="selected" href="index.html">Hadoop 0.16 Documentation</a>
  73. </li>
  74. </ul>
  75. <!--+
  76. |end Tabs
  77. +-->
  78. </div>
  79. </div>
  80. <div id="main">
  81. <div id="publishedStrip">
  82. <!--+
  83. |start Subtabs
  84. +-->
  85. <div id="level2tabs"></div>
  86. <!--+
  87. |end Endtabs
  88. +-->
  89. <script type="text/javascript"><!--
  90. document.write("Last Published: " + document.lastModified);
  91. // --></script>
  92. </div>
  93. <!--+
  94. |breadtrail
  95. +-->
  96. <div class="breadtrail">
  97. &nbsp;
  98. </div>
  99. <!--+
  100. |start Menu, mainarea
  101. +-->
  102. <!--+
  103. |start Menu
  104. +-->
  105. <div id="menu">
  106. <div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
  107. <div id="menu_1.1" class="menuitemgroup">
  108. <div class="menuitem">
  109. <a href="index.html">Overview</a>
  110. </div>
  111. <div class="menuitem">
  112. <a href="quickstart.html">Quickstart</a>
  113. </div>
  114. <div class="menuitem">
  115. <a href="cluster_setup.html">Cluster Setup</a>
  116. </div>
  117. <div class="menuitem">
  118. <a href="hdfs_design.html">HDFS Architecture</a>
  119. </div>
  120. <div class="menuitem">
  121. <a href="hdfs_user_guide.html">HDFS User Guide</a>
  122. </div>
  123. <div class="menuitem">
  124. <a href="hdfs_shell.html">HDFS Shell Guide</a>
  125. </div>
  126. <div class="menuitem">
  127. <a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
  128. </div>
  129. <div class="menuitem">
  130. <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
  131. </div>
  132. <div class="menuitem">
  133. <a href="native_libraries.html">Native Hadoop Libraries</a>
  134. </div>
  135. <div class="menuitem">
  136. <a href="streaming.html">Streaming</a>
  137. </div>
  138. <div class="menuitem">
  139. <a href="hod.html">Hadoop On Demand</a>
  140. </div>
  141. <div class="menuitem">
  142. <a href="api/index.html">API Docs</a>
  143. </div>
  144. <div class="menuitem">
  145. <a href="http://wiki.apache.org/hadoop/">Wiki</a>
  146. </div>
  147. <div class="menuitem">
  148. <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
  149. </div>
  150. <div class="menuitem">
  151. <a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
  152. </div>
  153. <div class="menuitem">
  154. <a href="changes.html">Release Notes</a>
  155. </div>
  156. </div>
  157. <div id="credit"></div>
  158. <div id="roundbottom">
  159. <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
  160. <!--+
  161. |alternative credits
  162. +-->
  163. <div id="credit2"></div>
  164. </div>
  165. <!--+
  166. |end Menu
  167. +-->
  168. <!--+
  169. |start content
  170. +-->
  171. <div id="content">
  172. <div title="Portable Document Format" class="pdflink">
  173. <a class="dida" href="hod_admin_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
  174. PDF</a>
  175. </div>
  176. <h1>
  177. Hadoop On Demand
  178. </h1>
  179. <div id="minitoc-area">
  180. <ul class="minitoc">
  181. <li>
  182. <a href="#Overview">Overview</a>
  183. </li>
  184. <li>
  185. <a href="#Pre-requisites">Pre-requisites</a>
  186. </li>
  187. <li>
  188. <a href="#Resource+Manager">Resource Manager</a>
  189. </li>
  190. <li>
  191. <a href="#Installing+HOD">Installing HOD</a>
  192. </li>
  193. <li>
  194. <a href="#Configuring+HOD">Configuring HOD</a>
  195. <ul class="minitoc">
  196. <li>
  197. <a href="#Minimal+Configuration+to+get+started">Minimal Configuration to get started</a>
  198. </li>
  199. <li>
  200. <a href="#Advanced+Configuration">Advanced Configuration</a>
  201. </li>
  202. </ul>
  203. </li>
  204. <li>
  205. <a href="#Running+HOD">Running HOD</a>
  206. </li>
  207. </ul>
  208. </div>
  209. <a name="N1000C"></a><a name="Overview"></a>
  210. <h2 class="h3">Overview</h2>
  211. <div class="section">
  212. <p>The Hadoop On Demand (HOD) project is a system for provisioning and
  213. managing independent Hadoop MapReduce and HDFS instances on a shared cluster
  214. of nodes. HOD is a tool that makes it easy for administrators and users to
  215. quickly set up and use Hadoop. It is also a very useful tool for Hadoop developers
  216. and testers who need to share a physical cluster for testing their own Hadoop
  217. versions.
  218. </p>
  219. <p>HOD relies on a resource manager (RM) for allocation of nodes that it can use for
  220. running Hadoop instances. At present it runs with the <a href="http://www.clusterresources.com/pages/products/torque-resource-manager.php">Torque
  221. resource manager</a>.
  222. </p>
  223. <p>
  224. The basic system architecture of HOD includes components from:</p>
  225. <ul>
  226. <li>A Resource manager (possibly together with a scheduler),</li>
  227. <li>HOD components, and </li>
  228. <li>Hadoop Map/Reduce and HDFS daemons.</li>
  229. </ul>
  230. <p>
  231. HOD provisions and maintains Hadoop Map/Reduce and, optionally, HDFS instances
  232. through interaction with the above components on a given cluster of nodes. A cluster of
  233. nodes can be thought of as comprising two sets of nodes:</p>
  234. <ul>
  235. <li>Submit nodes: Users use the HOD client on these nodes to allocate clusters, and then
  236. use the Hadoop client to submit Hadoop jobs. </li>
  237. <li>Compute nodes: Using the resource manager, HOD components are run on these nodes to
  238. provision the Hadoop daemons. After that Hadoop jobs run on them.</li>
  239. </ul>
  240. <p>
  241. Here is a brief description of the sequence of operations in allocating a cluster and
  242. running jobs on it.
  243. </p>
  244. <ul>
  245. <li>The user uses the HOD client on the Submit node to allocate a required number of
  246. cluster nodes, and provision Hadoop on them.</li>
  247. <li>The HOD client uses a Resource Manager interface (qsub, in Torque) to submit a HOD
  248. process, called the RingMaster, as a Resource Manager job, requesting the user-desired number
  249. of nodes. This job is submitted to the central server of the Resource Manager (pbs_server, in Torque).</li>
  250. <li>On the compute nodes, the resource manager slave daemons (pbs_moms, in Torque) accept
  251. and run jobs that they are given by the central server (pbs_server, in Torque). The RingMaster
  252. process is started on one of the compute nodes (mother superior, in Torque).</li>
  253. <li>The RingMaster then uses another Resource Manager interface (pbsdsh, in Torque) to run
  254. the second HOD component, HodRing, as distributed tasks on each of the compute
  255. nodes allocated.</li>
  256. <li>The HodRings, after initializing, communicate with the RingMaster to get Hadoop commands,
  257. and run them accordingly. Once the Hadoop commands are started, they register with the RingMaster,
  258. giving information about the daemons.</li>
  259. <li>All the configuration files needed for Hadoop instances are generated by HOD itself,
  260. some obtained from options given by user in its own configuration file.</li>
  261. <li>The HOD client keeps communicating with the RingMaster to find out the location of the
  262. JobTracker and HDFS daemons.</li>
  263. </ul>
  264. <p>The rest of the document deals with the steps needed to set up HOD on a physical cluster of nodes.</p>
  265. </div>
  266. <a name="N10056"></a><a name="Pre-requisites"></a>
  267. <h2 class="h3">Pre-requisites</h2>
  268. <div class="section">
  269. <p>Operating System: HOD is currently tested on RHEL4.<br>
  270. Nodes : HOD requires a minimum of 3 nodes configured through a resource manager.<br>
  271. </p>
  272. <p> Software </p>
  273. <p>The following components are to be installed on *ALL* the nodes before using HOD:</p>
  274. <ul>
  275. <li>Torque: Resource manager</li>
  276. <li>
  277. <a href="http://www.python.org">Python</a> : HOD requires version 2.5.1 of Python.</li>
  278. </ul>
  279. <p>The following components can be optionally installed for getting better
  280. functionality from HOD:</p>
  281. <ul>
  282. <li>
  283. <a href="http://twistedmatrix.com/trac/">Twisted Python</a>: This can be
  284. used for improving the scalability of HOD. If this module is detected to be
  285. installed, HOD uses it, else it falls back to default modules.</li>
  286. <li>
  287. <a href="http://hadoop.apache.org/core/">Hadoop</a>: HOD can automatically
  288. distribute Hadoop to all nodes in the cluster. However, it can also use a
  289. pre-installed version of Hadoop, if it is available on all nodes in the cluster.
  290. HOD currently supports Hadoop 0.15 and above.</li>
  291. </ul>
  292. <p>NOTE: HOD configuration requires the location of installs of these
  293. components to be the same on all nodes in the cluster. It will also
  294. make the configuration simpler to have the same location on the submit
  295. nodes.
  296. </p>
  297. </div>
  298. <a name="N1008A"></a><a name="Resource+Manager"></a>
  299. <h2 class="h3">Resource Manager</h2>
  300. <div class="section">
  301. <p> Currently HOD works with the Torque resource manager, which it uses for its node
  302. allocation and job submission. Torque is an open source resource manager from
  303. <a href="http://www.clusterresources.com">Cluster Resources</a>, a community effort
  304. based on the PBS project. It provides control over batch jobs and distributed compute nodes. Torque is
  305. freely available for download from <a href="http://www.clusterresources.com/downloads/torque/">here</a>.
  306. </p>
  307. <p> All documentation related to torque can be seen under
  308. the section TORQUE Resource Manager <a href="http://www.clusterresources.com/pages/resources/documentation.php">here</a>. You can
  309. get wiki documentation from <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki">here</a>.
  310. Users may wish to subscribe to TORQUE&rsquo;s mailing list or view the archive for questions
  311. and comments <a href="http://www.clusterresources.com/pages/resources/mailing-lists.php">here</a>.
  312. </p>
  313. <p>For using HOD with Torque:</p>
  314. <ul>
  315. <li>Install Torque components: pbs_server on one node (head node), pbs_mom on all
  316. compute nodes, and PBS client tools on all compute nodes and submit
  317. nodes. Perform at least a basic configuration so that the Torque system is up and
  318. running, i.e., pbs_server knows which machines to talk to. Look <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.2_basic_configuration">here</a>
  319. for basic configuration.
  320. For advanced configuration, see <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.3_advanced_configuration">here</a>.
  321. </li>
  322. <li>Create a queue for submitting jobs on the pbs_server. The name of the queue is the
  323. same as the HOD configuration parameter, resource_manager.queue. The HOD client uses this queue to
  324. submit the RingMaster process as a Torque job.</li>
  325. <li>Specify a 'cluster name' as a 'property' for all nodes in the cluster.
  326. This can be done by using the 'qmgr' command. For example:
  327. qmgr -c "set node node properties=cluster-name". The name of the cluster is the same as
  328. the HOD configuration parameter, hod.cluster. </li>
  329. <li>Ensure that jobs can be submitted to the nodes. This can be done by
  330. using the 'qsub' command. For example:
  331. echo "sleep 30" | qsub -l nodes=3</li>
  332. </ul>
  333. </div>
  334. <a name="N100C4"></a><a name="Installing+HOD"></a>
  335. <h2 class="h3">Installing HOD</h2>
  336. <div class="section">
  337. <p>Now that the resource manager setup is done, we proceed to obtaining and
  338. installing HOD.</p>
  339. <ul>
  340. <li>If you are getting HOD from the Hadoop tarball, it is available under the
  341. 'contrib' section of Hadoop, under the root directory 'hod'.</li>
  342. <li>If you are building from source, you can run ant tar from the Hadoop root
  343. directory, to generate the Hadoop tarball, and then pick HOD from there,
  344. as described in the point above.</li>
  345. <li>Distribute the files under this directory to all the nodes in the
  346. cluster. Note that the location where the files are copied should be
  347. the same on all the nodes.</li>
  348. <li>Note that compiling hadoop would build HOD with appropriate permissions
  349. set on all the required script files in HOD.</li>
  350. </ul>
  351. </div>
  352. <a name="N100DD"></a><a name="Configuring+HOD"></a>
  353. <h2 class="h3">Configuring HOD</h2>
  354. <div class="section">
  355. <p>After HOD installation is done, it has to be configured before we start using
  356. it.</p>
  357. <a name="N100E6"></a><a name="Minimal+Configuration+to+get+started"></a>
  358. <h3 class="h4">Minimal Configuration to get started</h3>
  359. <ul>
  360. <li>On the node from where you want to run hod, edit the file hodrc
  361. which can be found in the &lt;install dir&gt;/conf directory. This file
  362. contains the minimal set of values required for running hod.</li>
  363. <li>
  364. <p>Specify values suitable to your environment for the following
  365. variables defined in the configuration file. Note that some of these
  366. variables are defined at more than one place in the file.</p>
  367. <ul>
  368. <li>${JAVA_HOME}: Location of Java for Hadoop. Hadoop supports Sun JDK
  369. 1.5.x and above.</li>
  370. <li>${CLUSTER_NAME}: Name of the cluster which is specified in the
  371. 'node property' as mentioned in resource manager configuration.</li>
  372. <li>${HADOOP_HOME}: Location of Hadoop installation on the compute and
  373. submit nodes.</li>
  374. <li>${RM_QUEUE}: Queue configured for submitting jobs in the resource
  375. manager configuration.</li>
  376. <li>${RM_HOME}: Location of the resource manager installation on the
  377. compute and submit nodes.</li>
  378. </ul>
  379. </li>
  380. <li>
  381. <p>The following environment variables *may* need to be set depending on
  382. your environment. These variables must be defined where you run the
  383. HOD client, and also be specified in the HOD configuration file as the
  384. value of the key resource_manager.env-vars. Multiple variables can be
  385. specified as a comma separated list of key=value pairs.</p>
  386. <ul>
  387. <li>HOD_PYTHON_HOME: If you install python to a non-default location
  388. on the compute nodes or submit nodes, then this variable must be
  389. defined to point to the python executable in the non-standard
  390. location.</li>
  391. </ul>
  392. </li>
  393. </ul>
  394. <a name="N10117"></a><a name="Advanced+Configuration"></a>
  395. <h3 class="h4">Advanced Configuration</h3>
  396. <p> You can review other configuration options in the file and modify them to suit
  397. your needs. Refer to the <a href="hod_config_guide.html">Configuration Guide</a> for information about the HOD
  398. configuration.
  399. </p>
  400. </div>
  401. <a name="N10126"></a><a name="Running+HOD"></a>
  402. <h2 class="h3">Running HOD</h2>
  403. <div class="section">
  404. <p>You can now proceed to the <a href="hod_user_guide.html">HOD User Guide</a> for information about how to run HOD,
  405. its various features and options, and for help in troubleshooting.</p>
  406. </div>
  407. </div>
  408. <!--+
  409. |end content
  410. +-->
  411. <div class="clearboth">&nbsp;</div>
  412. </div>
  413. <div id="footer">
  414. <!--+
  415. |start bottomstrip
  416. +-->
  417. <div class="lastmodified">
  418. <script type="text/javascript"><!--
  419. document.write("Last Published: " + document.lastModified);
  420. // --></script>
  421. </div>
  422. <div class="copyright">
  423. Copyright &copy;
  424. 2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
  425. </div>
  426. <!--+
  427. |end bottomstrip
  428. +-->
  429. </div>
  430. </body>
  431. </html>