  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2. <html>
  3. <head>
  4. <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5. <meta content="Apache Forrest" name="Generator">
  6. <meta name="Forrest-version" content="0.8">
  7. <meta name="Forrest-skin-name" content="pelt">
  8. <title>Hadoop Cluster Setup</title>
  9. <link type="text/css" href="skin/basic.css" rel="stylesheet">
  10. <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
  11. <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
  12. <link type="text/css" href="skin/profile.css" rel="stylesheet">
  13. <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
  14. <link rel="shortcut icon" href="images/favicon.ico">
  15. </head>
  16. <body onload="init()">
  17. <script type="text/javascript">ndeSetTextSize();</script>
  18. <div id="top">
  19. <!--+
  20. |breadtrail
  21. +-->
  22. <div class="breadtrail">
  23. <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
  24. </div>
  25. <!--+
  26. |header
  27. +-->
  28. <div class="header">
  29. <!--+
  30. |start group logo
  31. +-->
  32. <div class="grouplogo">
  33. <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
  34. </div>
  35. <!--+
  36. |end group logo
  37. +-->
  38. <!--+
  39. |start Project Logo
  40. +-->
  41. <div class="projectlogo">
  42. <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
  43. </div>
  44. <!--+
  45. |end Project Logo
  46. +-->
  47. <!--+
  48. |start Search
  49. +-->
  50. <div class="searchbox">
  51. <form action="http://www.google.com/search" method="get" class="roundtopsmall">
  52. <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
  53. <input name="Search" value="Search" type="submit">
  54. </form>
  55. </div>
  56. <!--+
  57. |end search
  58. +-->
  59. <!--+
  60. |start Tabs
  61. +-->
  62. <ul id="tabs">
  63. <li>
  64. <a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
  65. </li>
  66. <li>
  67. <a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
  68. </li>
  69. <li class="current">
  70. <a class="selected" href="index.html">Hadoop 0.19 Documentation</a>
  71. </li>
  72. </ul>
  73. <!--+
  74. |end Tabs
  75. +-->
  76. </div>
  77. </div>
  78. <div id="main">
  79. <div id="publishedStrip">
  80. <!--+
  81. |start Subtabs
  82. +-->
  83. <div id="level2tabs"></div>
  84. <!--+
  85. |end Endtabs
  86. +-->
  87. <script type="text/javascript"><!--
  88. document.write("Last Published: " + document.lastModified);
  89. // --></script>
  90. </div>
  91. <!--+
  92. |breadtrail
  93. +-->
  94. <div class="breadtrail">
  95. &nbsp;
  96. </div>
  97. <!--+
  98. |start Menu, mainarea
  99. +-->
  100. <!--+
  101. |start Menu
  102. +-->
  103. <div id="menu">
  104. <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
  105. <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
  106. <div class="menuitem">
  107. <a href="index.html">Overview</a>
  108. </div>
  109. <div class="menuitem">
  110. <a href="quickstart.html">Hadoop Quick Start</a>
  111. </div>
  112. <div class="menupage">
  113. <div class="menupagetitle">Hadoop Cluster Setup</div>
  114. </div>
  115. <div class="menuitem">
  116. <a href="mapred_tutorial.html">Hadoop Map/Reduce Tutorial</a>
  117. </div>
  118. <div class="menuitem">
  119. <a href="commands_manual.html">Hadoop Command Guide</a>
  120. </div>
  121. <div class="menuitem">
  122. <a href="hdfs_shell.html">Hadoop FS Shell Guide</a>
  123. </div>
  124. <div class="menuitem">
  125. <a href="distcp.html">Hadoop DistCp Guide</a>
  126. </div>
  127. <div class="menuitem">
  128. <a href="native_libraries.html">Hadoop Native Libraries</a>
  129. </div>
  130. <div class="menuitem">
  131. <a href="streaming.html">Hadoop Streaming</a>
  132. </div>
  133. <div class="menuitem">
  134. <a href="hadoop_archives.html">Hadoop Archives</a>
  135. </div>
  136. <div class="menuitem">
  137. <a href="hdfs_user_guide.html">HDFS User Guide</a>
  138. </div>
  139. <div class="menuitem">
  140. <a href="hdfs_design.html">HDFS Architecture</a>
  141. </div>
  142. <div class="menuitem">
  143. <a href="hdfs_permissions_guide.html">HDFS Admin Guide: Permissions</a>
  144. </div>
  145. <div class="menuitem">
  146. <a href="hdfs_quota_admin_guide.html">HDFS Admin Guide: Quotas</a>
  147. </div>
  148. <div class="menuitem">
  149. <a href="SLG_user_guide.html">HDFS Utilities</a>
  150. </div>
  151. <div class="menuitem">
  152. <a href="libhdfs.html">HDFS C API</a>
  153. </div>
  154. <div class="menuitem">
  155. <a href="hod_user_guide.html">HOD User Guide</a>
  156. </div>
  157. <div class="menuitem">
  158. <a href="hod_admin_guide.html">HOD Admin Guide</a>
  159. </div>
  160. <div class="menuitem">
  161. <a href="hod_config_guide.html">HOD Config Guide</a>
  162. </div>
  163. <div class="menuitem">
  164. <a href="capacity_scheduler.html">Capacity Scheduler</a>
  165. </div>
  166. <div class="menuitem">
  167. <a href="api/index.html">API Docs</a>
  168. </div>
  169. <div class="menuitem">
  170. <a href="jdiff/changes.html">API Changes</a>
  171. </div>
  172. <div class="menuitem">
  173. <a href="http://wiki.apache.org/hadoop/">Wiki</a>
  174. </div>
  175. <div class="menuitem">
  176. <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
  177. </div>
  178. <div class="menuitem">
  179. <a href="releasenotes.html">Release Notes</a>
  180. </div>
  181. <div class="menuitem">
  182. <a href="changes.html">Change Log</a>
  183. </div>
  184. </div>
  185. <div id="credit"></div>
  186. <div id="roundbottom">
  187. <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
  188. <!--+
  189. |alternative credits
  190. +-->
  191. <div id="credit2"></div>
  192. </div>
  193. <!--+
  194. |end Menu
  195. +-->
  196. <!--+
  197. |start content
  198. +-->
  199. <div id="content">
  200. <div title="Portable Document Format" class="pdflink">
  201. <a class="dida" href="cluster_setup.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
  202. PDF</a>
  203. </div>
  204. <h1>Hadoop Cluster Setup</h1>
  205. <div id="minitoc-area">
  206. <ul class="minitoc">
  207. <li>
  208. <a href="#Purpose">Purpose</a>
  209. </li>
  210. <li>
  211. <a href="#Pre-requisites">Pre-requisites</a>
  212. </li>
  213. <li>
  214. <a href="#Installation">Installation</a>
  215. </li>
  216. <li>
  217. <a href="#Configuration">Configuration</a>
  218. <ul class="minitoc">
  219. <li>
  220. <a href="#Configuration+Files">Configuration Files</a>
  221. </li>
  222. <li>
  223. <a href="#Site+Configuration">Site Configuration</a>
  224. <ul class="minitoc">
  225. <li>
  226. <a href="#Configuring+the+Environment+of+the+Hadoop+Daemons">Configuring the Environment of the Hadoop Daemons</a>
  227. </li>
  228. <li>
  229. <a href="#Configuring+the+Hadoop+Daemons">Configuring the Hadoop Daemons</a>
  230. </li>
  231. <li>
  232. <a href="#Slaves">Slaves</a>
  233. </li>
  234. <li>
  235. <a href="#Logging">Logging</a>
  236. </li>
  237. </ul>
  238. </li>
  239. </ul>
  240. </li>
  241. <li>
  242. <a href="#Cluster+Restartability">Cluster Restartability</a>
  243. <ul class="minitoc">
  244. <li>
  245. <a href="#Map%2FReduce">Map/Reduce</a>
  246. </li>
  247. </ul>
  248. </li>
  249. <li>
  250. <a href="#Hadoop+Rack+Awareness">Hadoop Rack Awareness</a>
  251. </li>
  252. <li>
  253. <a href="#Hadoop+Startup">Hadoop Startup</a>
  254. </li>
  255. <li>
  256. <a href="#Hadoop+Shutdown">Hadoop Shutdown</a>
  257. </li>
  258. </ul>
  259. </div>
  260. <a name="N1000D"></a><a name="Purpose"></a>
  261. <h2 class="h3">Purpose</h2>
  262. <div class="section">
  263. <p>This document describes how to install, configure and manage non-trivial
  264. Hadoop clusters ranging from a few nodes to extremely large clusters with
  265. thousands of nodes.</p>
  266. <p>
  267. To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="quickstart.html"> Hadoop Quick Start</a>).
  268. </p>
  269. </div>
  270. <a name="N1001E"></a><a name="Pre-requisites"></a>
  271. <h2 class="h3">Pre-requisites</h2>
  272. <div class="section">
  273. <ol>
  274. <li>
  275. Make sure all <a href="quickstart.html#PreReqs">requisite</a> software
  276. is installed on all nodes in your cluster.
  277. </li>
  278. <li>
  279. <a href="quickstart.html#Download">Get</a> the Hadoop software.
  280. </li>
  281. </ol>
  282. </div>
  283. <a name="N10036"></a><a name="Installation"></a>
  284. <h2 class="h3">Installation</h2>
  285. <div class="section">
  286. <p>Installing a Hadoop cluster typically involves unpacking the software
  287. on all the machines in the cluster.</p>
<p>Typically one machine in the cluster is designated as the
<span class="codefrag">NameNode</span> and another machine as the <span class="codefrag">JobTracker</span>,
exclusively. These are the <em>masters</em>. The rest of the machines in
the cluster act as both <span class="codefrag">DataNode</span> <em>and</em>
<span class="codefrag">TaskTracker</span>. These are the <em>slaves</em>.</p>
  293. <p>The root of the distribution is referred to as
  294. <span class="codefrag">HADOOP_HOME</span>. All machines in the cluster usually have the same
  295. <span class="codefrag">HADOOP_HOME</span> path.</p>
  296. </div>
  297. <a name="N10061"></a><a name="Configuration"></a>
  298. <h2 class="h3">Configuration</h2>
  299. <div class="section">
  300. <p>The following sections describe how to configure a Hadoop cluster.</p>
  301. <a name="N1006A"></a><a name="Configuration+Files"></a>
  302. <h3 class="h4">Configuration Files</h3>
  303. <p>Hadoop configuration is driven by two important configuration files
  304. found in the <span class="codefrag">conf/</span> directory of the distribution:</p>
  305. <ol>
  306. <li>
  307. <a href="http://hadoop.apache.org/core/docs/current/hadoop-default.html">hadoop-default.xml</a> - Read-only
  308. default configuration.
  309. </li>
  310. <li>
  311. <em>hadoop-site.xml</em> - Site-specific configuration.
  312. </li>
  313. </ol>
<p>To learn more about how the Hadoop framework is controlled by these
configuration files, see
<a href="api/org/apache/hadoop/conf/Configuration.html">Configuration</a>.</p>
<p>Additionally, you can control the Hadoop scripts found in the
<span class="codefrag">bin/</span> directory of the distribution by setting site-specific
values in <span class="codefrag">conf/hadoop-env.sh</span>.</p>
  320. <a name="N10097"></a><a name="Site+Configuration"></a>
  321. <h3 class="h4">Site Configuration</h3>
  322. <p>To configure the Hadoop cluster you will need to configure the
  323. <em>environment</em> in which the Hadoop daemons execute as well as
  324. the <em>configuration parameters</em> for the Hadoop daemons.</p>
  325. <p>The Hadoop daemons are <span class="codefrag">NameNode</span>/<span class="codefrag">DataNode</span>
  326. and <span class="codefrag">JobTracker</span>/<span class="codefrag">TaskTracker</span>.</p>
  327. <a name="N100B5"></a><a name="Configuring+the+Environment+of+the+Hadoop+Daemons"></a>
  328. <h4>Configuring the Environment of the Hadoop Daemons</h4>
  329. <p>Administrators should use the <span class="codefrag">conf/hadoop-env.sh</span> script
  330. to do site-specific customization of the Hadoop daemons' process
  331. environment.</p>
<p>At the very least you should specify
<span class="codefrag">JAVA_HOME</span> so that it is correctly defined on each
remote node.</p>
<p>Administrators can configure individual daemons using the
configuration options <span class="codefrag">HADOOP_*_OPTS</span>. The available
options are shown in the table below.</p>
  338. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  339. <tr>
  340. <th colspan="1" rowspan="1">Daemon</th><th colspan="1" rowspan="1">Configure Options</th>
  341. </tr>
  342. <tr>
  343. <td colspan="1" rowspan="1">NameNode</td><td colspan="1" rowspan="1">HADOOP_NAMENODE_OPTS</td>
  344. </tr>
  345. <tr>
  346. <td colspan="1" rowspan="1">DataNode</td><td colspan="1" rowspan="1">HADOOP_DATANODE_OPTS</td>
  347. </tr>
  348. <tr>
  349. <td colspan="1" rowspan="1">SecondaryNamenode</td>
  350. <td colspan="1" rowspan="1">HADOOP_SECONDARYNAMENODE_OPTS</td>
  351. </tr>
  352. <tr>
  353. <td colspan="1" rowspan="1">JobTracker</td><td colspan="1" rowspan="1">HADOOP_JOBTRACKER_OPTS</td>
  354. </tr>
  355. <tr>
  356. <td colspan="1" rowspan="1">TaskTracker</td><td colspan="1" rowspan="1">HADOOP_TASKTRACKER_OPTS</td>
  357. </tr>
  358. </table>
<p>For example, to configure the NameNode to use the parallel garbage collector, add the
following statement to <span class="codefrag">hadoop-env.sh</span>:
<br>
<span class="codefrag">
export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}"
</span>
<br>
</p>
  367. <p>Other useful configuration parameters that you can customize
  368. include:</p>
  369. <ul>
<li>
<span class="codefrag">HADOOP_LOG_DIR</span> - The directory where the daemons'
log files are stored. They are automatically created if they don't
exist.
</li>
<li>
<span class="codefrag">HADOOP_HEAPSIZE</span> - The maximum heap size
to use, in MB, e.g. <span class="codefrag">1000</span>. This is used to
configure the heap size for the Hadoop daemons. By default,
the value is <span class="codefrag">1000</span> (i.e. 1000 MB).
See the example sketch below.
</li>
  381. </ul>
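<p>For illustration only, a minimal <span class="codefrag">conf/hadoop-env.sh</span> might look like the
sketch below; the paths and sizes shown are assumptions and should be adapted to your site:</p>
<pre class="code">
# Location of the JDK (assumed path -- set to your actual installation)
export JAVA_HOME=/usr/java/latest

# Keep daemon logs outside the Hadoop install tree (hypothetical path)
export HADOOP_LOG_DIR=/var/log/hadoop

# Heap size in MB for the Hadoop daemons
export HADOOP_HEAPSIZE=1000

# Per-daemon JVM options, e.g. parallel GC on the NameNode
export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}"
</pre>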
  382. <a name="N10130"></a><a name="Configuring+the+Hadoop+Daemons"></a>
  383. <h4>Configuring the Hadoop Daemons</h4>
  384. <p>This section deals with important parameters to be specified in the
  385. <span class="codefrag">conf/hadoop-site.xml</span> for the Hadoop cluster.</p>
  386. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  387. <tr>
  388. <th colspan="1" rowspan="1">Parameter</th>
  389. <th colspan="1" rowspan="1">Value</th>
  390. <th colspan="1" rowspan="1">Notes</th>
  391. </tr>
  392. <tr>
  393. <td colspan="1" rowspan="1">fs.default.name</td>
  394. <td colspan="1" rowspan="1">URI of <span class="codefrag">NameNode</span>.</td>
  395. <td colspan="1" rowspan="1"><em>hdfs://hostname/</em></td>
  396. </tr>
  397. <tr>
  398. <td colspan="1" rowspan="1">mapred.job.tracker</td>
  399. <td colspan="1" rowspan="1">Host or IP and port of <span class="codefrag">JobTracker</span>.</td>
  400. <td colspan="1" rowspan="1"><em>host:port</em> pair.</td>
  401. </tr>
  402. <tr>
  403. <td colspan="1" rowspan="1">dfs.name.dir</td>
  404. <td colspan="1" rowspan="1">
  405. Path on the local filesystem where the <span class="codefrag">NameNode</span>
stores the namespace and transaction logs persistently.</td>
  407. <td colspan="1" rowspan="1">
  408. If this is a comma-delimited list of directories then the name
  409. table is replicated in all of the directories, for redundancy.
  410. </td>
  411. </tr>
  412. <tr>
  413. <td colspan="1" rowspan="1">dfs.data.dir</td>
  414. <td colspan="1" rowspan="1">
  415. Comma separated list of paths on the local filesystem of a
  416. <span class="codefrag">DataNode</span> where it should store its blocks.
  417. </td>
  418. <td colspan="1" rowspan="1">
  419. If this is a comma-delimited list of directories, then data will
  420. be stored in all named directories, typically on different
  421. devices.
  422. </td>
  423. </tr>
  424. <tr>
  425. <td colspan="1" rowspan="1">mapred.system.dir</td>
  426. <td colspan="1" rowspan="1">
Path on HDFS where the Map/Reduce framework stores
  428. system files e.g. <span class="codefrag">/hadoop/mapred/system/</span>.
  429. </td>
  430. <td colspan="1" rowspan="1">
  431. This is in the default filesystem (HDFS) and must be accessible
  432. from both the server and client machines.
  433. </td>
  434. </tr>
  435. <tr>
  436. <td colspan="1" rowspan="1">mapred.local.dir</td>
  437. <td colspan="1" rowspan="1">
  438. Comma-separated list of paths on the local filesystem where
  439. temporary Map/Reduce data is written.
  440. </td>
  441. <td colspan="1" rowspan="1">Multiple paths help spread disk i/o.</td>
  442. </tr>
  443. <tr>
  444. <td colspan="1" rowspan="1">mapred.tasktracker.{map|reduce}.tasks.maximum</td>
  445. <td colspan="1" rowspan="1">
The maximum number of map and reduce tasks, respectively, that are run
simultaneously on a given <span class="codefrag">TaskTracker</span>.
  448. </td>
  449. <td colspan="1" rowspan="1">
  450. Defaults to 2 (2 maps and 2 reduces), but vary it depending on
  451. your hardware.
  452. </td>
  453. </tr>
  454. <tr>
  455. <td colspan="1" rowspan="1">dfs.hosts/dfs.hosts.exclude</td>
  456. <td colspan="1" rowspan="1">List of permitted/excluded DataNodes.</td>
  457. <td colspan="1" rowspan="1">
  458. If necessary, use these files to control the list of allowable
  459. datanodes.
  460. </td>
  461. </tr>
  462. <tr>
  463. <td colspan="1" rowspan="1">mapred.hosts/mapred.hosts.exclude</td>
  464. <td colspan="1" rowspan="1">List of permitted/excluded TaskTrackers.</td>
  465. <td colspan="1" rowspan="1">
  466. If necessary, use these files to control the list of allowable
  467. TaskTrackers.
  468. </td>
  469. </tr>
  470. <tr>
  471. <td colspan="1" rowspan="1">mapred.queue.names</td>
  472. <td colspan="1" rowspan="1">Comma separated list of queues to which jobs can be submitted.</td>
  473. <td colspan="1" rowspan="1">
The Map/Reduce system always supports at least one queue
named <em>default</em>. Hence, this parameter's
value should always contain the string <em>default</em>.
Some job schedulers supported in Hadoop, like the
<a href="capacity_scheduler.html">Capacity
Scheduler</a>, support multiple queues. If such a scheduler is
being used, the list of configured queue names must be
specified here. Once queues are defined, users can submit
jobs to a queue using the property name
<em>mapred.job.queue.name</em> in the job configuration.
There may be a separate
configuration file, managed by the scheduler, for configuring
the properties of these queues.
Refer to the scheduler's documentation for details.
  489. </td>
  490. </tr>
  491. <tr>
  492. <td colspan="1" rowspan="1">mapred.acls.enabled</td>
  493. <td colspan="1" rowspan="1">Specifies whether ACLs are supported for controlling job
  494. submission and administration</td>
  495. <td colspan="1" rowspan="1">
If <em>true</em>, ACLs are checked when submitting
and administering jobs. ACLs can be specified using the
  498. configuration parameters of the form
  499. <em>mapred.queue.queue-name.acl-name</em>, defined below.
  500. </td>
  501. </tr>
  502. <tr>
  503. <td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-submit-job</td>
  504. <td colspan="1" rowspan="1">List of users and groups that can submit jobs to the
  505. specified <em>queue-name</em>.</td>
  506. <td colspan="1" rowspan="1">
The users and groups are both given as comma-separated
lists of names, and the two lists are separated by a blank.
Example: <em>user1,user2 group1,group2</em>.
If you wish to specify only a list of groups, start
the value with a blank.
  512. </td>
  513. </tr>
  514. <tr>
  515. <td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-administer-job</td>
  516. <td colspan="1" rowspan="1">List of users and groups that can change the priority
  517. or kill jobs that have been submitted to the
  518. specified <em>queue-name</em>.</td>
  519. <td colspan="1" rowspan="1">
The users and groups are both given as comma-separated
lists of names, and the two lists are separated by a blank.
Example: <em>user1,user2 group1,group2</em>.
If you wish to specify only a list of groups, start
the value with a blank. Note that the
owner of a job can always change its priority or kill
it, irrespective of the ACLs.
  527. </td>
  528. </tr>
  529. </table>
<p>Typically all the above parameters are marked as
<a href="api/org/apache/hadoop/conf/Configuration.html#FinalParams">
final</a> to ensure that they cannot be overridden by user applications.
</p>
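<p>To illustrate the format, a fragment of <span class="codefrag">conf/hadoop-site.xml</span> for a small
cluster might look like the sketch below. The hostnames, ports, paths and ACL values are placeholders,
not recommendations; adapt them to your environment:</p>
<pre class="code">
&lt;configuration&gt;
  &lt;property&gt;
    &lt;name&gt;fs.default.name&lt;/name&gt;
    &lt;value&gt;hdfs://namenode.example.com:9000/&lt;/value&gt;
    &lt;final&gt;true&lt;/final&gt;
  &lt;/property&gt;
  &lt;property&gt;
    &lt;name&gt;mapred.job.tracker&lt;/name&gt;
    &lt;value&gt;jobtracker.example.com:9001&lt;/value&gt;
    &lt;final&gt;true&lt;/final&gt;
  &lt;/property&gt;
  &lt;property&gt;
    &lt;name&gt;dfs.name.dir&lt;/name&gt;
    &lt;value&gt;/data/1/dfs/name,/data/2/dfs/name&lt;/value&gt;
    &lt;final&gt;true&lt;/final&gt;
  &lt;/property&gt;
  &lt;property&gt;
    &lt;name&gt;dfs.data.dir&lt;/name&gt;
    &lt;value&gt;/data/1/dfs/data,/data/2/dfs/data&lt;/value&gt;
    &lt;final&gt;true&lt;/final&gt;
  &lt;/property&gt;
  &lt;property&gt;
    &lt;name&gt;mapred.local.dir&lt;/name&gt;
    &lt;value&gt;/data/1/mapred/local,/data/2/mapred/local&lt;/value&gt;
    &lt;final&gt;true&lt;/final&gt;
  &lt;/property&gt;
  &lt;property&gt;
    &lt;name&gt;mapred.queue.default.acl-submit-job&lt;/name&gt;
    &lt;value&gt;user1,user2 group1,group2&lt;/value&gt;
  &lt;/property&gt;
&lt;/configuration&gt;
</pre>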
  534. <a name="N1027C"></a><a name="Real-World+Cluster+Configurations"></a>
  535. <h5>Real-World Cluster Configurations</h5>
  536. <p>This section lists some non-default configuration parameters which
  537. have been used to run the <em>sort</em> benchmark on very large
  538. clusters.</p>
  539. <ul>
  540. <li>
  541. <p>Some non-default configuration values used to run sort900,
  542. that is 9TB of data sorted on a cluster with 900 nodes:</p>
  543. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  544. <tr>
  545. <th colspan="1" rowspan="1">Parameter</th>
  546. <th colspan="1" rowspan="1">Value</th>
  547. <th colspan="1" rowspan="1">Notes</th>
  548. </tr>
  549. <tr>
  550. <td colspan="1" rowspan="1">dfs.block.size</td>
  551. <td colspan="1" rowspan="1">134217728</td>
  552. <td colspan="1" rowspan="1">HDFS blocksize of 128MB for large file-systems.</td>
  553. </tr>
  554. <tr>
  555. <td colspan="1" rowspan="1">dfs.namenode.handler.count</td>
  556. <td colspan="1" rowspan="1">40</td>
  557. <td colspan="1" rowspan="1">
  558. More NameNode server threads to handle RPCs from large
  559. number of DataNodes.
  560. </td>
  561. </tr>
  562. <tr>
  563. <td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td>
  564. <td colspan="1" rowspan="1">20</td>
  565. <td colspan="1" rowspan="1">
  566. Higher number of parallel copies run by reduces to fetch
  567. outputs from very large number of maps.
  568. </td>
  569. </tr>
  570. <tr>
  571. <td colspan="1" rowspan="1">mapred.child.java.opts</td>
  572. <td colspan="1" rowspan="1">-Xmx512M</td>
  573. <td colspan="1" rowspan="1">
  574. Larger heap-size for child jvms of maps/reduces.
  575. </td>
  576. </tr>
  577. <tr>
  578. <td colspan="1" rowspan="1">fs.inmemory.size.mb</td>
  579. <td colspan="1" rowspan="1">200</td>
  580. <td colspan="1" rowspan="1">
  581. Larger amount of memory allocated for the in-memory
  582. file-system used to merge map-outputs at the reduces.
  583. </td>
  584. </tr>
  585. <tr>
  586. <td colspan="1" rowspan="1">io.sort.factor</td>
  587. <td colspan="1" rowspan="1">100</td>
  588. <td colspan="1" rowspan="1">More streams merged at once while sorting files.</td>
  589. </tr>
  590. <tr>
  591. <td colspan="1" rowspan="1">io.sort.mb</td>
  592. <td colspan="1" rowspan="1">200</td>
  593. <td colspan="1" rowspan="1">Higher memory-limit while sorting data.</td>
  594. </tr>
  595. <tr>
  596. <td colspan="1" rowspan="1">io.file.buffer.size</td>
  597. <td colspan="1" rowspan="1">131072</td>
  598. <td colspan="1" rowspan="1">Size of read/write buffer used in SequenceFiles.</td>
  599. </tr>
  600. </table>
  601. </li>
  602. <li>
  603. <p>Updates to some configuration values to run sort1400 and
  604. sort2000, that is 14TB of data sorted on 1400 nodes and 20TB of
  605. data sorted on 2000 nodes:</p>
  606. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  607. <tr>
  608. <th colspan="1" rowspan="1">Parameter</th>
  609. <th colspan="1" rowspan="1">Value</th>
  610. <th colspan="1" rowspan="1">Notes</th>
  611. </tr>
  612. <tr>
  613. <td colspan="1" rowspan="1">mapred.job.tracker.handler.count</td>
  614. <td colspan="1" rowspan="1">60</td>
  615. <td colspan="1" rowspan="1">
  616. More JobTracker server threads to handle RPCs from large
  617. number of TaskTrackers.
  618. </td>
  619. </tr>
  620. <tr>
  621. <td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td>
  622. <td colspan="1" rowspan="1">50</td>
  623. <td colspan="1" rowspan="1"></td>
  624. </tr>
  625. <tr>
  626. <td colspan="1" rowspan="1">tasktracker.http.threads</td>
  627. <td colspan="1" rowspan="1">50</td>
  628. <td colspan="1" rowspan="1">
  629. More worker threads for the TaskTracker's http server. The
  630. http server is used by reduces to fetch intermediate
  631. map-outputs.
  632. </td>
  633. </tr>
  634. <tr>
  635. <td colspan="1" rowspan="1">mapred.child.java.opts</td>
  636. <td colspan="1" rowspan="1">-Xmx1024M</td>
  637. <td colspan="1" rowspan="1">Larger heap-size for child jvms of maps/reduces.</td>
  638. </tr>
  639. </table>
  640. </li>
  641. </ul>
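<p>The benchmark values in the tables above are set in <span class="codefrag">conf/hadoop-site.xml</span>
like any other parameter. For example, the 128 MB block size (128 * 1024 * 1024 = 134217728 bytes)
would appear as:</p>
<pre class="code">
&lt;property&gt;
  &lt;name&gt;dfs.block.size&lt;/name&gt;
  &lt;value&gt;134217728&lt;/value&gt;
&lt;/property&gt;
</pre>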
  642. <a name="N1039A"></a><a name="Slaves"></a>
  643. <h4>Slaves</h4>
<p>Typically you choose one machine in the cluster to act as the
<span class="codefrag">NameNode</span> and one machine to act as the
<span class="codefrag">JobTracker</span>, exclusively. The rest of the machines act as
both a <span class="codefrag">DataNode</span> and a <span class="codefrag">TaskTracker</span> and are
referred to as <em>slaves</em>.</p>
  649. <p>List all slave hostnames or IP addresses in your
  650. <span class="codefrag">conf/slaves</span> file, one per line.</p>
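<p>For example, a <span class="codefrag">conf/slaves</span> file for a four-slave cluster might contain
(the hostnames below are placeholders):</p>
<pre class="code">
slave01.example.com
slave02.example.com
slave03.example.com
slave04.example.com
</pre>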
  651. <a name="N103B9"></a><a name="Logging"></a>
  652. <h4>Logging</h4>
<p>Hadoop uses <a href="http://logging.apache.org/log4j/">Apache
log4j</a> via the <a href="http://commons.apache.org/logging/">Apache
Commons Logging</a> framework for logging. Edit the
<span class="codefrag">conf/log4j.properties</span> file to customize the Hadoop
daemons' logging configuration (log formats and so on).</p>
  658. <a name="N103CD"></a><a name="History+Logging"></a>
  659. <h5>History Logging</h5>
<p>The job history files are stored in the central location
<span class="codefrag">hadoop.job.history.location</span>, which can also be on DFS and
whose default value is <span class="codefrag">${HADOOP_LOG_DIR}/history</span>.
The history web UI is accessible from the JobTracker web UI.</p>
<p>The history files are also logged to the user-specified directory
<span class="codefrag">hadoop.job.history.user.location</span>,
which defaults to the job output directory. The files are stored in
"_logs/history/" within the specified directory. Hence, by default
they will be in "mapred.output.dir/_logs/history/". Users can disable this
logging by setting <span class="codefrag">hadoop.job.history.user.location</span>
to the value <span class="codefrag">none</span>.
</p>
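<p>For instance, to disable the per-job copy of the history files, the following could be set in the
job configuration (shown in <span class="codefrag">hadoop-site.xml</span> property syntax for illustration):</p>
<pre class="code">
&lt;property&gt;
  &lt;name&gt;hadoop.job.history.user.location&lt;/name&gt;
  &lt;value&gt;none&lt;/value&gt;
&lt;/property&gt;
</pre>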
<p>Users can view a summary of the history logs in a specified directory
using the following command: <br>
<span class="codefrag">$ bin/hadoop job -history output-dir</span>
<br>
This command prints job details, and failed and killed tip
details. <br>
More details about the job, such as successful tasks and the
task attempts made for each task, can be viewed using the
following command: <br>
<span class="codefrag">$ bin/hadoop job -history all output-dir</span>
<br>
</p>
  684. <p>Once all the necessary configuration is complete, distribute the files
  685. to the <span class="codefrag">HADOOP_CONF_DIR</span> directory on all the machines,
  686. typically <span class="codefrag">${HADOOP_HOME}/conf</span>.</p>
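<p>How the files are distributed is left to the administrator; as one hypothetical approach, the
configuration directory could be pushed to every node listed in <span class="codefrag">conf/slaves</span>
with a small shell loop such as:</p>
<pre class="code">
# Hypothetical sketch: copy the local conf/ directory to each slave.
# Assumes passwordless ssh, HADOOP_HOME set in the environment, and the
# same ${HADOOP_HOME} path on all nodes.
for host in `cat ${HADOOP_HOME}/conf/slaves` ; do
  rsync -a ${HADOOP_HOME}/conf/ ${host}:${HADOOP_HOME}/conf/
done
</pre>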
  687. </div>
  688. <a name="N10405"></a><a name="Cluster+Restartability"></a>
  689. <h2 class="h3">Cluster Restartability</h2>
  690. <div class="section">
  691. <a name="N1040B"></a><a name="Map%2FReduce"></a>
  692. <h3 class="h4">Map/Reduce</h3>
<p>A restarted JobTracker can recover running jobs if
<span class="codefrag">mapred.jobtracker.restart.recover</span> is set to true and
<a href="#Logging">JobHistory logging</a> is enabled. In addition,
<span class="codefrag">mapred.jobtracker.job.history.block.size</span> should be
set to an optimal value so that job history is dumped to disk as soon as
possible; a typical value is 3145728 (3 MB).</p>
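<p>A minimal sketch of the corresponding <span class="codefrag">hadoop-site.xml</span> entries, using the
values discussed above:</p>
<pre class="code">
&lt;property&gt;
  &lt;name&gt;mapred.jobtracker.restart.recover&lt;/name&gt;
  &lt;value&gt;true&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
  &lt;name&gt;mapred.jobtracker.job.history.block.size&lt;/name&gt;
  &lt;value&gt;3145728&lt;/value&gt;
&lt;/property&gt;
</pre>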
  699. </div>
  700. <a name="N10420"></a><a name="Hadoop+Rack+Awareness"></a>
  701. <h2 class="h3">Hadoop Rack Awareness</h2>
  702. <div class="section">
  703. <p>The HDFS and the Map/Reduce components are rack-aware.</p>
<p>The <span class="codefrag">NameNode</span> and the <span class="codefrag">JobTracker</span> obtain the
<span class="codefrag">rack id</span> of the slaves in the cluster by invoking the API
<a href="api/org/apache/hadoop/net/DNSToSwitchMapping.html#resolve(java.util.List)">resolve</a> in an administrator-configured
module. The API resolves the slave's DNS name (or IP address) to a
rack id. The module to use is configured via the configuration
item <span class="codefrag">topology.node.switch.mapping.impl</span>. The default
implementation runs a script or command configured using
<span class="codefrag">topology.script.file.name</span>. If <span class="codefrag">topology.script.file.name</span> is
not set, the rack id <span class="codefrag">/default-rack</span> is returned for any
passed IP address. An additional configuration item on the Map/Reduce
side is <span class="codefrag">mapred.cache.task.levels</span>, which determines the number
of levels (in the network topology) of caches. So, for example, with
the default value of 2, two levels of caches are constructed -
one for hosts (host -&gt; task mapping) and another for racks
(rack -&gt; task mapping).
  719. </p>
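<p>As an illustration, <span class="codefrag">topology.script.file.name</span> could point at a small shell
script like the hypothetical sketch below. Hadoop invokes the script with one or more host names or IP
addresses as arguments and reads one rack id per argument from standard output; the subnet-to-rack
mapping shown is purely an example:</p>
<pre class="code">
#!/bin/sh
# Hypothetical topology script: maps 10.1.x.x addresses to /rack1,
# 10.2.x.x to /rack2, and everything else to /default-rack.
for host in "$@" ; do
  case "$host" in
    10.1.*) printf "/rack1 "        ;;
    10.2.*) printf "/rack2 "        ;;
    *)      printf "/default-rack " ;;
  esac
done
echo
</pre>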
  720. </div>
  721. <a name="N10446"></a><a name="Hadoop+Startup"></a>
  722. <h2 class="h3">Hadoop Startup</h2>
  723. <div class="section">
<p>To start a Hadoop cluster you will need to start both the HDFS and
the Map/Reduce clusters.</p>
  726. <p>
  727. Format a new distributed filesystem:<br>
  728. <span class="codefrag">$ bin/hadoop namenode -format</span>
  729. </p>
  730. <p>
  731. Start the HDFS with the following command, run on the designated
  732. <span class="codefrag">NameNode</span>:<br>
  733. <span class="codefrag">$ bin/start-dfs.sh</span>
  734. </p>
  735. <p>The <span class="codefrag">bin/start-dfs.sh</span> script also consults the
  736. <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">NameNode</span>
  737. and starts the <span class="codefrag">DataNode</span> daemon on all the listed slaves.</p>
  738. <p>
  739. Start Map-Reduce with the following command, run on the designated
  740. <span class="codefrag">JobTracker</span>:<br>
  741. <span class="codefrag">$ bin/start-mapred.sh</span>
  742. </p>
  743. <p>The <span class="codefrag">bin/start-mapred.sh</span> script also consults the
  744. <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">JobTracker</span>
  745. and starts the <span class="codefrag">TaskTracker</span> daemon on all the listed slaves.
  746. </p>
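<p>Putting the above together, a first-time startup sequence, run from
<span class="codefrag">${HADOOP_HOME}</span> on the appropriate master nodes, looks roughly like this
(formatting the filesystem is only done once, on a new cluster):</p>
<pre class="code">
# On the NameNode: format a new distributed filesystem (first time only)
$ bin/hadoop namenode -format

# On the NameNode: start HDFS (also starts DataNodes listed in conf/slaves)
$ bin/start-dfs.sh

# On the JobTracker: start Map/Reduce (also starts TaskTrackers listed in conf/slaves)
$ bin/start-mapred.sh
</pre>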
  747. </div>
  748. <a name="N1048C"></a><a name="Hadoop+Shutdown"></a>
  749. <h2 class="h3">Hadoop Shutdown</h2>
  750. <div class="section">
  751. <p>
  752. Stop HDFS with the following command, run on the designated
  753. <span class="codefrag">NameNode</span>:<br>
  754. <span class="codefrag">$ bin/stop-dfs.sh</span>
  755. </p>
  756. <p>The <span class="codefrag">bin/stop-dfs.sh</span> script also consults the
  757. <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">NameNode</span>
  758. and stops the <span class="codefrag">DataNode</span> daemon on all the listed slaves.</p>
  759. <p>
Stop Map/Reduce with the following command, run on the designated
<span class="codefrag">JobTracker</span>:<br>
  762. <span class="codefrag">$ bin/stop-mapred.sh</span>
  763. <br>
  764. </p>
  765. <p>The <span class="codefrag">bin/stop-mapred.sh</span> script also consults the
  766. <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">JobTracker</span>
  767. and stops the <span class="codefrag">TaskTracker</span> daemon on all the listed slaves.</p>
  768. </div>
  769. </div>
  770. <!--+
  771. |end content
  772. +-->
  773. <div class="clearboth">&nbsp;</div>
  774. </div>
  775. <div id="footer">
  776. <!--+
  777. |start bottomstrip
  778. +-->
  779. <div class="lastmodified">
  780. <script type="text/javascript"><!--
  781. document.write("Last Published: " + document.lastModified);
  782. // --></script>
  783. </div>
  784. <div class="copyright">
  785. Copyright &copy;
  786. 2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
  787. </div>
  788. <!--+
  789. |end bottomstrip
  790. +-->
  791. </div>
  792. </body>
  793. </html>