capacity_scheduler.html 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2. <html>
  3. <head>
  4. <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5. <meta content="Apache Forrest" name="Generator">
  6. <meta name="Forrest-version" content="0.8">
  7. <meta name="Forrest-skin-name" content="pelt">
  8. <title>Capacity Scheduler</title>
  9. <link type="text/css" href="skin/basic.css" rel="stylesheet">
  10. <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
  11. <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
  12. <link type="text/css" href="skin/profile.css" rel="stylesheet">
  13. <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
  14. <link rel="shortcut icon" href="images/favicon.ico">
  15. </head>
  16. <body onload="init()">
  17. <script type="text/javascript">ndeSetTextSize();</script>
  18. <div id="top">
  19. <!--+
  20. |breadtrail
  21. +-->
  22. <div class="breadtrail">
  23. <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
  24. </div>
  25. <!--+
  26. |header
  27. +-->
  28. <div class="header">
  29. <!--+
  30. |start group logo
  31. +-->
  32. <div class="grouplogo">
  33. <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
  34. </div>
  35. <!--+
  36. |end group logo
  37. +-->
  38. <!--+
  39. |start Project Logo
  40. +-->
  41. <div class="projectlogo">
  42. <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
  43. </div>
  44. <!--+
  45. |end Project Logo
  46. +-->
  47. <!--+
  48. |start Search
  49. +-->
  50. <div class="searchbox">
  51. <form action="http://www.google.com/search" method="get" class="roundtopsmall">
  52. <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
  53. <input name="Search" value="Search" type="submit">
  54. </form>
  55. </div>
  56. <!--+
  57. |end search
  58. +-->
  59. <!--+
  60. |start Tabs
  61. +-->
  62. <ul id="tabs">
  63. <li>
  64. <a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
  65. </li>
  66. <li>
  67. <a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
  68. </li>
  69. <li class="current">
  70. <a class="selected" href="index.html">Hadoop 0.20 Documentation</a>
  71. </li>
  72. </ul>
  73. <!--+
  74. |end Tabs
  75. +-->
  76. </div>
  77. </div>
  78. <div id="main">
  79. <div id="publishedStrip">
  80. <!--+
  81. |start Subtabs
  82. +-->
  83. <div id="level2tabs"></div>
  84. <!--+
  85. |end Endtabs
  86. +-->
  87. <script type="text/javascript"><!--
  88. document.write("Last Published: " + document.lastModified);
  89. // --></script>
  90. </div>
  91. <!--+
  92. |breadtrail
  93. +-->
  94. <div class="breadtrail">
  95. &nbsp;
  96. </div>
  97. <!--+
  98. |start Menu, mainarea
  99. +-->
  100. <!--+
  101. |start Menu
  102. +-->
  103. <div id="menu">
  104. <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
  105. <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
  106. <div class="menuitem">
  107. <a href="index.html">Overview</a>
  108. </div>
  109. <div class="menuitem">
  110. <a href="quickstart.html">Hadoop Quick Start</a>
  111. </div>
  112. <div class="menuitem">
  113. <a href="cluster_setup.html">Hadoop Cluster Setup</a>
  114. </div>
  115. <div class="menuitem">
  116. <a href="mapred_tutorial.html">Hadoop Map/Reduce Tutorial</a>
  117. </div>
  118. <div class="menuitem">
  119. <a href="commands_manual.html">Hadoop Command Guide</a>
  120. </div>
  121. <div class="menuitem">
  122. <a href="hdfs_shell.html">Hadoop FS Shell Guide</a>
  123. </div>
  124. <div class="menuitem">
  125. <a href="distcp.html">Hadoop DistCp Guide</a>
  126. </div>
  127. <div class="menuitem">
  128. <a href="native_libraries.html">Hadoop Native Libraries</a>
  129. </div>
  130. <div class="menuitem">
  131. <a href="streaming.html">Hadoop Streaming</a>
  132. </div>
  133. <div class="menuitem">
  134. <a href="hadoop_archives.html">Hadoop Archives</a>
  135. </div>
  136. <div class="menuitem">
  137. <a href="hdfs_user_guide.html">HDFS User Guide</a>
  138. </div>
  139. <div class="menuitem">
  140. <a href="hdfs_design.html">HDFS Architecture</a>
  141. </div>
  142. <div class="menuitem">
  143. <a href="hdfs_permissions_guide.html">HDFS Admin Guide: Permissions</a>
  144. </div>
  145. <div class="menuitem">
  146. <a href="hdfs_quota_admin_guide.html">HDFS Admin Guide: Quotas</a>
  147. </div>
  148. <div class="menuitem">
  149. <a href="SLG_user_guide.html">HDFS Utilities</a>
  150. </div>
  151. <div class="menuitem">
  152. <a href="libhdfs.html">HDFS C API</a>
  153. </div>
  154. <div class="menuitem">
  155. <a href="hod_user_guide.html">HOD User Guide</a>
  156. </div>
  157. <div class="menuitem">
  158. <a href="hod_admin_guide.html">HOD Admin Guide</a>
  159. </div>
  160. <div class="menuitem">
  161. <a href="hod_config_guide.html">HOD Config Guide</a>
  162. </div>
  163. <div class="menupage">
  164. <div class="menupagetitle">Capacity Scheduler</div>
  165. </div>
  166. <div class="menuitem">
  167. <a href="vaidya.html">Hadoop Vaidya</a>
  168. </div>
  169. <div class="menuitem">
  170. <a href="api/index.html">API Docs</a>
  171. </div>
  172. <div class="menuitem">
  173. <a href="jdiff/changes.html">API Changes</a>
  174. </div>
  175. <div class="menuitem">
  176. <a href="http://wiki.apache.org/hadoop/">Wiki</a>
  177. </div>
  178. <div class="menuitem">
  179. <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
  180. </div>
  181. <div class="menuitem">
  182. <a href="releasenotes.html">Release Notes</a>
  183. </div>
  184. <div class="menuitem">
  185. <a href="changes.html">Change Log</a>
  186. </div>
  187. </div>
  188. <div id="credit"></div>
  189. <div id="roundbottom">
  190. <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
  191. <!--+
  192. |alternative credits
  193. +-->
  194. <div id="credit2"></div>
  195. </div>
  196. <!--+
  197. |end Menu
  198. +-->
  199. <!--+
  200. |start content
  201. +-->
  202. <div id="content">
  203. <div title="Portable Document Format" class="pdflink">
  204. <a class="dida" href="capacity_scheduler.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
  205. PDF</a>
  206. </div>
  207. <h1>Capacity Scheduler</h1>
  208. <div id="minitoc-area">
  209. <ul class="minitoc">
  210. <li>
  211. <a href="#Purpose">Purpose</a>
  212. </li>
  213. <li>
  214. <a href="#Features">Features</a>
  215. </li>
  216. <li>
  217. <a href="#Picking+a+task+to+run">Picking a task to run</a>
  218. </li>
  219. <li>
  220. <a href="#Reclaiming+capacity">Reclaiming capacity</a>
  221. </li>
  222. <li>
  223. <a href="#Installation">Installation</a>
  224. </li>
  225. <li>
  226. <a href="#Configuration">Configuration</a>
  227. <ul class="minitoc">
  228. <li>
  229. <a href="#Using+the+capacity+scheduler">Using the capacity scheduler</a>
  230. </li>
  231. <li>
  232. <a href="#Setting+up+queues">Setting up queues</a>
  233. </li>
  234. <li>
  235. <a href="#Configuring+properties+for+queues">Configuring properties for queues</a>
  236. </li>
  237. <li>
  238. <a href="#Reviewing+the+configuration+of+the+capacity+scheduler">Reviewing the configuration of the capacity scheduler</a>
  239. </li>
  240. </ul>
  241. </li>
  242. </ul>
  243. </div>
  244. <a name="N1000D"></a><a name="Purpose"></a>
  245. <h2 class="h3">Purpose</h2>
  246. <div class="section">
  247. <p>This document describes the Capacity Scheduler, a pluggable Map/Reduce scheduler for Hadoop which provides a way to share large clusters.</p>
  248. </div>
  249. <a name="N10017"></a><a name="Features"></a>
  250. <h2 class="h3">Features</h2>
  251. <div class="section">
  252. <p>The Capacity Scheduler supports the following features:</p>
  253. <ul>
  254. <li>
  255. Support for multiple queues, where a job is submitted to a queue.
  256. </li>
  257. <li>
  258. Queues are guaranteed a fraction of the capacity of the grid (their
  259. 'guaranteed capacity') in the sense that a certain capacity of
  260. resources will be at their disposal. All jobs submitted to a
  261. queue will have access to the capacity guaranteed to the queue.
  262. </li>
  263. <li>
  264. Free resources can be allocated to any queue beyond its guaranteed
  265. capacity. These excess allocated resources can be reclaimed and made
  266. available to another queue in order to meet its capacity guarantee.
  267. </li>
  268. <li>
  269. The scheduler guarantees that excess resources taken from a queue
  270. will be restored to it within N minutes of its need for them.
  271. </li>
  272. <li>
  273. Queues optionally support job priorities (disabled by default).
  274. </li>
  275. <li>
  276. Within a queue, jobs with higher priority will have access to the
  277. queue's resources before jobs with lower priority. However, once a
  278. job is running, it will not be preempted for a higher priority job.
  279. </li>
  280. <li>
  281. In order to prevent one or more users from monopolizing its
  282. resources, each queue enforces a limit on the percentage of
  283. resources allocated to a user at any given time, if there is
  284. competition for them.
  285. </li>
  286. <li>
  287. Support for memory-intensive jobs, wherein a job can optionally
  288. specify higher memory-requirements than the default, and the tasks
  289. of the job will only be run on TaskTrackers that have enough memory
  290. to spare.
  291. </li>
  292. </ul>
  293. </div>
  294. <a name="N1003C"></a><a name="Picking+a+task+to+run"></a>
  295. <h2 class="h3">Picking a task to run</h2>
  296. <div class="section">
  297. <p>Note that many of these steps can be, and will be, enhanced over time
  298. to provide better algorithms.</p>
  299. <p>Whenever a TaskTracker is free, the Capacity Scheduler first picks a
  300. queue that needs to reclaim any resources the earliest (this is a queue
  301. whose resources were temporarily being used by some other queue and now
  302. needs access to those resources). If no such queue is found, it then picks
  303. a queue which has the most free space (whose ratio of # of running slots to
  304. guaranteed capacity is the lowest).</p>
  305. <p>Once a queue is selected, the scheduler picks a job in the queue. Jobs
  306. are sorted based on when they're submitted and their priorities (if the
  307. queue supports priorities). Jobs are considered in order, and a job is
  308. selected if its user is within the user-quota for the queue, i.e., the
  309. user is not already using queue resources above his/her limit. The
  310. scheduler also makes sure that there is enough free memory in the
  311. TaskTracker to run the job's task, in case the job has special memory
  312. requirements.</p>
  313. <p>Once a job is selected, the scheduler picks a task to run. This logic
  314. to pick a task remains unchanged from earlier versions.</p>
  315. </div>
  316. <a name="N1004F"></a><a name="Reclaiming+capacity"></a>
  317. <h2 class="h3">Reclaiming capacity</h2>
  318. <div class="section">
  319. <p>Periodically, the scheduler determines:</p>
  320. <ul>
  321. <li>
  322. if a queue needs to reclaim capacity. This happens when a queue has
  323. at least one task pending and part of its guaranteed capacity is
  324. being used by some other queue. If this happens, the scheduler notes
  325. the amount of resources it needs to reclaim for this queue within a
  326. specified period of time (the reclaim time).
  327. </li>
  328. <li>
  329. if a queue has not received all the resources it needed to reclaim,
  330. and its reclaim time is about to expire. In this case, the scheduler
  331. needs to kill tasks from queues running over capacity. This it does
  332. by killing the tasks that started the latest.
  333. </li>
  334. </ul>
  335. </div>
  336. <a name="N10062"></a><a name="Installation"></a>
  337. <h2 class="h3">Installation</h2>
  338. <div class="section">
  339. <p>The capacity scheduler is available as a JAR file in the Hadoop
  340. tarball under the <em>contrib/capacity-scheduler</em> directory. The name of
  341. the JAR file would be along the lines of hadoop-*-capacity-scheduler.jar.</p>
  342. <p>You can also build the scheduler from source by executing
  343. <em>ant package</em>, in which case it would be available under
  344. <em>build/contrib/capacity-scheduler</em>.</p>
  345. <p>To run the capacity scheduler in your Hadoop installation, you need
  346. to put it on the <em>CLASSPATH</em>. The easiest way is to copy the
  347. <span class="codefrag">hadoop-*-capacity-scheduler.jar</span>
  348. to <span class="codefrag">HADOOP_HOME/lib</span>. Alternatively, you can modify
  349. <em>HADOOP_CLASSPATH</em> to include this jar, in
  350. <span class="codefrag">conf/hadoop-env.sh</span>.</p>
  351. </div>
  352. <a name="N1008A"></a><a name="Configuration"></a>
  353. <h2 class="h3">Configuration</h2>
  354. <div class="section">
  355. <a name="N10090"></a><a name="Using+the+capacity+scheduler"></a>
  356. <h3 class="h4">Using the capacity scheduler</h3>
  357. <p>
  358. To make the Hadoop framework use the capacity scheduler, set up
  359. the following property in the site configuration:</p>
  360. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  361. <tr>
  362. <td colspan="1" rowspan="1">Property</td>
  363. <td colspan="1" rowspan="1">Value</td>
  364. </tr>
  365. <tr>
  366. <td colspan="1" rowspan="1">mapred.jobtracker.taskScheduler</td>
  367. <td colspan="1" rowspan="1">org.apache.hadoop.mapred.CapacityTaskScheduler</td>
  368. </tr>
  369. </table>
  370. <a name="N100B7"></a><a name="Setting+up+queues"></a>
  371. <h3 class="h4">Setting up queues</h3>
  372. <p>
  373. You can define multiple queues to which users can submit jobs with
  374. the capacity scheduler. To define multiple queues, you should edit
  375. the site configuration for Hadoop and modify the
  376. <em>mapred.queue.names</em> property.
  377. </p>
  378. <p>
  379. You can also configure ACLs for controlling which users or groups
  380. have access to the queues.
  381. </p>
  382. <p>
  383. For more details, refer to
  384. <a href="cluster_setup.html#Configuring+the+Hadoop+Daemons">Cluster
  385. Setup</a> documentation.
  386. </p>
  387. <a name="N100CE"></a><a name="Configuring+properties+for+queues"></a>
  388. <h3 class="h4">Configuring properties for queues</h3>
  389. <p>The capacity scheduler can be configured with several properties
  390. for each queue that control the behavior of the scheduler. This
  391. configuration is in the <em>conf/capacity-scheduler.xml</em>. By
  392. default, the configuration is set up for one queue, named
  393. <em>default</em>.</p>
  394. <p>To specify a property for a queue that is defined in the site
  395. configuration, you should use the property name as
  396. <em>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.&lt;property-name&gt;</em>.
  397. </p>
  398. <p>For example, to define the property <em>guaranteed-capacity</em>
  399. for queue named <em>research</em>, you should specify the property
  400. name as
  401. <em>mapred.capacity-scheduler.queue.research.guaranteed-capacity</em>.
  402. </p>
  403. <p>The properties defined for queues and their descriptions are
  404. listed in the table below:</p>
  405. <table class="ForrestTable" cellspacing="1" cellpadding="4">
  406. <tr>
  407. <th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Description</th>
  408. </tr>
  409. <tr>
  410. <td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.guaranteed-capacity</td>
  411. <td colspan="1" rowspan="1">Percentage of the number of slots in the cluster that are
  412. guaranteed to be available for jobs in this queue.
  413. The sum of guaranteed capacities for all queues should be less
  414. than or equal to 100.</td>
  415. </tr>
  416. <tr>
  417. <td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.reclaim-time-limit</td>
  418. <td colspan="1" rowspan="1">The amount of time, in seconds, before which resources
  419. distributed to other queues will be reclaimed.</td>
  420. </tr>
  421. <tr>
  422. <td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.supports-priority</td>
  423. <td colspan="1" rowspan="1">If true, priorities of jobs will be taken into account in scheduling
  424. decisions.</td>
  425. </tr>
  426. <tr>
  427. <td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.minimum-user-limit-percent</td>
  428. <td colspan="1" rowspan="1">Each queue enforces a limit on the percentage of resources
  429. allocated to a user at any given time, if there is competition
  430. for them. This user limit can vary between a minimum and maximum
  431. value. The former is set to this property value, and the latter
  432. depends on the number of users who have submitted jobs. For example,
  433. suppose the value of this property is 25. If two users have
  434. submitted jobs to a queue, no single user can use more than 50%
  435. of the queue resources. If a third user submits a job, no single
  436. user can use more than 33% of the queue resources. With 4 or more
  437. users, no user can use more than 25% of the queue's resources. A
  438. value of 100 implies no user limits are imposed.</td>
  439. </tr>
  440. </table>
  441. <a name="N10130"></a><a name="Reviewing+the+configuration+of+the+capacity+scheduler"></a>
  442. <h3 class="h4">Reviewing the configuration of the capacity scheduler</h3>
  443. <p>
  444. Once the installation and configuration are completed, you can review
  445. them from the admin UI after starting the Map/Reduce cluster.
  446. </p>
  447. <ul>
  448. <li>Start the Map/Reduce cluster as usual.</li>
  449. <li>Open the JobTracker web UI.</li>
  450. <li>The queues you have configured should be listed under the <em>Scheduling
  451. Information</em> section of the page.</li>
  452. <li>The properties for the queues should be visible in the <em>Scheduling
  453. Information</em> column against each queue.</li>
  454. </ul>
  455. </div>
  456. </div>
  457. <!--+
  458. |end content
  459. +-->
  460. <div class="clearboth">&nbsp;</div>
  461. </div>
  462. <div id="footer">
  463. <!--+
  464. |start bottomstrip
  465. +-->
  466. <div class="lastmodified">
  467. <script type="text/javascript"><!--
  468. document.write("Last Published: " + document.lastModified);
  469. // --></script>
  470. </div>
  471. <div class="copyright">
  472. Copyright &copy;
  473. 2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
  474. </div>
  475. <!--+
  476. |end bottomstrip
  477. +-->
  478. </div>
  479. </body>
  480. </html>