浏览代码

HADOOP-4321. Include documentation for the capacity scheduler. (Hemanth
Yamijala via omalley)
Merge -r 706337:706338 from trunk to branch 0.19.


git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.19@706341 13f79535-47bb-0310-9956-ffa450edef68

Owen O'Malley 16 年之前
父节点
当前提交
139981eb62

+ 3 - 0
CHANGES.txt

@@ -442,6 +442,9 @@ Release 0.19.0 - Unreleased
     HADOOP-4150. Include librecordio in hadoop releases. (Giridharan Kesavan
     via acmurthy) 
 
+    HADOOP-4321. Include documentation for the capacity scheduler. (Hemanth 
+    Yamijala via omalley)
+
   OPTIMIZATIONS
 
     HADOOP-3556. Removed lock contention in MD5Hash by changing the 

+ 3 - 0
docs/SLG_user_guide.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 519 - 0
docs/capacity_scheduler.html

@@ -0,0 +1,519 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Capacity Scheduler</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.19 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Subtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">Quickstart</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">Cluster Setup</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS Architecture</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS User Guide</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS Quotas Administrator Guide</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">Commands Manual</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell Guide</a>
+</div>
+<div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp Guide</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Native Hadoop Libraries</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Capacity Scheduler</div>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+<div class="menuitem">
+<a href="jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
+</div>
+<div class="menuitem">
+<a href="releasenotes.html">Release Notes</a>
+</div>
+<div class="menuitem">
+<a href="changes.html">All Changes</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="capacity_scheduler.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Capacity Scheduler</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Purpose">Purpose</a>
+</li>
+<li>
+<a href="#Features">Features</a>
+</li>
+<li>
+<a href="#Picking+a+task+to+run">Picking a task to run</a>
+</li>
+<li>
+<a href="#Reclaiming+capacity">Reclaiming capacity</a>
+</li>
+<li>
+<a href="#Installation">Installation</a>
+</li>
+<li>
+<a href="#Configuration">Configuration</a>
+<ul class="minitoc">
+<li>
+<a href="#Using+the+capacity+scheduler">Using the capacity scheduler</a>
+</li>
+<li>
+<a href="#Setting+up+queues">Setting up queues</a>
+</li>
+<li>
+<a href="#Configuring+properties+for+queues">Configuring properties for queues</a>
+</li>
+<li>
+<a href="#Reviewing+the+configuration+of+the+capacity+scheduler">Reviewing the configuration of the capacity scheduler</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+  
+    
+<a name="N1000D"></a><a name="Purpose"></a>
+<h2 class="h3">Purpose</h2>
+<div class="section">
+<p>This document describes the Capacity Scheduler, a pluggable Map/Reduce scheduler for Hadoop which provides a way to share large clusters.</p>
+</div>
+    
+    
+<a name="N10017"></a><a name="Features"></a>
+<h2 class="h3">Features</h2>
+<div class="section">
+<p>The Capacity Scheduler supports the following features:</p>
+<ul>
+        
+<li>
+          Support for multiple queues, where a job is submitted to a queue.
+        </li>
+        
+<li>
+          Queues are guaranteed a fraction of the capacity of the grid (their 
+ 	      'guaranteed capacity') in the sense that a certain capacity of 
+ 	      resources will be at their disposal. All jobs submitted to a 
+ 	      queue will have access to the capacity guaranteed to the queue.
+        </li>
+        
+<li>
+          Free resources can be allocated to any queue beyond its guaranteed 
+          capacity. These excess allocated resources can be reclaimed and made 
+          available to another queue in order to meet its capacity guarantee.
+        </li>
+        
+<li>
+          The scheduler guarantees that excess resources taken from a queue 
+          will be restored to it within N minutes of its need for them.
+        </li>
+        
+<li>
+          Queues optionally support job priorities (disabled by default).
+        </li>
+        
+<li>
+          Within a queue, jobs with higher priority will have access to the 
+          queue's resources before jobs with lower priority. However, once a 
+          job is running, it will not be preempted for a higher priority job.
+        </li>
+        
+<li>
+          In order to prevent one or more users from monopolizing its 
+          resources, each queue enforces a limit on the percentage of 
+          resources allocated to a user at any given time, if there is 
+          competition for them.  
+        </li>
+        
+<li>
+          Support for memory-intensive jobs, wherein a job can optionally 
+          specify higher memory-requirements than the default, and the tasks 
+          of the job will only be run on TaskTrackers that have enough memory 
+          to spare.
+        </li>
+      
+</ul>
+</div>
+    
+    
+<a name="N1003C"></a><a name="Picking+a+task+to+run"></a>
+<h2 class="h3">Picking a task to run</h2>
+<div class="section">
+<p>Note that many of these steps can be, and will be, enhanced over time
+      to provide better algorithms.</p>
+<p>Whenever a TaskTracker is free, the Capacity Scheduler first picks a 
+      queue that needs to reclaim any resources the earliest (this is a queue
+      whose resources were temporarily being used by some other queue and now
+      needs access to those resources). If no such queue is found, it then picks
+      a queue which has most free space (whose ratio of # of running slots to 
+      guaranteed capacity is the lowest).</p>
+<p>Once a queue is selected, the scheduler picks a job in the queue. Jobs
+      are sorted based on when they're submitted and their priorities (if the 
+      queue supports priorities). Jobs are considered in order, and a job is 
+      selected if its user is within the user-quota for the queue, i.e., the 
+      user is not already using queue resources above his/her limit. The 
+      scheduler also makes sure that there is enough free memory in the 
+      TaskTracker to run the job's task, in case the job has special memory
+      requirements.</p>
+<p>Once a job is selected, the scheduler picks a task to run. This logic 
+      to pick a task remains unchanged from earlier versions.</p>
+</div>
+    
+    
+<a name="N1004F"></a><a name="Reclaiming+capacity"></a>
+<h2 class="h3">Reclaiming capacity</h2>
+<div class="section">
+<p>Periodically, the scheduler determines:</p>
+<ul>
+	    
+<li>
+	      if a queue needs to reclaim capacity. This happens when a queue has
+	      at least one task pending and part of its guaranteed capacity is 
+	      being used by some other queue. If this happens, the scheduler notes
+	      the amount of resources it needs to reclaim for this queue within a 
+	      specified period of time (the reclaim time). 
+	    </li>
+	    
+<li>
+	      if a queue has not received all the resources it needed to reclaim,
+	      and its reclaim time is about to expire. In this case, the scheduler
+	      needs to kill tasks from queues running over capacity. This it does
+	      by killing the tasks that started the latest.
+	    </li>
+	  
+</ul>
+</div>
+
+    
+<a name="N10062"></a><a name="Installation"></a>
+<h2 class="h3">Installation</h2>
+<div class="section">
+<p>The capacity scheduler is available as a JAR file in the Hadoop
+        tarball under the <em>contrib/capacity-scheduler</em> directory. The name of 
+        the JAR file would be on the lines of hadoop-*-capacity-scheduler.jar.</p>
+<p>You can also build the scheduler from source by executing
+        <em>ant package</em>, in which case it would be available under
+        <em>build/contrib/capacity-scheduler</em>.</p>
+<p>To run the capacity scheduler in your Hadoop installation, you need 
+        to put it on the <em>CLASSPATH</em>. The easiest way is to copy the 
+        <span class="codefrag">hadoop-*-capacity-scheduler.jar</span> from 
+        to <span class="codefrag">HADOOP_HOME/lib</span>. Alternatively, you can modify 
+        <em>HADOOP_CLASSPATH</em> to include this jar, in 
+        <span class="codefrag">conf/hadoop-env.sh</span>.</p>
+</div>
+
+    
+<a name="N1008A"></a><a name="Configuration"></a>
+<h2 class="h3">Configuration</h2>
+<div class="section">
+<a name="N10090"></a><a name="Using+the+capacity+scheduler"></a>
+<h3 class="h4">Using the capacity scheduler</h3>
+<p>
+          To make the Hadoop framework use the capacity scheduler, set up
+          the following property in the site configuration:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+            
+<tr>
+              
+<td colspan="1" rowspan="1">Property</td>
+              <td colspan="1" rowspan="1">Value</td>
+            
+</tr>
+            
+<tr>
+              
+<td colspan="1" rowspan="1">mapred.jobtracker.taskScheduler</td>
+              <td colspan="1" rowspan="1">org.apache.hadoop.mapred.CapacityTaskScheduler</td>
+            
+</tr>
+          
+</table>
+<a name="N100B7"></a><a name="Setting+up+queues"></a>
+<h3 class="h4">Setting up queues</h3>
+<p>
+          You can define multiple queues to which users can submit jobs with
+          the capacity scheduler. To define multiple queues, you should edit
+          the site configuration for Hadoop and modify the
+          <em>mapred.queue.names</em> property.
+        </p>
+<p>
+          You can also configure ACLs for controlling which users or groups
+          have access to the queues.
+        </p>
+<p>
+          For more details, refer to
+          <a href="cluster_setup.html#Configuring+the+Hadoop+Daemons">Cluster 
+          Setup</a> documentation.
+        </p>
+<a name="N100CE"></a><a name="Configuring+properties+for+queues"></a>
+<h3 class="h4">Configuring properties for queues</h3>
+<p>The capacity scheduler can be configured with several properties
+        for each queue that control the behavior of the scheduler. This
+        configuration is in the <em>conf/capacity-scheduler.xml</em>. By
+        default, the configuration is set up for one queue, named 
+        <em>default</em>.</p>
+<p>To specify a property for a queue that is defined in the site
+        configuration, you should use the property name as
+        <em>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.&lt;property-name&gt;</em>.
+        </p>
+<p>For example, to define the property <em>guaranteed-capacity</em>
+        for queue named <em>research</em>, you should specify the property
+        name as 
+        <em>mapred.capacity-scheduler.queue.research.guaranteed-capacity</em>.
+        </p>
+<p>The properties defined for queues and their descriptions are
+        listed in the table below:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+<th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Description</th>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.guaranteed-capacity</td>
+          	<td colspan="1" rowspan="1">Percentage of the number of slots in the cluster that are
+          	guaranteed to be available for jobs in this queue. 
+          	The sum of guaranteed capacities for all queues should be less 
+          	than or equal to 100.</td>
+          
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.reclaim-time-limit</td>
+          	<td colspan="1" rowspan="1">The amount of time, in seconds, before which resources 
+          	distributed to other queues will be reclaimed.</td>
+          
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.supports-priority</td>
+          	<td colspan="1" rowspan="1">If true, priorities of jobs will be taken into account in scheduling 
+          	decisions.</td>
+          
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.capacity-scheduler.queue.&lt;queue-name&gt;.minimum-user-limit-percent</td>
+          	<td colspan="1" rowspan="1">Each queue enforces a limit on the percentage of resources 
+          	allocated to a user at any given time, if there is competition 
+          	for them. This user limit can vary between a minimum and maximum 
+          	value. The former depends on the number of users who have submitted
+          	jobs, and the latter is set to this property value. For example, 
+          	suppose the value of this property is 25. If two users have 
+          	submitted jobs to a queue, no single user can use more than 50% 
+          	of the queue resources. If a third user submits a job, no single 
+          	user can use more than 33% of the queue resources. With 4 or more 
+          	users, no user can use more than 25% of the queue's resources. A 
+          	value of 100 implies no user limits are imposed.</td>
+          
+</tr>
+        
+</table>
+<a name="N10130"></a><a name="Reviewing+the+configuration+of+the+capacity+scheduler"></a>
+<h3 class="h4">Reviewing the configuration of the capacity scheduler</h3>
+<p>
+          Once the installation and configuration is completed, you can review
+          it after starting the Map/Reduce cluster from the admin UI.
+        </p>
+<ul>
+          
+<li>Start the Map/Reduce cluster as usual.</li>
+          
+<li>Open the JobTracker web UI.</li>
+          
+<li>The queues you have configured should be listed under the <em>Scheduling
+              Information</em> section of the page.</li>
+          
+<li>The properties for the queues should be visible in the <em>Scheduling
+              Information</em> column against each queue.</li>
+        
+</ul>
+</div>
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

文件差异内容过多而无法显示
+ 140 - 0
docs/capacity_scheduler.pdf


+ 17 - 4
docs/changes.html

@@ -205,7 +205,7 @@ changes from the prior release.<br />(cutting)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('release_0.19.0_-_unreleased_._improvements_')">  IMPROVEMENTS
-</a>&nbsp;&nbsp;&nbsp;(74)
+</a>&nbsp;&nbsp;&nbsp;(75)
     <ol id="release_0.19.0_-_unreleased_._improvements_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4205">HADOOP-4205</a>. hive: metastore and ql to use the refactored SerDe library.<br />(zshao)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4106">HADOOP-4106</a>. libhdfs: add time, permission and user attribute support
@@ -338,6 +338,8 @@ incrementing the task attempt numbers by 1000 when the job restarts.<br />(Amar
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3790">HADOOP-3790</a>. Add more unit tests for testing HDFS file append.<br />(szetszwo)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4150">HADOOP-4150</a>. Include librecordio in hadoop releases.<br />(Giridharan Kesavan
 via acmurthy)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4321">HADOOP-4321</a>. Include documentation for the capacity scheduler.<br />(Hemanth
+Yamijala via omalley)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('release_0.19.0_-_unreleased_._optimizations_')">  OPTIMIZATIONS
@@ -370,7 +372,7 @@ org.apache.hadoop.mapred  package private instead of public.<br />(omalley)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('release_0.19.0_-_unreleased_._bug_fixes_')">  BUG FIXES
-</a>&nbsp;&nbsp;&nbsp;(133)
+</a>&nbsp;&nbsp;&nbsp;(139)
     <ol id="release_0.19.0_-_unreleased_._bug_fixes_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3563">HADOOP-3563</a>.  Refactor the distributed upgrade code so that it is
 easier to identify datanode and namenode related code.<br />(dhruba)</li>
@@ -618,12 +620,21 @@ capacity-scheduler.<br />(Hemanth Yamijala via acmurthy)</li>
 Phillips via cdouglas)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4426">HADOOP-4426</a>. TestCapacityScheduler broke due to the two commits <a href="http://issues.apache.org/jira/browse/HADOOP-4053">HADOOP-4053</a>
 and <a href="http://issues.apache.org/jira/browse/HADOOP-4373">HADOOP-4373</a>. This patch fixes that.<br />(Hemanth Yamijala via ddas)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4278">HADOOP-4278</a>. Increase debug logging for unit test TestDatanodeDeath.<br />(dhruba)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4418">HADOOP-4418</a>. Updates documentation in forrest for Mapred, streaming and pipes.<br />(Amareshwari Sriramadasu via ddas)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3155">HADOOP-3155</a>. Ensure that there is only one thread fetching
 TaskCompletionEvents on TaskTracker re-init.<br />(Dhruba Borthakur via
 acmurthy)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4425">HADOOP-4425</a>. Fix EditLogInputStream to overload the bulk read method.<br />(cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4427">HADOOP-4427</a>. Adds the new queue/job commands to the manual.<br />(Sreekanth Ramakrishnan via ddas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4278">HADOOP-4278</a>. Increase debug logging for unit test TestDatanodeDeath.
+Fix the case when primary is dead.<br />(dhruba via szetszwo)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4423">HADOOP-4423</a>. Keep block length when the block recovery is triggered by
+append.<br />(szetszwo)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4449">HADOOP-4449</a>. Fix dfsadmin usage.<br />(Raghu Angadi via cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4455">HADOOP-4455</a>. Added TestSerDe so that unit tests can run successfully.<br />(Ashish Thusoo via dhruba)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4399">HADOOP-4399</a>. Make fuse-dfs multi-thread access safe.<br />(Pete Wyckoff via dhruba)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4457">HADOOP-4457</a>. Fixes an input split logging problem introduced by
+<a href="http://issues.apache.org/jira/browse/HADOOP-3245">HADOOP-3245</a>.<br />(Amareshwari Sriramadasu via ddas)</li>
     </ol>
   </li>
 </ul>
@@ -631,7 +642,7 @@ acmurthy)</li>
 </a></h2>
 <ul id="release_0.18.2_-_unreleased_">
   <li><a href="javascript:toggleList('release_0.18.2_-_unreleased_._bug_fixes_')">  BUG FIXES
-</a>&nbsp;&nbsp;&nbsp;(7)
+</a>&nbsp;&nbsp;&nbsp;(8)
     <ol id="release_0.18.2_-_unreleased_._bug_fixes_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4116">HADOOP-4116</a>. Balancer should provide better resource management.<br />(hairong)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3614">HADOOP-3614</a>. Fix a bug that Datanode may use an old GenerationStamp to get
@@ -644,6 +655,8 @@ due to incorrect type used.<br />(hairong)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4351">HADOOP-4351</a>. FSNamesystem.getBlockLocationsInternal throws
 ArrayIndexOutOfBoundsException.<br />(hairong)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4292">HADOOP-4292</a>. Do not support append() for LocalFileSystem.<br />(hairong)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4398">HADOOP-4398</a>. No need to truncate access time in INode. Also fixes NPE
+in CreateEditsLog.<br />(Raghu Angadi)</li>
     </ol>
   </li>
 </ul>

+ 83 - 9
docs/cluster_setup.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">
@@ -518,13 +521,84 @@ document.write("Last Published: " + document.lastModified);
 		      </td>
   		    
 </tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">mapred.queue.names</td>
+          <td colspan="1" rowspan="1">Comma separated list of queues to which jobs can be submitted.</td>
+          <td colspan="1" rowspan="1">
+            The Map/Reduce system always supports at least one queue
+            with the name as <em>default</em>. Hence, this parameter's
+            value should always contain the string <em>default</em>.
+            Some job schedulers supported in Hadoop, like the 
+            <a href="capacity_scheduler.html">Capacity 
+            Scheduler</a>, support multiple queues. If such a scheduler is
+            being used, the list of configured queue names must be
+            specified here. Once queues are defined, users can submit
+            jobs to a queue using the property name 
+            <em>mapred.job.queue.name</em> in the job configuration.
+            There could be a separate 
+            configuration file for configuring properties of these 
+            queues that is managed by the scheduler. 
+            Refer to the documentation of the scheduler for information on 
+            the same.
+          </td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">mapred.acls.enabled</td>
+          <td colspan="1" rowspan="1">Specifies whether ACLs are supported for controlling job
+              submission and administration</td>
+          <td colspan="1" rowspan="1">
+            If <em>true</em>, ACLs would be checked while submitting
+            and administering jobs. ACLs can be specified using the
+            configuration parameters of the form
+            <em>mapred.queue.queue-name.acl-name</em>, defined below.
+          </td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-submit-job</td>
+          <td colspan="1" rowspan="1">List of users and groups that can submit jobs to the
+              specified <em>queue-name</em>.</td>
+          <td colspan="1" rowspan="1">
+            The list of users and groups are both comma separated
+            list of names. The two lists are separated by a blank.
+            Example: <em>user1,user2 group1,group2</em>.
+            If you wish to define only a list of groups, provide
+            a blank at the beginning of the value.
+          </td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-administer-job</td>
+          <td colspan="1" rowspan="1">List of users and groups that can change the priority
+              or kill jobs that have been submitted to the
+              specified <em>queue-name</em>.</td>
+          <td colspan="1" rowspan="1">
+            The list of users and groups are both comma separated
+            list of names. The two lists are separated by a blank.
+            Example: <em>user1,user2 group1,group2</em>.
+            If you wish to define only a list of groups, provide
+            a blank at the beginning of the value. Note that an
+            owner of a job can always change the priority or kill
+            his/her own job, irrespective of the ACLs.
+          </td>
+        
+</tr>
 		  
 </table>
 <p>Typically all the above parameters are marked as 
           <a href="api/org/apache/hadoop/conf/Configuration.html#FinalParams">
          final</a> to ensure that they cannot be overridden by user-applications.
           </p>
-<a name="N1020F"></a><a name="Real-World+Cluster+Configurations"></a>
+<a name="N1027C"></a><a name="Real-World+Cluster+Configurations"></a>
 <h5>Real-World Cluster Configurations</h5>
 <p>This section lists some non-default configuration parameters which 
             have been used to run the <em>sort</em> benchmark on very large 
@@ -685,7 +759,7 @@ document.write("Last Published: " + document.lastModified);
 </li>
             
 </ul>
-<a name="N1032D"></a><a name="Slaves"></a>
+<a name="N1039A"></a><a name="Slaves"></a>
 <h4>Slaves</h4>
 <p>Typically you choose one machine in the cluster to act as the 
           <span class="codefrag">NameNode</span> and one machine as to act as the 
@@ -694,14 +768,14 @@ document.write("Last Published: " + document.lastModified);
           referred to as <em>slaves</em>.</p>
 <p>List all slave hostnames or IP addresses in your 
           <span class="codefrag">conf/slaves</span> file, one per line.</p>
-<a name="N1034C"></a><a name="Logging"></a>
+<a name="N103B9"></a><a name="Logging"></a>
 <h4>Logging</h4>
 <p>Hadoop uses the <a href="http://logging.apache.org/log4j/">Apache 
           log4j</a> via the <a href="http://commons.apache.org/logging/">Apache 
           Commons Logging</a> framework for logging. Edit the 
           <span class="codefrag">conf/log4j.properties</span> file to customize the Hadoop 
           daemons' logging configuration (log-formats and so on).</p>
-<a name="N10360"></a><a name="History+Logging"></a>
+<a name="N103CD"></a><a name="History+Logging"></a>
 <h5>History Logging</h5>
 <p> The job history files are stored in central location 
             <span class="codefrag"> hadoop.job.history.location </span> which can be on DFS also,
@@ -734,10 +808,10 @@ document.write("Last Published: " + document.lastModified);
       typically <span class="codefrag">${HADOOP_HOME}/conf</span>.</p>
 </div>
     
-<a name="N10398"></a><a name="Cluster+Restartability"></a>
+<a name="N10405"></a><a name="Cluster+Restartability"></a>
 <h2 class="h3">Cluster Restartability</h2>
 <div class="section">
-<a name="N1039E"></a><a name="Map%2FReduce"></a>
+<a name="N1040B"></a><a name="Map%2FReduce"></a>
 <h3 class="h4">Map/Reduce</h3>
 <p>The job tracker restart can recover running jobs if 
         <span class="codefrag">mapred.jobtracker.restart.recover</span> is set true and 
@@ -748,7 +822,7 @@ document.write("Last Published: " + document.lastModified);
 </div>
     
     
-<a name="N103B3"></a><a name="Hadoop+Rack+Awareness"></a>
+<a name="N10420"></a><a name="Hadoop+Rack+Awareness"></a>
 <h2 class="h3">Hadoop Rack Awareness</h2>
 <div class="section">
 <p>The HDFS and the Map/Reduce components are rack-aware.</p>
@@ -771,7 +845,7 @@ document.write("Last Published: " + document.lastModified);
 </div>
     
     
-<a name="N103D9"></a><a name="Hadoop+Startup"></a>
+<a name="N10446"></a><a name="Hadoop+Startup"></a>
 <h2 class="h3">Hadoop Startup</h2>
 <div class="section">
 <p>To start a Hadoop cluster you will need to start both the HDFS and 
@@ -806,7 +880,7 @@ document.write("Last Published: " + document.lastModified);
 </div>
     
     
-<a name="N1041F"></a><a name="Hadoop+Shutdown"></a>
+<a name="N1048C"></a><a name="Hadoop+Shutdown"></a>
 <h2 class="h3">Hadoop Shutdown</h2>
 <div class="section">
 <p>

文件差异内容过多而无法显示
+ 3 - 3
docs/cluster_setup.pdf


+ 3 - 0
docs/commands_manual.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/distcp.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hadoop_archives.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_design.html

@@ -155,6 +155,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_permissions_guide.html

@@ -155,6 +155,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_quota_admin_guide.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_shell.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_user_guide.html

@@ -155,6 +155,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod.html

@@ -155,6 +155,9 @@ document.write("Last Published: " + document.lastModified);
 <div class="menupagetitle">Hadoop On Demand</div>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod_admin_guide.html

@@ -155,6 +155,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod_config_guide.html

@@ -155,6 +155,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod_user_guide.html

@@ -155,6 +155,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/index.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 9 - 0
docs/linkmap.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">
@@ -312,6 +315,12 @@ document.write("Last Published: " + document.lastModified);
 </ul>
 </ul>
     
+<ul>
+<li>
+<a href="capacity_scheduler.html">Capacity Scheduler</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>capacity_scheduler</em>
+</li>
+</ul>
+    
 <ul>
 <li>
 <a href="api/index.html">API Docs</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>api</em>

+ 16 - 16
docs/linkmap.pdf

@@ -5,10 +5,10 @@
 /Producer (FOP 0.20.5) >>
 endobj
 5 0 obj
-<< /Length 1185 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1195 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
 stream
-Gatn'9lnc;&;KZL'g@pNL%T0W(fJMs:nT'B#@JBW/%'&,Vm`**ruTiCYnTYej.r5[-$d/lUXcNSer/)Fq)&Wn!F`39cbf*W3X^SP)%8k1$\P5o@jD2E`PJP[^0W@e'$l/&'ZKpj`B.5Z(OS:QXSC,$ot4H=m6t#);6&b]n3'mee]K_!a=',j1l;8@M32$(ZKY'0m`C1=k!S`s^iYO2BrbJIS[9lT)kj?q7bU"3AsSUcU`*7BWG62Lp?^/D:(r`-:[HsMUApW4$p1pp('=&N[]asW<S^nnfF.`SpLeE+rF6C-&0qR&9q"*=+V<r^#QuPtYV..VPJ=-G[a=?[Q)r--BUE1[Ad`0MqS-EgX;dg's7N%5fSOo%`T(JYM?/IUeYr12a2gRM@Lq.//(=AbnQJY"UW-fPb(F8KKT<uqaJ&dJ`MRL:W.t@[$Iql]406?&o:D`0(lMmcN!thp!;ZKVH8uP^lSR4Ugh5SVcm/XN`3*jHXiH'U&F.LNQh,9.qt3r>ZP.9a@$J4]a5M25I2)*>;&?NPFCmIEU*a!W'ZMcCERZ0'k*&GdgK'29_"4XhV:<+6=4Ojo:EfusMI!6Wk(2R6T6XF6k]=aQ^YrbI2j5]`9,JmT1YmtJOg<W/I^["X\_(`D("H._0HmUm0jZ/_2[,P_97>5=R1>2E.W1g>Y='fLK&_fH9!n1'SZ!%X0iR@9`H1>0E*dA,k&[>nKXl]&<_19?BFI7H<o3?5jX61uWDuBh>4#pI8jmlm1i[Y\JQIbGPSGLkRsFIFrN`ahpSHHuJ<<'3?;&[>#"KA8/abkD$a(;eQ\\:7$a065]2L29DNAa)R_e8+RkBl:BA\7Rqbqf"h1o)Kd24'/POFKkO^d<7p'sVXPXG=?'W1Sr(pMLDgZj',`1fWYc)g`]2\ob--Vn#qe<c;X(uNJ53g``^/ie$3S<nFI/322Tc<RneR9Zr9$2"Vl9:@O)?;;j4G"L<^@rH7DLIU,g\Xq]MLgqB2o7q<(*.*n.L<GS0;"gAcUSb5&'_WocUUW#oqj1WlAe4jB6N;lN_[&Xe+MDe$&.N@dXCo#K(Yrp2da,bYD+'=,6ESE:fR4Ae)C-0HaN,p4mI+68j_D;;pkC1:H:ZY^f'PkCh+c,&4LP(0C98P&D2he%D41(S]tes=PX;7rCH#:$kd"YF1,)E#%jZfDir~>
+Gatn'gN&cE'Sc)T'O7LO;:,ucFIesDVPhr3oO<#?7g1nd`ukV?1O8j)E/iq=M?c528Iu<rs0TFfBj%W?Z^8-\5QlbVC-;sZJ*AKmmUn3ZbqBKgp)YG%^aosr7I]cr;JP_G\40>3Sm8sqj[6kn_i5E2\Zqa,ol.T@V&#o1?NgB4X*7%$"q^IMNZ=$&^d?JmX`@RR+_^c6b.d#Ujdr+eoTK0\cG_J.<O(?cD;D1f4lU$b.;;GTel@q!V@qh&%R'/8!D0q:s,IZhb@G&6VM2;eOPdBm__\.C\^7Porpf0t+X$Y,+Gq:4''+f6kNI[dXG%c.T[:NhL9R<JNCo\WHu<1^<DH!#"SZT<:o;[U'a&"6%>rA:&)*qPW-2'$9r=X%@)uR^FU1#t(*X<:aR0t751H$&H9*#41M3[M>%'qb8;mci*RQGs9s$iA'_kSiAocR5;g*$O*6#LM3!RLG:oQsHDf0HZ"6$mbF&U2O$R^QR$",$%<V(#i?A*u.Y<Q:;YI0qQf8\#"^a4M.k),dcgR@sags;]EA6g-ef,(/.fXqp<0#MDQ+nPQPV7p`0jqdGK.qC*R.a>B4<;&o%!(O4dFj`Bk*;Q/'MSe^9(Ae>C2LI<Kd_(5fE])<ua;?@a^%Sf(^!+H-F/c1"&qC:.[84mo8\>gD5*olZf.n?`h^`U3k-%3&Nf)kK7O9DWM(u#_Gfl73X*Aen;pL<g`H$u4PqZ8n1CECA&rg>`Fl`kB^&B4]-FLF`<FaH.c`-,OOX@66C%*U7NO#8]HCafXoIi:ao?_7N/f=Djift5NA8jp;2W>4bWSln@nelGH/&BL,G(377dB4(A83K?]@l$iBGOIUc@^f#!VWSM[-5o84C@Xa=0iC^CgYNP,VfK;]&OkUS:FoW%1Y_3*3oNG]`9XK.hssb3B57rOF5sW%[qM>_rA_+I?KXgAKHJ$b$2'a'rqW[]eul]Y>-k0nIN:/WQ/csug\*YLTj-Tj]K`/hRE6dsjo3TeH9G"o,Hm`RaTjhoW*S37kYGKo0ch<]?#r*B3toXE^?1LR(]IR^*kQKJmRo>E,7r7QFIF@<1oNgHT&%Xt[HA4##XYa[T86?4W-(3'gFl-W"pkJjl9uJ'qcq8?>Rq&Mh[tR[#at,13j6W,du([jK0<P@[=7,#Flud_A_)8C#1dR<F4:^]mFiHddqJ)9#)9,W!_6K:0)~>
 endstream
 endobj
 6 0 obj
@@ -20,10 +20,10 @@ endobj
 >>
 endobj
 7 0 obj
-<< /Length 433 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 465 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
 stream
-GatU._,>n>&A@ZMHYa4q8]W5tCch@K30%0[eo>.<-mOEQW8ld)!Yqs"a`)Qe]R=^B1QT2l,lnh'+Gr;dM8t1XNrrHR;NX;b,Xd!(S?.F;.9pgebl*tmfZ]f,3_$KpD,K'16"1:N5!MtjK':p6WZTQNT<BR;>`BasS)nM7:.Js./uPmMrfM8fAUVS;cK*)3;CY#0jOeBlWnGJs"=T$T.0Kjl#L.\X,&C8[Zu0KR2259@R,"M5leWUN>\%0HnZ<uV_*sL<2Qop==-<7.X>)gqa-fCk+Igq"$A-\mm+m$[j50V#R,m9&JIeHm5^+[G9JZ;XK/PaUhMphr9,bMc+q,2P+C2mt/[Zu:?Y,\mlD@m,*<'+Qmu_!/kY\^n4d[86Ar'MW>^r2_^atE<Hu&#SaDi><=5SVL],G']g%*$uZ(Ki2ng(Z=~>
+GatU.8Pat('Ya/hhQ]6=S:--%gXq-Uj20%Z1U^4SUeh(Lrd3=%^o$Wu3.2=OjXnrs]:;Ne$jd&PO$Mo++QZ7IWiE"n6:D"[>Bh2@(mAI>TN%*Fe*,@PEKl,fKf+RR-nk-(8lERYj(AcR&6tQnlJT5Ij-h),@d"XfMChC@OkPn%UGrkVdDaDnXp]NA(@I-C!]a7Ja3;Lo,PkbBC65"-3JlcKj(r4hn;NT(U/GIa\(kc-1^4.QT-Y=A_`2>POYX[[4[he&40?o^MH)`5hR@YeW!2lH*r_G-#B5tUKhDoF]F)<4qUYSE3D),C$)2=.+HqmI=F1S8ZoUtY2l8.dEPoU/BQTa7hM+CIen^J4p:/<6[#k5"QP+]m_2,C*g.9K:h+nf?S"eIieaOAKdLIuu9S\4ZlbI%DQ[SdrY)@4fod]S*1KiBT8e^R=Raj?ip<J$;jY$FN/*pmB$Uf$kOo~>
 endstream
 endobj
 8 0 obj
@@ -87,19 +87,19 @@ endobj
 xref
 0 14
 0000000000 65535 f 
-0000002641 00000 n 
-0000002705 00000 n 
-0000002755 00000 n 
+0000002683 00000 n 
+0000002747 00000 n 
+0000002797 00000 n 
 0000000015 00000 n 
 0000000071 00000 n 
-0000001348 00000 n 
-0000001454 00000 n 
-0000001978 00000 n 
-0000002084 00000 n 
-0000002196 00000 n 
-0000002306 00000 n 
-0000002417 00000 n 
-0000002525 00000 n 
+0000001358 00000 n 
+0000001464 00000 n 
+0000002020 00000 n 
+0000002126 00000 n 
+0000002238 00000 n 
+0000002348 00000 n 
+0000002459 00000 n 
+0000002567 00000 n 
 trailer
 <<
 /Size 14
@@ -107,5 +107,5 @@ trailer
 /Info 4 0 R
 >>
 startxref
-2877
+2919
 %%EOF

+ 41 - 19
docs/mapred_tutorial.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">
@@ -305,6 +308,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="#Other+Useful+Features">Other Useful Features</a>
 <ul class="minitoc">
 <li>
+<a href="#Submitting+Jobs+to+a+Queue">Submitting Jobs to a Queue</a>
+</li>
+<li>
 <a href="#Counters">Counters</a>
 </li>
 <li>
@@ -339,7 +345,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="#Example%3A+WordCount+v2.0">Example: WordCount v2.0</a>
 <ul class="minitoc">
 <li>
-<a href="#Source+Code-N10F9A">Source Code</a>
+<a href="#Source+Code-N10FB2">Source Code</a>
 </li>
 <li>
 <a href="#Sample+Runs">Sample Runs</a>
@@ -2292,7 +2298,23 @@ document.write("Last Published: " + document.lastModified);
           <span class="codefrag">FileSystem</span>.</p>
 <a name="N10D29"></a><a name="Other+Useful+Features"></a>
 <h3 class="h4">Other Useful Features</h3>
-<a name="N10D2F"></a><a name="Counters"></a>
+<a name="N10D2F"></a><a name="Submitting+Jobs+to+a+Queue"></a>
+<h4>Submitting Jobs to a Queue</h4>
+<p>Some job schedulers supported in Hadoop, like the 
+            <a href="capacity_scheduler.html">Capacity
+            Scheduler</a>, support multiple queues. If such a scheduler is
+            being used, users can submit jobs to one of the queues
+            administrators would have defined in the
+            <em>mapred.queue.names</em> property of the Hadoop site
+            configuration. The queue name can be specified through the
+            <em>mapred.job.queue.name</em> property, or through the
+            <a href="api/org/apache/hadoop/mapred/JobConf.html#setQueueName(java.lang.String)">setQueueName(String)</a>
+            API. Note that administrators may choose to define ACLs
+            that control which queues a job can be submitted to by a
+            given user. In that case, if the job is not submitted
+            to one of the queues where the user has access,
+            the job would be rejected.</p>
+<a name="N10D47"></a><a name="Counters"></a>
 <h4>Counters</h4>
 <p>
 <span class="codefrag">Counters</span> represent global counters, defined either by 
@@ -2309,7 +2331,7 @@ document.write("Last Published: " + document.lastModified);
           in the <span class="codefrag">map</span> and/or 
           <span class="codefrag">reduce</span> methods. These counters are then globally 
           aggregated by the framework.</p>
-<a name="N10D5E"></a><a name="DistributedCache"></a>
+<a name="N10D76"></a><a name="DistributedCache"></a>
 <h4>DistributedCache</h4>
 <p>
 <a href="api/org/apache/hadoop/filecache/DistributedCache.html">
@@ -2380,7 +2402,7 @@ document.write("Last Published: " + document.lastModified);
           <span class="codefrag">mapred.job.classpath.{files|archives}</span>. Similarly the
           cached files that are symlinked into the working directory of the
           task can be used to distribute native libraries and load them.</p>
-<a name="N10DE1"></a><a name="Tool"></a>
+<a name="N10DF9"></a><a name="Tool"></a>
 <h4>Tool</h4>
 <p>The <a href="api/org/apache/hadoop/util/Tool.html">Tool</a> 
           interface supports the handling of generic Hadoop command-line options.
@@ -2420,7 +2442,7 @@ document.write("Last Published: " + document.lastModified);
             </span>
           
 </p>
-<a name="N10E13"></a><a name="IsolationRunner"></a>
+<a name="N10E2B"></a><a name="IsolationRunner"></a>
 <h4>IsolationRunner</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/IsolationRunner.html">
@@ -2444,7 +2466,7 @@ document.write("Last Published: " + document.lastModified);
 <p>
 <span class="codefrag">IsolationRunner</span> will run the failed task in a single 
           jvm, which can be in the debugger, over precisely the same input.</p>
-<a name="N10E46"></a><a name="Profiling"></a>
+<a name="N10E5E"></a><a name="Profiling"></a>
 <h4>Profiling</h4>
 <p>Profiling is a utility to get a representative (2 or 3) sample
           of built-in java profiler for a sample of maps and reduces. </p>
@@ -2477,7 +2499,7 @@ document.write("Last Published: " + document.lastModified);
           <span class="codefrag">-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s</span>
           
 </p>
-<a name="N10E7A"></a><a name="Debugging"></a>
+<a name="N10E92"></a><a name="Debugging"></a>
 <h4>Debugging</h4>
 <p>Map/Reduce framework provides a facility to run user-provided 
           scripts for debugging. When map/reduce task fails, user can run 
@@ -2488,14 +2510,14 @@ document.write("Last Published: " + document.lastModified);
 <p> In the following sections we discuss how to submit debug script
           along with the job. For submitting debug script, first it has to
           distributed. Then the script has to supplied in Configuration. </p>
-<a name="N10E86"></a><a name="How+to+distribute+script+file%3A"></a>
+<a name="N10E9E"></a><a name="How+to+distribute+script+file%3A"></a>
 <h5> How to distribute script file: </h5>
 <p>
           The user has to use 
           <a href="mapred_tutorial.html#DistributedCache">DistributedCache</a>
           mechanism to <em>distribute</em> and <em>symlink</em> the
           debug script file.</p>
-<a name="N10E9A"></a><a name="How+to+submit+script%3A"></a>
+<a name="N10EB2"></a><a name="How+to+submit+script%3A"></a>
 <h5> How to submit script: </h5>
 <p> A quick way to submit debug script is to set values for the 
           properties "mapred.map.task.debug.script" and 
@@ -2519,17 +2541,17 @@ document.write("Last Published: " + document.lastModified);
 <span class="codefrag">$script $stdout $stderr $syslog $jobconf $program </span>  
           
 </p>
-<a name="N10EBC"></a><a name="Default+Behavior%3A"></a>
+<a name="N10ED4"></a><a name="Default+Behavior%3A"></a>
 <h5> Default Behavior: </h5>
 <p> For pipes, a default script is run to process core dumps under
           gdb, prints stack trace and gives info about running threads. </p>
-<a name="N10EC7"></a><a name="JobControl"></a>
+<a name="N10EDF"></a><a name="JobControl"></a>
 <h4>JobControl</h4>
 <p>
 <a href="api/org/apache/hadoop/mapred/jobcontrol/package-summary.html">
           JobControl</a> is a utility which encapsulates a set of Map/Reduce jobs
           and their dependencies.</p>
-<a name="N10ED4"></a><a name="Data+Compression"></a>
+<a name="N10EEC"></a><a name="Data+Compression"></a>
 <h4>Data Compression</h4>
 <p>Hadoop Map/Reduce provides facilities for the application-writer to
           specify compression for both intermediate map-outputs and the
@@ -2543,7 +2565,7 @@ document.write("Last Published: " + document.lastModified);
           codecs for reasons of both performance (zlib) and non-availability of
           Java libraries (lzo). More details on their usage and availability are
           available <a href="native_libraries.html">here</a>.</p>
-<a name="N10EF4"></a><a name="Intermediate+Outputs"></a>
+<a name="N10F0C"></a><a name="Intermediate+Outputs"></a>
 <h5>Intermediate Outputs</h5>
 <p>Applications can control compression of intermediate map-outputs
             via the 
@@ -2552,7 +2574,7 @@ document.write("Last Published: " + document.lastModified);
             <span class="codefrag">CompressionCodec</span> to be used via the
             <a href="api/org/apache/hadoop/mapred/JobConf.html#setMapOutputCompressorClass(java.lang.Class)">
             JobConf.setMapOutputCompressorClass(Class)</a> api.</p>
-<a name="N10F09"></a><a name="Job+Outputs"></a>
+<a name="N10F21"></a><a name="Job+Outputs"></a>
 <h5>Job Outputs</h5>
 <p>Applications can control compression of job-outputs via the
             <a href="api/org/apache/hadoop/mapred/FileOutputFormat.html#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)">
@@ -2569,7 +2591,7 @@ document.write("Last Published: " + document.lastModified);
             <a href="api/org/apache/hadoop/mapred/SequenceFileOutputFormat.html#setOutputCompressionType(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.io.SequenceFile.CompressionType)">
             SequenceFileOutputFormat.setOutputCompressionType(JobConf, 
             SequenceFile.CompressionType)</a> api.</p>
-<a name="N10F36"></a><a name="Skipping+Bad+Records"></a>
+<a name="N10F4E"></a><a name="Skipping+Bad+Records"></a>
 <h4>Skipping Bad Records</h4>
 <p>Hadoop provides an optional mode of execution in which the bad 
           records are detected and skipped in further attempts. 
@@ -2643,7 +2665,7 @@ document.write("Last Published: " + document.lastModified);
 </div>
 
     
-<a name="N10F80"></a><a name="Example%3A+WordCount+v2.0"></a>
+<a name="N10F98"></a><a name="Example%3A+WordCount+v2.0"></a>
 <h2 class="h3">Example: WordCount v2.0</h2>
 <div class="section">
 <p>Here is a more complete <span class="codefrag">WordCount</span> which uses many of the
@@ -2653,7 +2675,7 @@ document.write("Last Published: " + document.lastModified);
       <a href="quickstart.html#SingleNodeSetup">pseudo-distributed</a> or
       <a href="quickstart.html#Fully-Distributed+Operation">fully-distributed</a> 
       Hadoop installation.</p>
-<a name="N10F9A"></a><a name="Source+Code-N10F9A"></a>
+<a name="N10FB2"></a><a name="Source+Code-N10FB2"></a>
 <h3 class="h4">Source Code</h3>
 <table class="ForrestTable" cellspacing="1" cellpadding="4">
           
@@ -3863,7 +3885,7 @@ document.write("Last Published: " + document.lastModified);
 </tr>
         
 </table>
-<a name="N116FC"></a><a name="Sample+Runs"></a>
+<a name="N11714"></a><a name="Sample+Runs"></a>
 <h3 class="h4">Sample Runs</h3>
 <p>Sample text-files as input:</p>
 <p>
@@ -4031,7 +4053,7 @@ document.write("Last Published: " + document.lastModified);
 <br>
         
 </p>
-<a name="N117D0"></a><a name="Highlights"></a>
+<a name="N117E8"></a><a name="Highlights"></a>
 <h3 class="h4">Highlights</h3>
 <p>The second version of <span class="codefrag">WordCount</span> improves upon the 
         previous one by using some features offered by the Map/Reduce framework:

文件差异内容过多而无法显示
+ 2 - 2
docs/mapred_tutorial.pdf


+ 3 - 0
docs/native_libraries.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/quickstart.html

@@ -153,6 +153,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/streaming.html

@@ -156,6 +156,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hod.html">Hadoop On Demand</a>
 </div>
 <div class="menuitem">
+<a href="capacity_scheduler.html">Capacity Scheduler</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">

+ 256 - 0
src/docs/src/documentation/content/xdocs/capacity_scheduler.xml

@@ -0,0 +1,256 @@
+<?xml version="1.0"?>
+<!--
+  Copyright 2002-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Capacity Scheduler</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>Purpose</title>
+      
+      <p>This document describes the Capacity Scheduler, a pluggable Map/Reduce scheduler for Hadoop which provides a way to share large clusters.</p>
+    </section>
+    
+    <section>
+      <title>Features</title>
+      
+      <p>The Capacity Scheduler supports the following features:</p> 
+      <ul>
+        <li>
+          Support for multiple queues, where a job is submitted to a queue.
+        </li>
+        <li>
+          Queues are guaranteed a fraction of the capacity of the grid (their 
+ 	      'guaranteed capacity') in the sense that a certain capacity of 
+ 	      resources will be at their disposal. All jobs submitted to a 
+ 	      queue will have access to the capacity guaranteed to the queue.
+        </li>
+        <li>
+          Free resources can be allocated to any queue beyond its guaranteed 
+          capacity. These excess allocated resources can be reclaimed and made 
+          available to another queue in order to meet its capacity guarantee.
+        </li>
+        <li>
+          The scheduler guarantees that excess resources taken from a queue 
+          will be restored to it within N minutes of its need for them.
+        </li>
+        <li>
+          Queues optionally support job priorities (disabled by default).
+        </li>
+        <li>
+          Within a queue, jobs with higher priority will have access to the 
+          queue's resources before jobs with lower priority. However, once a 
+          job is running, it will not be preempted for a higher priority job.
+        </li>
+        <li>
+          In order to prevent one or more users from monopolizing its 
+          resources, each queue enforces a limit on the percentage of 
+          resources allocated to a user at any given time, if there is 
+          competition for them.  
+        </li>
+        <li>
+          Support for memory-intensive jobs, wherein a job can optionally 
+          specify higher memory-requirements than the default, and the tasks 
+          of the job will only be run on TaskTrackers that have enough memory 
+          to spare.
+        </li>
+      </ul>
+    </section>
+    
+    <section>
+      <title>Picking a task to run</title>
+      
+      <p>Note that many of these steps can be, and will be, enhanced over time
+      to provide better algorithms.</p>
+      
+      <p>Whenever a TaskTracker is free, the Capacity Scheduler first picks a 
+      queue that needs to reclaim any resources the earliest (this is a queue
+      whose resources were temporarily being used by some other queue and now
+      needs access to those resources). If no such queue is found, it then picks
+      a queue which has most free space (whose ratio of # of running slots to 
+      guaranteed capacity is the lowest).</p>
+      
+      <p>Once a queue is selected, the scheduler picks a job in the queue. Jobs
+      are sorted based on when they're submitted and their priorities (if the 
+      queue supports priorities). Jobs are considered in order, and a job is 
+      selected if its user is within the user-quota for the queue, i.e., the 
+      user is not already using queue resources above his/her limit. The 
+      scheduler also makes sure that there is enough free memory in the 
+      TaskTracker to run the job's task, in case the job has special memory
+      requirements.</p>
+      
+      <p>Once a job is selected, the scheduler picks a task to run. This logic 
+      to pick a task remains unchanged from earlier versions.</p> 
+      
+    </section>
+    
+    <section>
+      <title>Reclaiming capacity</title>
+
+	  <p>Periodically, the scheduler determines:</p>
+	  <ul>
+	    <li>
+	      if a queue needs to reclaim capacity. This happens when a queue has
+	      at least one task pending and part of its guaranteed capacity is 
+	      being used by some other queue. If this happens, the scheduler notes
+	      the amount of resources it needs to reclaim for this queue within a 
+	      specified period of time (the reclaim time). 
+	    </li>
+	    <li>
+	      if a queue has not received all the resources it needed to reclaim,
+	      and its reclaim time is about to expire. In this case, the scheduler
+	      needs to kill tasks from queues running over capacity. This it does
+	      by killing the tasks that started the latest.
+	    </li>
+	  </ul>   
+
+    </section>
+
+    <section>
+      <title>Installation</title>
+      
+        <p>The capacity scheduler is available as a JAR file in the Hadoop
+        tarball under the <em>contrib/capacity-scheduler</em> directory. The name of 
+        the JAR file would be on the lines of hadoop-*-capacity-scheduler.jar.</p>
+        <p>You can also build the scheduler from source by executing
+        <em>ant package</em>, in which case it would be available under
+        <em>build/contrib/capacity-scheduler</em>.</p>
+        <p>To run the capacity scheduler in your Hadoop installation, you need 
+        to put it on the <em>CLASSPATH</em>. The easiest way is to copy the 
+        <code>hadoop-*-capacity-scheduler.jar</code> from 
+        to <code>HADOOP_HOME/lib</code>. Alternatively, you can modify 
+        <em>HADOOP_CLASSPATH</em> to include this jar, in 
+        <code>conf/hadoop-env.sh</code>.</p>
+    </section>
+
+    <section>
+      <title>Configuration</title>
+
+      <section>
+        <title>Using the capacity scheduler</title>
+        <p>
+          To make the Hadoop framework use the capacity scheduler, set up
+          the following property in the site configuration:</p>
+          <table>
+            <tr>
+              <td>Property</td>
+              <td>Value</td>
+            </tr>
+            <tr>
+              <td>mapred.jobtracker.taskScheduler</td>
+              <td>org.apache.hadoop.mapred.CapacityTaskScheduler</td>
+            </tr>
+          </table>
+      </section>
+
+      <section>
+        <title>Setting up queues</title>
+        <p>
+          You can define multiple queues to which users can submit jobs with
+          the capacity scheduler. To define multiple queues, you should edit
+          the site configuration for Hadoop and modify the
+          <em>mapred.queue.names</em> property.
+        </p>
+        <p>
+          You can also configure ACLs for controlling which users or groups
+          have access to the queues.
+        </p>
+        <p>
+          For more details, refer to
+          <a href="cluster_setup.html#Configuring+the+Hadoop+Daemons">Cluster 
+          Setup</a> documentation.
+        </p>
+      </section>
+  
+      <section>
+        <title>Configuring properties for queues</title>
+
+        <p>The capacity scheduler can be configured with several properties
+        for each queue that control the behavior of the scheduler. This
+        configuration is in the <em>conf/capacity-scheduler.xml</em>. By
+        default, the configuration is set up for one queue, named 
+        <em>default</em>.</p>
+        <p>To specify a property for a queue that is defined in the site
+        configuration, you should use the property name as
+        <em>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.&lt;property-name&gt;</em>.
+        </p>
+        <p>For example, to define the property <em>guaranteed-capacity</em>
+        for queue named <em>research</em>, you should specify the property
+        name as 
+        <em>mapred.capacity-scheduler.queue.research.guaranteed-capacity</em>.
+        </p>
+
+        <p>The properties defined for queues and their descriptions are
+        listed in the table below:</p>
+
+        <table>
+          <tr><th>Name</th><th>Description</th></tr>
+          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.guaranteed-capacity</td>
+          	<td>Percentage of the number of slots in the cluster that are
+          	guaranteed to be available for jobs in this queue. 
+          	The sum of guaranteed capacities for all queues should be less 
+          	than or equal to 100.</td>
+          </tr>
+          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.reclaim-time-limit</td>
+          	<td>The amount of time, in seconds, before which resources 
+          	distributed to other queues will be reclaimed.</td>
+          </tr>
+          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.supports-priority</td>
+          	<td>If true, priorities of jobs will be taken into account in scheduling 
+          	decisions.</td>
+          </tr>
+          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.minimum-user-limit-percent</td>
+          	<td>Each queue enforces a limit on the percentage of resources 
+          	allocated to a user at any given time, if there is competition 
+          	for them. This user limit can vary between a minimum and maximum 
+          	value. The former depends on the number of users who have submitted
+          	jobs, and the latter is set to this property value. For example, 
+          	suppose the value of this property is 25. If two users have 
+          	submitted jobs to a queue, no single user can use more than 50% 
+          	of the queue resources. If a third user submits a job, no single 
+          	user can use more than 33% of the queue resources. With 4 or more 
+          	users, no user can use more than 25% of the queue's resources. A 
+          	value of 100 implies no user limits are imposed.</td>
+          </tr>
+        </table>
+      </section>
+
+      <section>
+        <title>Reviewing the configuration of the capacity scheduler</title>
+        <p>
+          Once the installation and configuration is completed, you can review
+          it after starting the Map/Reduce cluster from the admin UI.
+        </p>
+        <ul>
+          <li>Start the Map/Reduce cluster as usual.</li>
+          <li>Open the JobTracker web UI.</li>
+          <li>The queues you have configured should be listed under the <em>Scheduling
+              Information</em> section of the page.</li>
+          <li>The properties for the queues should be visible in the <em>Scheduling
+              Information</em> column against each queue.</li>
+        </ul>
+      </section>
+    </section>
+  </body>
+  
+</document>

+ 59 - 0
src/docs/src/documentation/content/xdocs/cluster_setup.xml

@@ -244,6 +244,65 @@
 		        TaskTrackers.
 		      </td>
   		    </tr>
+        <tr>
+          <td>mapred.queue.names</td>
+          <td>Comma separated list of queues to which jobs can be submitted.</td>
+          <td>
+            The Map/Reduce system always supports at least one queue
+            with the name as <em>default</em>. Hence, this parameter's
+            value should always contain the string <em>default</em>.
+            Some job schedulers supported in Hadoop, like the 
+            <a href="capacity_scheduler.html">Capacity 
+            Scheduler</a>, support multiple queues. If such a scheduler is
+            being used, the list of configured queue names must be
+            specified here. Once queues are defined, users can submit
+            jobs to a queue using the property name 
+            <em>mapred.job.queue.name</em> in the job configuration.
+            There could be a separate 
+            configuration file for configuring properties of these 
+            queues that is managed by the scheduler. 
+            Refer to the documentation of the scheduler for information on 
+            the same.
+          </td>
+        </tr>
+        <tr>
+          <td>mapred.acls.enabled</td>
+          <td>Specifies whether ACLs are supported for controlling job
+              submission and administration</td>
+          <td>
+            If <em>true</em>, ACLs would be checked while submitting
+            and administering jobs. ACLs can be specified using the
+            configuration parameters of the form
+            <em>mapred.queue.queue-name.acl-name</em>, defined below.
+          </td>
+        </tr>
+        <tr>
+          <td>mapred.queue.<em>queue-name</em>.acl-submit-job</td>
+          <td>List of users and groups that can submit jobs to the
+              specified <em>queue-name</em>.</td>
+          <td>
+            The lists of users and groups are both comma-separated
+            lists of names. The two lists are separated by a blank.
+            Example: <em>user1,user2 group1,group2</em>.
+            If you wish to define only a list of groups, provide
+            a blank at the beginning of the value.
+          </td>
+        </tr>
+        <tr>
+          <td>mapred.queue.<em>queue-name</em>.acl-administer-job</td>
+          <td>List of users and groups that can change the priority
+              or kill jobs that have been submitted to the
+              specified <em>queue-name</em>.</td>
+          <td>
+            The lists of users and groups are both comma-separated
+            lists of names. The two lists are separated by a blank.
+            Example: <em>user1,user2 group1,group2</em>.
+            If you wish to define only a list of groups, provide
+            a blank at the beginning of the value. Note that an
+            owner of a job can always change the priority or kill
+            his/her own job, irrespective of the ACLs.
+          </td>
+        </tr>
 		  </table>
 
           <p>Typically all the above parameters are marked as 

+ 17 - 0
src/docs/src/documentation/content/xdocs/mapred_tutorial.xml

@@ -1702,6 +1702,23 @@
       <section>
         <title>Other Useful Features</title>
  
+        <section>
+          <title>Submitting Jobs to a Queue</title>
+          <p>Some job schedulers supported in Hadoop, like the 
+            <a href="capacity_scheduler.html">Capacity
+            Scheduler</a>, support multiple queues. If such a scheduler is
+            being used, users can submit jobs to one of the queues
+            administrators would have defined in the
+            <em>mapred.queue.names</em> property of the Hadoop site
+            configuration. The queue name can be specified through the
+            <em>mapred.job.queue.name</em> property, or through the
+            <a href="ext:api/org/apache/hadoop/mapred/jobconf/setqueuename">setQueueName(String)</a>
+            API. Note that administrators may choose to define ACLs
+            that control which queues a job can be submitted to by a
+            given user. In that case, if the job is not submitted
+            to one of the queues where the user has access,
+            the job would be rejected.</p>
+        </section>
         <section>
           <title>Counters</title>
           

+ 2 - 0
src/docs/src/documentation/content/xdocs/site.xml

@@ -52,6 +52,7 @@ See http://forrest.apache.org/docs/linking.html for more info.
       <hod-admin-guide href="hod_admin_guide.html"/>
       <hod-config-guide href="hod_config_guide.html"/>
     </hod>
+    <capacity_scheduler label="Capacity Scheduler" href="capacity_scheduler.html"/>
     <api       label="API Docs"           href="ext:api/index" />
     <jdiff     label="API Changes"        href="ext:jdiff/changes" />
     <wiki      label="Wiki"               href="ext:wiki" />
@@ -182,6 +183,7 @@ See http://forrest.apache.org/docs/linking.html for more info.
                 <setprofiletaskrange href="#setProfileTaskRange(boolean,%20java.lang.String)" />
                 <setprofileparams href="#setProfileParams(java.lang.String)" />
                 <setnumtaskstoexecuteperjvm href="#setNumTasksToExecutePerJvm(int)" />
+                <setqueuename href="#setQueueName(java.lang.String)" />
                 <getjoblocaldir href="#getJobLocalDir()" />
                 <getjar href="#getJar()" />
               </jobconf>

部分文件因为文件数量过多而无法显示