Browse Source

Merge -r 629360:629361 from trunk to 0.16 branch. Fixes HADOOP-2730.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.16@629363 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das 17 years ago
Parent
Commit
922e77a048

+ 3 - 0
CHANGES.txt

@@ -6,6 +6,9 @@ Release 0.16.1 - Unrelease
 
     HADOOP-2371. User guide for file permissions in HDFS.
     (Robert Chansler via rangadi)
+
+    HADOOP-2730. HOD documentation update.
+    (Vinod Kumar Vavilapalli via ddas)
     
   BUG FIXES
 

+ 12 - 911
docs/hod.html

@@ -177,122 +177,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="#Introduction"> Introduction </a>
 </li>
 <li>
-<a href="#Feature+List"> Feature List </a>
-<ul class="minitoc">
-<li>
-<a href="#Simplified+Interface+for+Provisioning+Hadoop+Clusters"> Simplified Interface for Provisioning Hadoop Clusters </a>
-</li>
-<li>
-<a href="#Automatic+installation+of+Hadoop"> Automatic installation of Hadoop </a>
-</li>
-<li>
-<a href="#Configuring+Hadoop"> Configuring Hadoop </a>
-</li>
-<li>
-<a href="#Auto-cleanup+of+Unused+Clusters"> Auto-cleanup of Unused Clusters </a>
-</li>
-<li>
-<a href="#Log+Services"> Log Services </a>
-</li>
-</ul>
-</li>
-<li>
-<a href="#HOD+Components"> HOD Components </a>
-<ul class="minitoc">
-<li>
-<a href="#HOD+Client"> HOD Client </a>
-</li>
-<li>
-<a href="#RingMaster"> RingMaster </a>
-</li>
-<li>
-<a href="#HodRing"> HodRing </a>
-</li>
-<li>
-<a href="#Hodrc+%2F+HOD+configuration+file"> Hodrc / HOD configuration file </a>
-</li>
-<li>
-<a href="#Submit+Nodes+and+Compute+Nodes"> Submit Nodes and Compute Nodes </a>
-</li>
-</ul>
-</li>
-<li>
-<a href="#Getting+Started+with+HOD"> Getting Started with HOD </a>
-<ul class="minitoc">
-<li>
-<a href="#Pre-Requisites"> Pre-Requisites </a>
-<ul class="minitoc">
-<li>
-<a href="#Hardware"> Hardware </a>
-</li>
-<li>
-<a href="#Software"> Software </a>
-</li>
-<li>
-<a href="#Resource+Manager+Configuration+Pre-requisites">Resource Manager Configuration Pre-requisites</a>
-</li>
-</ul>
-</li>
-<li>
-<a href="#Setting+up+HOD">Setting up HOD</a>
-</li>
-</ul>
-</li>
-<li>
-<a href="#Running+HOD">Running HOD</a>
-<ul class="minitoc">
-<li>
-<a href="#Overview">Overview</a>
-<ul class="minitoc">
-<li>
-<a href="#Operation+allocate">Operation allocate</a>
-</li>
-<li>
-<a href="#Running+Hadoop+jobs+using+the+allocated+cluster">Running Hadoop jobs using the allocated cluster</a>
-</li>
-<li>
-<a href="#Operation+deallocate">Operation deallocate</a>
-</li>
-</ul>
-</li>
-<li>
-<a href="#Command+Line+Options">Command Line Options</a>
-</li>
-</ul>
-</li>
-<li>
-<a href="#HOD+Configuration"> HOD Configuration </a>
-<ul class="minitoc">
-<li>
-<a href="#Introduction+to+HOD+Configuration"> Introduction to HOD Configuration </a>
-</li>
-<li>
-<a href="#Categories+%2F+Sections+in+HOD+Configuration"> Categories / Sections in HOD Configuration </a>
-</li>
-<li>
-<a href="#Important+and+Commonly+Used+Configuration+Options"> Important and Commonly Used Configuration Options </a>
-<ul class="minitoc">
-<li>
-<a href="#Common+configuration+options"> Common configuration options </a>
-</li>
-<li>
-<a href="#hod+options"> hod options </a>
-</li>
-<li>
-<a href="#resource_manager+options"> resource_manager options </a>
-</li>
-<li>
-<a href="#ringmaster+options"> ringmaster options </a>
-</li>
-<li>
-<a href="#gridservice-hdfs+options"> gridservice-hdfs options </a>
-</li>
-<li>
-<a href="#gridservice-mapred+options"> gridservice-mapred options </a>
-</li>
-</ul>
-</li>
-</ul>
+<a href="#Documentation">Documentation</a>
 </li>
 </ul>
 </div>
@@ -301,810 +186,26 @@ document.write("Last Published: " + document.lastModified);
 <h2 class="h3"> Introduction </h2>
 <div class="section">
 <p>
-      The Hadoop On Demand (<acronym title="Hadoop On Demand">HOD</acronym>) project is a system for provisioning and managing independent Hadoop MapReduce instances on a shared cluster of nodes. HOD uses a resource manager for allocation. At present it supports <a href="http://www.clusterresources.com/pages/products/torque-resource-manager.php">Torque</a> out of the box.
+Hadoop On Demand (HOD) is a system for provisioning virtual Hadoop clusters over a large physical cluster. It uses the Torque resource manager to do node allocation. On the allocated nodes, it can start Hadoop Map/Reduce and HDFS daemons. It automatically generates the appropriate configuration files (hadoop-site.xml) for the Hadoop daemons and client. HOD also has the capability to distribute Hadoop to the nodes in the virtual cluster that it allocates. In short, HOD makes it easy for administrators and users to quickly set up and use Hadoop. It is also a very useful tool for Hadoop developers and testers who need to share a physical cluster for testing their own Hadoop versions.
       </p>
 </div>
-
-    
-<a name="N1001F"></a><a name="Feature+List"></a>
-<h2 class="h3"> Feature List </h2>
-<div class="section">
-<a name="N10025"></a><a name="Simplified+Interface+for+Provisioning+Hadoop+Clusters"></a>
-<h3 class="h4"> Simplified Interface for Provisioning Hadoop Clusters </h3>
-<p>
-        By far, the biggest advantage of HOD is to quickly setup a Hadoop cluster. The user interacts with the cluster through a simple command line interface, the HOD client. HOD brings up a virtual MapReduce cluster with the required number of nodes, which the user can use for running Hadoop jobs. When done, HOD will automatically clean up the resources and make the nodes available again.
-        </p>
-<a name="N1002F"></a><a name="Automatic+installation+of+Hadoop"></a>
-<h3 class="h4"> Automatic installation of Hadoop </h3>
-<p>
-        With HOD, Hadoop does not need to be even installed on the cluster. The user can provide a Hadoop tarball that HOD will automatically distribute to all the nodes in the cluster.
-        </p>
-<a name="N10039"></a><a name="Configuring+Hadoop"></a>
-<h3 class="h4"> Configuring Hadoop </h3>
-<p>
-        Dynamic parameters of Hadoop configuration, such as the NameNode and JobTracker addresses and ports, and file system temporary directories are generated and distributed by HOD automatically to all nodes in the cluster. In addition, HOD allows the user to configure Hadoop parameters at both the server (for e.g. JobTracker) and client (for e.g. JobClient) level, including 'final' parameters, that were introduced with Hadoop 0.15.
-        </p>
-<a name="N10043"></a><a name="Auto-cleanup+of+Unused+Clusters"></a>
-<h3 class="h4"> Auto-cleanup of Unused Clusters </h3>
-<p>
-        HOD has an automatic timeout so that users cannot misuse resources they aren't using. The timeout applies only when there is no MapReduce job running. 
-        </p>
-<a name="N1004D"></a><a name="Log+Services"></a>
-<h3 class="h4"> Log Services </h3>
-<p>
-        HOD can be used to collect all MapReduce logs to a central location for archiving and inspection after the job is completed.
-        </p>
-</div>
-
-    
-<a name="N10058"></a><a name="HOD+Components"></a>
-<h2 class="h3"> HOD Components </h2>
-<div class="section">
-<p>
-      This is a brief overview of the various components of HOD and how they interact to provision Hadoop.
-      </p>
-<a name="N10061"></a><a name="HOD+Client"></a>
-<h3 class="h4"> HOD Client </h3>
-<p>
-        The HOD client is a Unix command that users use to allocate Hadoop MapReduce clusters. The command provides other options to list allocated clusters and deallocate them. The HOD client generates the <em>hadoop-site.xml</em> in a user specified directory. The user can point to this configuration file while running Map/Reduce jobs on the allocated cluster.
-        </p>
-<p>
-        The nodes from where the HOD Client is run are called <em>submit nodes</em> because jobs are submitted to the resource manager system for allocating and running clusters from these nodes.
-        </p>
-<a name="N10074"></a><a name="RingMaster"></a>
-<h3 class="h4"> RingMaster </h3>
-<p>
-        The RingMaster is a HOD process that is started on one node per every allocated cluster. It is submitted as a 'job' to the resource manager by the HOD client. It controls which Hadoop daemons start on which nodes. It provides this information to other HOD processes, such as the HOD client, so users can also determine this information. The RingMaster is responsible for hosting and distributing the Hadoop tarball to all nodes in the cluster. It also automatically cleans up unused clusters.
-        </p>
-<p>
-        
-</p>
-<a name="N10081"></a><a name="HodRing"></a>
-<h3 class="h4"> HodRing </h3>
-<p>
-        The HodRing is a HOD process that runs on every allocated node in the cluster. These processes are run by the RingMaster through the resource manager, using a facility of parallel execution. The HodRings are responsible for launching Hadoop commands on the nodes to bring up the Hadoop daemons. They get the command to launch from the RingMaster.
-        </p>
-<a name="N1008B"></a><a name="Hodrc+%2F+HOD+configuration+file"></a>
-<h3 class="h4"> Hodrc / HOD configuration file </h3>
-<p>
-        An INI style configuration file where the users configure various options for the HOD system, including install locations of different software, resource manager parameters, log and temp file directories, parameters for their MapReduce jobs, etc.
-        </p>
-<a name="N10095"></a><a name="Submit+Nodes+and+Compute+Nodes"></a>
-<h3 class="h4"> Submit Nodes and Compute Nodes </h3>
-<p>
-        The nodes from where the <em>HOD Client</em> is run are referred as <em>submit nodes</em> because jobs are submitted to the resource manager system for allocating and running clusters from these nodes.
-        </p>
-<p>
-        The nodes where the <em>Ringmaster</em> and <em>HodRings</em> run are called the Compute nodes. These are the nodes that get allocated by a resource manager, and on which the Hadoop daemons are provisioned and started.
-        </p>
-</div>
-
-    
-<a name="N100AF"></a><a name="Getting+Started+with+HOD"></a>
-<h2 class="h3"> Getting Started with HOD </h2>
+      
+<a name="N10017"></a><a name="Documentation"></a>
+<h2 class="h3">Documentation</h2>
 <div class="section">
-<a name="N100B5"></a><a name="Pre-Requisites"></a>
-<h3 class="h4"> Pre-Requisites </h3>
-<a name="N100BB"></a><a name="Hardware"></a>
-<h4> Hardware </h4>
-<p>
-          HOD requires a minimum of 3 nodes configured through a resource manager.
-          </p>
-<a name="N100C5"></a><a name="Software"></a>
-<h4> Software </h4>
-<p>
-          The following components are assumed to be installed before using HOD:
-          </p>
-<ul>
-            
-<li>
-              
-<em>Torque:</em> Currently HOD supports Torque out of the box. We assume that you are familiar with configuring Torque. You can get information about this from <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki">here</a>.
-            </li>
-            
-<li>
-              
-<em>Python:</em> We require version 2.5.1, which can be downloaded from <a href="http://www.python.org/">here</a>.
-            </li>
-          
-</ul>
-<p>
-          The following components can be optionally installed for getting better functionality from HOD:
-          </p>
+<p>Please go through the following to know more about using HOD:</p>
 <ul>
-            
-<li>
-              
-<em>Twisted Python:</em> This can be used for improving the scalability of HOD. Twisted Python is available <a href="http://twistedmatrix.com/trac/">here</a>.
-            </li>
-            
-<li>
-            
-<em>Hadoop:</em> HOD can automatically distribute Hadoop to all nodes in the cluster. However, it can also use a pre-installed version of Hadoop, if it is available on all nodes in the cluster. HOD currently supports Hadoop 0.15 and above.
-            </li>
-          
-</ul>
-<p>
-          HOD configuration requires the location of installs of these components to be the same on all nodes in the cluster. It will also make the configuration simpler to have the same location on the submit nodes.
-          </p>
-<a name="N100FF"></a><a name="Resource+Manager+Configuration+Pre-requisites"></a>
-<h4>Resource Manager Configuration Pre-requisites</h4>
-<p>
-          For using HOD with Torque:
-          </p>
-<ul>
-            
-<li>
-            Install Torque components: pbs_server on a head node, pbs_moms on all compute nodes, and PBS client tools on all compute nodes and submit nodes.
-            </li>
-            
-<li>
-            Create a queue for submitting jobs on the pbs_server.
-            </li>
-            
-<li>
-            Specify a name for all nodes in the cluster, by setting a 'node property' to all the nodes. This can be done by using the 'qmgr' command. For example:
-            <em>qmgr -c "set node node properties=cluster-name"</em>
-            
-</li>
-            
-<li>
-            Ensure that jobs can be submitted to the nodes. This can be done by using the 'qsub' command. For example:
-            <em>echo "sleep 30" | qsub -l nodes=3</em>
-            
-</li>
-          
-</ul>
-<p>
-          More information about setting up Torque can be found by referring to the documentation <a href="http://www.clusterresources.com/pages/products/torque-resource-manager.php">here.</a>
-          
-</p>
-<a name="N10126"></a><a name="Setting+up+HOD"></a>
-<h3 class="h4">Setting up HOD</h3>
-<ul>
-          
-<li>
-          HOD is available in the 'contrib' section of Hadoop under the root directory 'hod'. Distribute the files under this directory to all the nodes in the cluster.
-          </li>
-          
-<li>
-          On the node from where you want to run hod, edit the file hodrc which can be found in the <em>install dir/conf</em> directory. This file contains the minimal set of values required for running hod.
-          </li>
-          
+        
 <li>
-          Specify values suitable to your environment for the following variables defined in the configuration file. Note that some of these variables are defined at more than one place in the file.
-          </li>
-      
-</ul>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-          
-<tr>
-            
-<th colspan="1" rowspan="1"> Variable Name </th>
-            <th colspan="1" rowspan="1"> Meaning </th>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1"> ${JAVA_HOME} </td>
-            <td colspan="1" rowspan="1"> Location of Java for Hadoop. Hadoop supports Sun JDK 1.5.x </td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1"> ${CLUSTER_NAME} </td>
-            <td colspan="1" rowspan="1"> Name of the cluster which is specified in the 'node property' as mentioned in resource manager configuration. </td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1"> ${HADOOP_HOME} </td>
-            <td colspan="1" rowspan="1"> Location of Hadoop installation on the compute and submit nodes. </td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1"> ${RM_QUEUE} </td>
-            <td colspan="1" rowspan="1"> Queue configured for submiting jobs in the resource manager configuration. </td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1"> ${RM_HOME} </td>
-            <td colspan="1" rowspan="1"> Location of the resource manager installation on the compute and submit nodes. </td>
-          
-</tr>
+<a href="hod_admin_guide.html">Hod Admin Guide</a> : This guide will walk you through an overview of architecture of HOD, prerequisites, installing various components and dependent software, and configuring HOD to get it up and running.</li>
         
-</table>
-<ul>
-          
 <li>
-          The following environment variables *may* need to be set depending on your environment. These variables must be defined where you run the HOD client, and also be specified in the HOD configuration file as the value of the key resource_manager.env-vars. Multiple variables can be specified as a comma separated list of key=value pairs.
-          </li>
+<a href="hod_user_guide.html">Hod User Guide</a> : This guide will let you know about how to get started on running hod, its various features, command line options and help on troubleshooting in detail.</li>
         
+<li>
+<a href="hod_config_guide.html">Hod Configuration Guide</a> : This guide discusses about onfiguring HOD, describing various configuration sections, parameters and their purpose in detail.</li>
+      
 </ul>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-          
-<tr>
-            
-<th colspan="1" rowspan="1"> Variable Name </th>
-            <th colspan="1" rowspan="1"> Meaning </th>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1">HOD_PYTHON_HOME</td>
-            <td colspan="1" rowspan="1">
-            If you install python to a non-default location of the compute nodes, or submit nodes, then, this variable must be defined to point to the python executable in the non-standard   location.
-            </td>
-          
-</tr>
-        
-</table>
-<p>
-        You can also review other configuration options in the file and modify them to suit your needs. Refer to the the section on configuration below for information about the HOD configuration.
-        </p>
-</div>
-
-    
-<a name="N101B4"></a><a name="Running+HOD"></a>
-<h2 class="h3">Running HOD</h2>
-<div class="section">
-<a name="N101BA"></a><a name="Overview"></a>
-<h3 class="h4">Overview</h3>
-<p>
-        A typical session of HOD will involve atleast three steps: allocate, run hadoop jobs, deallocate.
-        </p>
-<a name="N101C3"></a><a name="Operation+allocate"></a>
-<h4>Operation allocate</h4>
-<p>
-          The allocate operation is used to allocate a set of nodes and install and provision Hadoop on them. It has the following syntax:
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<td colspan="1" rowspan="1">hod -c config_file -t hadoop_tarball_location -o "allocate                 cluster_dir number_of_nodes"</td>
-            
-</tr>
-          
-</table>
-<p>
-          The hadoop_tarball_location must be a location on a shared file system accesible from all nodes in the cluster. Note, the cluster_dir must exist before running the command. If the command completes successfully then cluster_dir/hadoop-site.xml will be generated and will contain information about the allocated cluster's JobTracker and NameNode.
-          </p>
-<p>
-          For example, the following command uses a hodrc file in ~/hod-config/hodrc and allocates Hadoop (provided by the tarball ~/share/hadoop.tar.gz) on 10 nodes, storing the generated Hadoop configuration in a directory named <em>~/hadoop-cluster</em>:
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<td colspan="1" rowspan="1">$ hod -c ~/hod-config/hodrc -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10"</td>
-            
-</tr>
-          
-</table>
-<p>
-          HOD also supports an environment variable called <em>HOD_CONF_DIR</em>. If this is defined, HOD will look for a default hodrc file at $HOD_CONF_DIR/hodrc. Defining this allows the above command to also be run as follows:
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<td colspan="1" rowspan="1">
-                
-<p>$ export HOD_CONF_DIR=~/hod-config</p>
-                
-<p>$ hod -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10"</p>
-              
-</td>
-            
-</tr>
-          
-</table>
-<a name="N10203"></a><a name="Running+Hadoop+jobs+using+the+allocated+cluster"></a>
-<h4>Running Hadoop jobs using the allocated cluster</h4>
-<p>
-          Now, one can run Hadoop jobs using the allocated cluster in the usual manner:
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<td colspan="1" rowspan="1">hadoop --config cluster_dir hadoop_command hadoop_command_args</td>
-            
-</tr>
-          
-</table>
-<p>
-          Continuing our example, the following command will run a wordcount example on the allocated cluster:
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<td colspan="1" rowspan="1">$ hadoop --config ~/hadoop-cluster jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</td>
-            
-</tr>
-          
-</table>
-<a name="N10226"></a><a name="Operation+deallocate"></a>
-<h4>Operation deallocate</h4>
-<p>
-          The deallocate operation is used to release an allocated cluster. When finished with a cluster, deallocate must be run so that the nodes become free for others to use. The deallocate operation has the following syntax:
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<td colspan="1" rowspan="1">hod -o "deallocate cluster_dir"</td>
-            
-</tr>
-          
-</table>
-<p>
-          Continuing our example, the following command will deallocate the cluster:
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<td colspan="1" rowspan="1">$ hod -o "deallocate ~/hadoop-cluster"</td>
-            
-</tr>
-          
-</table>
-<a name="N1024A"></a><a name="Command+Line+Options"></a>
-<h3 class="h4">Command Line Options</h3>
-<p>
-        This section covers the major command line options available via the hod command:
-        </p>
-<p>
-        
-<em>--help</em>
-        
-</p>
-<p>
-        Prints out the help message to see the basic options.
-        </p>
-<p>
-        
-<em>--verbose-help</em>
-        
-</p>
-<p>
-        All configuration options provided in the hodrc file can be passed on the command line, using the syntax --section_name.option_name[=value]. When provided this way, the value provided on command line overrides the option provided in hodrc. The verbose-help command lists all the available options in the hodrc file. This is also a nice way to see the meaning of the configuration options.
-        </p>
-<p>
-        
-<em>-c config_file</em>
-        
-</p>
-<p>
-        Provides the configuration file to use. Can be used with all other options of HOD. Alternatively, the HOD_CONF_DIR environment variable can be defined to specify a directory that contains a file named hodrc, alleviating the need to specify the configuration file in each HOD command.
-        </p>
-<p>
-        
-<em>-b 1|2|3|4</em>
-        
-</p>
-<p>
-        Enables the given debug level. Can be used with all other options of HOD. 4 is most verbose.
-        </p>
-<p>
-        
-<em>-o "help"</em>
-        
-</p>
-<p>
-        Lists the operations available in the operation mode.
-        </p>
-<p>
-        
-<em>-o "allocate cluster_dir number_of_nodes"</em>
-        
-</p>
-<p>
-        Allocates a cluster on the given number of cluster nodes, and store the allocation information in cluster_dir for use with subsequent hadoop commands. Note that the cluster_dir must exist before running the command.
-        </p>
-<p>
-        
-<em>-o "list"</em>
-        
-</p>
-<p>
-        Lists the clusters allocated by this user. Information provided includes the Torque job id corresponding to the cluster, the cluster directory where the allocation information is stored, and whether the Map/Reduce daemon is still active or not.
-        </p>
-<p>
-        
-<em>-o "info cluster_dir"</em>
-        
-</p>
-<p>
-        Lists information about the cluster whose allocation information is stored in the specified cluster directory.
-        </p>
-<p>
-        
-<em>-o "deallocate cluster_dir"</em>
-        
-</p>
-<p>
-       Deallocates the cluster whose allocation information is stored in the specified cluster directory.
-        </p>
-<p>
-        
-<em>-t hadoop_tarball</em>
-        
-</p>
-<p>
-        Provisions Hadoop from the given tar.gz file. This option is only applicable to the allocate operation. For better distribution performance it is recommended that the Hadoop tarball contain only the libraries and binaries, and not the source or documentation. 
-        </p>
-<p>
-        
-<em>-Mkey1=value1 -Mkey2=value2</em>
-        
-</p>
-<p>
-        Provides configuration parameters for the provisioned Map/Reduce daemons (JobTracker and TaskTrackers). A hadoop-site.xml is generated with these values on the cluster nodes
-        </p>
-<p>
-        
-<em>-Hkey1=value1 -Hkey2=value2</em>
-        
-</p>
-<p>
-        Provides configuration parameters for the provisioned HDFS daemons (NameNode and DataNodes). A hadoop-site.xml is generated with these values on the cluster nodes
-        </p>
-<p>
-        
-<em>-Ckey1=value1 -Ckey2=value2</em>
-        
-</p>
-<p>
-        Provides configuration parameters for the client from where jobs can be submitted. A hadoop-site.xml is generated with these values on the submit node.
-        </p>
-</div>
-    
-<a name="N102CA"></a><a name="HOD+Configuration"></a>
-<h2 class="h3"> HOD Configuration </h2>
-<div class="section">
-<a name="N102D0"></a><a name="Introduction+to+HOD+Configuration"></a>
-<h3 class="h4"> Introduction to HOD Configuration </h3>
-<p>
-        Configuration options for HOD are organized as sections and options within them. They can be specified in two ways: a configuration file in the INI format, and as command line options to the HOD shell, specified in the format --section.option[=value]. If the same option is specified in both places, the value specified on the command line overrides the value in the configuration file.
-        </p>
-<p>
-        To get a simple description of all configuration options, you can type <em>hod --verbose-help</em>
-        
-</p>
-<p>
-        This section explains some of the most important or commonly used configuration options in some more detail.
-        </p>
-<a name="N102E3"></a><a name="Categories+%2F+Sections+in+HOD+Configuration"></a>
-<h3 class="h4"> Categories / Sections in HOD Configuration </h3>
-<p>
-        The following are the various sections in the HOD configuration:
-        </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-          
-<tr>
-            
-<th colspan="1" rowspan="1"> Section Name </th>
-            <th colspan="1" rowspan="1"> Description </th>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1">hod</td>
-            <td colspan="1" rowspan="1">Options for the HOD client</td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1">resource_manager</td>
-            <td colspan="1" rowspan="1">Options for specifying which resource manager to use, and other parameters for using that resource manager</td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1">ringmaster</td>
-            <td colspan="1" rowspan="1">Options for the RingMaster process</td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1">hodring</td>
-            <td colspan="1" rowspan="1">Options for the HodRing process</td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1">gridservice-mapred</td>
-            <td colspan="1" rowspan="1">Options for the MapReduce daemons</td>
-          
-</tr>
-          
-<tr>
-            
-<td colspan="1" rowspan="1">gridservice-hdfs</td>
-            <td colspan="1" rowspan="1">Options for the HDFS daemons</td>
-          
-</tr>
-        
-</table>
-<a name="N1034B"></a><a name="Important+and+Commonly+Used+Configuration+Options"></a>
-<h3 class="h4"> Important and Commonly Used Configuration Options </h3>
-<a name="N10351"></a><a name="Common+configuration+options"></a>
-<h4> Common configuration options </h4>
-<p>
-          Certain configuration options are defined in most of the sections of the HOD configuration. Options defined in a section, are used by the process for which that section applies. These options have the same meaning, but can have different values in each section.
-          </p>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<th colspan="1" rowspan="1"> Option Name </th>
-              <th colspan="1" rowspan="1"> Description </th>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">temp-dir</td>
-              <td colspan="1" rowspan="1">Temporary directory for usage by the HOD processes. Make sure that the users who will run hod have rights to create directories under the directory specified here.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">debug</td>
-              <td colspan="1" rowspan="1">A numeric value from 1-4. 4 produces the most log information, and 1 the least.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">log-dir</td>
-              <td colspan="1" rowspan="1">Directory where log files are stored. By default, this is <em>install-location/logs/</em>. The restrictions and notes for the temp-dir variable apply here too.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">xrs-port-range</td>
-              <td colspan="1" rowspan="1">A range of ports, among which an available port shall be picked for use to run any XML-RPC based server daemon processes of HOD.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">http-port-range</td>
-              <td colspan="1" rowspan="1">A range of ports, among which an available port shall be picked for use to run any HTTP based server daemon processes of HOD.</td>
-            
-</tr>
-          
-</table>
-<a name="N103AF"></a><a name="hod+options"></a>
-<h4> hod options </h4>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<th colspan="1" rowspan="1"> Option Name </th>
-              <th colspan="1" rowspan="1"> Description </th>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">cluster</td>
-              <td colspan="1" rowspan="1">A descriptive name given to the cluster. For Torque, this is specified as a 'Node property' for every node in the cluster. HOD uses this value to compute the number of available nodes.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">client-params</td>
-              <td colspan="1" rowspan="1">A comma-separated list of hadoop config parameters specified as key-value pairs. These will be used to generate a hadoop-site.xml on the submit node that should be used for running MapReduce jobs.</td>
-            
-</tr>
-          
-</table>
-<a name="N103E0"></a><a name="resource_manager+options"></a>
-<h4> resource_manager options </h4>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<th colspan="1" rowspan="1"> Option Name </th>
-              <th colspan="1" rowspan="1"> Description </th>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">queue</td>
-              <td colspan="1" rowspan="1">Name of the queue configured in the resource manager to which jobs are to be submitted.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">batch-home</td>
-              <td colspan="1" rowspan="1">Install directory to which 'bin' is appended and under which the executables of the resource manager can be found. </td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">env-vars</td>
-              <td colspan="1" rowspan="1">This is a comma separated list of key-value pairs, expressed as key=value, which would be passed to the jobs launched on the compute nodes. For example, if the python installation is in a non-standard location, one can set the environment variable 'HOD_PYTHON_HOME' to the path to the python executable. The HOD processes launched on the compute nodes can then use this variable.</td>
-            
-</tr>
-          
-</table>
-<a name="N1041E"></a><a name="ringmaster+options"></a>
-<h4> ringmaster options </h4>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<th colspan="1" rowspan="1"> Option Name </th>
-              <th colspan="1" rowspan="1"> Description </th>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">work-dirs</td>
-              <td colspan="1" rowspan="1">These are a list of comma separated paths that will serve as the root for directories that HOD generates and passes to Hadoop for use to store DFS / MapReduce data. For e.g. this is where DFS data blocks will be stored. Typically, as many paths are specified as there are disks available to ensure all disks are being utilized. The restrictions and notes for the temp-dir variable apply here too.</td>
-            
-</tr>
-          
-</table>
-<a name="N10442"></a><a name="gridservice-hdfs+options"></a>
-<h4> gridservice-hdfs options </h4>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<th colspan="1" rowspan="1"> Option Name </th>
-              <th colspan="1" rowspan="1"> Description </th>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">external</td>
-              <td colspan="1" rowspan="1">
-              
-<p> If false, this indicates that a HDFS cluster must be bought up by the HOD system, on the nodes which it allocates via the allocate command. Note that in that case, when the cluster is de-allocated, it will bring down the HDFS cluster, and all the data will be lost. If true, it will try and connect to an externally configured HDFS system. </p>
-              
-<p>Typically, because input for jobs are placed into HDFS before jobs are run, and also the output from jobs in HDFS is required to be persistent, an internal HDFS cluster is of little value in a production system. However, it allows for quick testing.</p>
-              
-</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">host</td>
-              <td colspan="1" rowspan="1">Hostname of the externally configured NameNode, if any.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">fs_port</td>
-              <td colspan="1" rowspan="1">Port to which NameNode RPC server is bound.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">info_port</td>
-              <td colspan="1" rowspan="1">Port to which the NameNode web UI server is bound.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">pkgs</td>
-              <td colspan="1" rowspan="1">Installation directory, under which bin/hadoop executable is located. This can be used to use a pre-installed version of Hadoop on the cluster.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">server-params</td>
-              <td colspan="1" rowspan="1">A comma-separated list of hadoop config parameters specified key-value pairs. These will be used to generate a hadoop-site.xml that will be used by the NameNode and DataNodes.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">final-server-params</td>
-              <td colspan="1" rowspan="1">Same as above, except they will be marked final.</td>
-            
-</tr>
-          
-</table>
-<a name="N104BA"></a><a name="gridservice-mapred+options"></a>
-<h4> gridservice-mapred options </h4>
-<table class="ForrestTable" cellspacing="1" cellpadding="4">
-            
-<tr>
-              
-<th colspan="1" rowspan="1"> Option Name </th>
-              <th colspan="1" rowspan="1"> Description </th>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">external</td>
-              <td colspan="1" rowspan="1">
-              
-<p> If false, this indicates that a MapReduce cluster must be bought up by the HOD system on the nodes which it allocates via the allocate command. If true, if will try and connect to an externally configured MapReduce system.</p>
-              
-</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">host</td>
-              <td colspan="1" rowspan="1">Hostname of the externally configured JobTracker, if any.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">tracker_port</td>
-              <td colspan="1" rowspan="1">Port to which the JobTracker RPC server is bound.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">info_port</td>
-              <td colspan="1" rowspan="1">Port to which the JobTracker web UI server is bound.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">pkgs</td>
-              <td colspan="1" rowspan="1">Installation directory, under which bin/hadoop executable is located. This can be used to use a pre-installed version of Hadoop on the cluster.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">server-params</td>
-              <td colspan="1" rowspan="1">A comma-separated list of hadoop config parameters specified key-value pairs. These will be used to generate a hadoop-site.xml that will be used by the JobTracker and TaskTrackers.</td>
-            
-</tr>
-            
-<tr>
-              
-<td colspan="1" rowspan="1">final-server-params</td>
-              <td colspan="1" rowspan="1">Same as above, except they will be marked final.</td>
-            
-</tr>
-          
-</table>
 </div>
   
 </div>
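For reference, the workflow that this page previously documented inline (and that now lives in the linked guides) amounts to three steps: allocate a cluster, run Hadoop jobs against the generated configuration, and deallocate. A minimal example session using the same commands the old page showed; the config directory, tarball path, node count and job paths are illustrative:

$ export HOD_CONF_DIR=~/hod-config                                  # HOD looks for a default hodrc at $HOD_CONF_DIR/hodrc
$ hod -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10"    # ~/hadoop-cluster must already exist; hadoop-site.xml is written there
$ hadoop --config ~/hadoop-cluster jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output
$ hod -o "deallocate ~/hadoop-cluster"                              # release the nodes when done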

File diff not shown because it is too large
+ 5 - 280
docs/hod.pdf


+ 491 - 0
docs/hod_admin_guide.html

@@ -0,0 +1,491 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title> 
+      Hadoop On Demand
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.jpg" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.16 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">Quickstart</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">Cluster Setup</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS Architecture</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS User Guide</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Native Hadoop Libraries</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hod_admin_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1> 
+      Hadoop On Demand
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Overview">Overview</a>
+</li>
+<li>
+<a href="#Pre-requisites">Pre-requisites</a>
+</li>
+<li>
+<a href="#Resource+Manager">Resource Manager</a>
+</li>
+<li>
+<a href="#Installing+HOD">Installing HOD</a>
+</li>
+<li>
+<a href="#Configuring+HOD">Configuring HOD</a>
+<ul class="minitoc">
+<li>
+<a href="#Minimal+Configuration+to+get+started">Minimal Configuration to get started</a>
+</li>
+<li>
+<a href="#Advanced+Configuration">Advanced Configuration</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Running+HOD">Running HOD</a>
+</li>
+</ul>
+</div>
+
+<a name="N1000C"></a><a name="Overview"></a>
+<h2 class="h3">Overview</h2>
+<div class="section">
+<p>The Hadoop On Demand (HOD) project is a system for provisioning and
+managing independent Hadoop MapReduce and HDFS instances on a shared cluster 
+of nodes. HOD is a tool that makes it easy for administrators and users to 
+quickly set up and use Hadoop. It is also a very useful tool for Hadoop developers 
+and testers who need to share a physical cluster for testing their own Hadoop 
+versions.
+</p>
+<p>HOD relies on a resource manager (RM) for allocation of nodes that it can use for
+running Hadoop instances. At present it runs with the <a href="http://www.clusterresources.com/pages/products/torque-resource-manager.php">Torque
+resource manager</a>.
+</p>
+<p>
+The basic system architecture of HOD includes components from:</p>
+<ul>
+  
+<li>A Resource manager (possibly together with a scheduler),</li>
+  
+<li>HOD components, and </li>
+  
+<li>Hadoop Map/Reduce and HDFS daemons.</li>
+
+</ul>
+<p>
+HOD provisions and maintains Hadoop Map/Reduce and, optionally, HDFS instances 
+through interaction with the above components on a given cluster of nodes. A cluster of
+nodes can be thought of as comprising of two sets of nodes:</p>
+<ul>
+  
+<li>Submit nodes: Users use the HOD client on these nodes to allocate clusters, and then
+use the Hadoop client to submit Hadoop jobs. </li>
+  
+<li>Compute nodes: Using the resource manager, HOD components are run on these nodes to 
+provision the Hadoop daemons. After that Hadoop jobs run on them.</li>
+
+</ul>
+<p>
+Here is a brief description of the sequence of operations in allocating a cluster and
+running jobs on them.
+</p>
+<ul>
+  
+<li>The user uses the HOD client on the Submit node to allocate a required number of
+cluster nodes, and provision Hadoop on them.</li>
+  
+<li>The HOD client uses a Resource Manager interface, (qsub, in Torque), to submit a HOD
+process, called the RingMaster, as a Resource Manager job, requesting the user desired number 
+of nodes. This job is submitted to the central server of the Resource Manager (pbs_server, in Torque).</li>
+  
+<li>On the compute nodes, the resource manager slave daemons, (pbs_moms in Torque), accept
+and run jobs that they are given by the central server (pbs_server in Torque). The RingMaster 
+process is started on one of the compute nodes (mother superior, in Torque).</li>
+  
+<li>The Ringmaster then uses another Resource Manager interface, (pbsdsh, in Torque), to run
+the second HOD component, HodRing, as distributed tasks on each of the compute
+nodes allocated.</li>
+  
+<li>The Hodrings, after initializing, communicate with the Ringmaster to get Hadoop commands, 
+and run them accordingly. Once the Hadoop commands are started, they register with the RingMaster,
+giving information about the daemons.</li>
+  
+<li>All the configuration files needed for the Hadoop instances are generated by HOD itself, 
+some obtained from options given by the user in its own configuration file.</li>
+  
+<li>The HOD client keeps communicating with the RingMaster to find out the location of the 
+JobTracker and HDFS daemons.</li>
+
+</ul>
+<p>The rest of the document deals with the steps needed to setup HOD on a physical cluster of nodes.</p>
+</div>
+
+
+<a name="N10056"></a><a name="Pre-requisites"></a>
+<h2 class="h3">Pre-requisites</h2>
+<div class="section">
+<p>Operating System: HOD is currently tested on RHEL4.<br>
+Nodes : HOD requires a minimum of 3 nodes configured through a resource manager.<br>
+</p>
+<p> Software </p>
+<p>The following components are to be installed on *ALL* the nodes before using HOD:</p>
+<ul>
+ 
+<li>Torque: Resource manager</li>
+ 
+<li>
+<a href="http://www.python.org">Python</a> : HOD requires version 2.5.1 of Python.</li>
+
+</ul>
+<p>The following components can be optionally installed for getting better
+functionality from HOD:</p>
+<ul>
+ 
+<li>
+<a href="http://twistedmatrix.com/trac/">Twisted Python</a>: This can be
+  used for improving the scalability of HOD. If this module is detected to be
+  installed, HOD uses it, else it falls back to default modules.</li>
+ 
+<li>
+<a href="http://hadoop.apache.org/core/">Hadoop</a>: HOD can automatically
+ distribute Hadoop to all nodes in the cluster. However, it can also use a
+ pre-installed version of Hadoop, if it is available on all nodes in the cluster.
+  HOD currently supports Hadoop 0.15 and above.</li>
+
+</ul>
+<p>NOTE: HOD configuration requires the location of installs of these
+components to be the same on all nodes in the cluster. It will also
+make the configuration simpler to have the same location on the submit
+nodes.
+</p>
+</div>
+
+
+<a name="N1008A"></a><a name="Resource+Manager"></a>
+<h2 class="h3">Resource Manager</h2>
+<div class="section">
+<p>  Currently HOD works with the Torque resource manager, which it uses for its node
+  allocation and job submission. Torque is an open source resource manager from
+  <a href="http://www.clusterresources.com">Cluster Resources</a>, a community effort
+  based on the PBS project. It provides control over batch jobs and distributed compute nodes. Torque is
+  freely available for download from <a href="http://www.clusterresources.com/downloads/torque/">here</a>.
+  </p>
+<p>  All documentation related to torque can be seen under
+  the section TORQUE Resource Manager <a href="http://www.clusterresources.com/pages/resources/documentation.php">here</a>. You can
+  get wiki documentation from <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki">here</a>.
+  Users may wish to subscribe to TORQUE&rsquo;s mailing list or view the archive for questions,
+  comments <a href="http://www.clusterresources.com/pages/resources/mailing-lists.php">here</a>.
+</p>
+<p>For using HOD with Torque:</p>
+<ul>
+ 
+<li>Install Torque components: pbs_server on one node (the head node), pbs_mom on all
+  compute nodes, and PBS client tools on all compute nodes and submit
+  nodes. Perform at least a basic configuration so that the Torque system is up and
+  running, i.e. pbs_server knows which machines to talk to. Look <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.2_basic_configuration">here</a>
+  for basic configuration.
+
+  For advanced configuration, see <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.3_advanced_configuration">here</a>
+</li>
+ 
+<li>Create a queue for submitting jobs on the pbs_server. The name of the queue is the
+  same as the HOD configuration parameter, resource-manager.queue. The Hod client uses this queue to
+  submit the Ringmaster process as a Torque job.</li>
+ 
+<li>Specify a 'cluster name' as a 'property' for all nodes in the cluster.
+  This can be done by using the 'qmgr' command. For example:
+  qmgr -c "set node node properties=cluster-name". The name of the cluster is the same as
+  the HOD configuration parameter, hod.cluster. </li>
+ 
+<li>Ensure that jobs can be submitted to the nodes. This can be done by
+  using the 'qsub' command. For example:
+  echo "sleep 30" | qsub -l nodes=3</li>
+
+</ul>
+</div>
+
+
+<a name="N100C4"></a><a name="Installing+HOD"></a>
+<h2 class="h3">Installing HOD</h2>
+<div class="section">
+<p>Now that the resource manager setup is done, we proceed to obtaining and
+installing HOD.</p>
+<ul>
+ 
+<li>If you are getting HOD from the Hadoop tarball, it is available under the
+  'contrib' section of Hadoop, under the root directory 'hod'.</li>
+ 
+<li>If you are building from source, you can run ant tar from the Hadoop root
+  directory, to generate the Hadoop tarball, and then pick HOD from there,
+  as described in the point above.</li>
+ 
+<li>Distribute the files under this directory to all the nodes in the
+  cluster. Note that the location where the files are copied should be
+  the same on all the nodes.</li>
+  
+<li>Note that compiling hadoop would build HOD with appropriate permissions 
+  set on all the required script files in HOD.</li>
+
+</ul>
+</div>
+
+
+<a name="N100DD"></a><a name="Configuring+HOD"></a>
+<h2 class="h3">Configuring HOD</h2>
+<div class="section">
+<p>After HOD installation is done, it has to be configured before we start using
+it.</p>
+<a name="N100E6"></a><a name="Minimal+Configuration+to+get+started"></a>
+<h3 class="h4">Minimal Configuration to get started</h3>
+<ul>
+ 
+<li>On the node from where you want to run hod, edit the file hodrc
+  which can be found in the &lt;install dir&gt;/conf directory. This file
+  contains the minimal set of values required for running hod.</li>
+ 
+<li>
+
+<p>Specify values suitable to your environment for the following
+  variables defined in the configuration file. Note that some of these
+  variables are defined at more than one place in the file.</p>
+
+  
+<ul>
+   
+<li>${JAVA_HOME}: Location of Java for Hadoop. Hadoop supports Sun JDK
+    1.5.x and above.</li>
+   
+<li>${CLUSTER_NAME}: Name of the cluster which is specified in the
+    'node property' as mentioned in resource manager configuration.</li>
+   
+<li>${HADOOP_HOME}: Location of Hadoop installation on the compute and
+    submit nodes.</li>
+   
+<li>${RM_QUEUE}: Queue configured for submitting jobs in the resource
+    manager configuration.</li>
+   
+<li>${RM_HOME}: Location of the resource manager installation on the
+    compute and submit nodes.</li>
+    
+</ul>
+
+</li>
+
+
+<li>
+
+<p>The following environment variables *may* need to be set depending on
+  your environment. These variables must be defined where you run the
+  HOD client, and also be specified in the HOD configuration file as the
+  value of the key resource_manager.env-vars. Multiple variables can be
+  specified as a comma separated list of key=value pairs.</p>
+
+  
+<ul>
+   
+<li>HOD_PYTHON_HOME: If you install python to a non-default location
+    of the compute nodes, or submit nodes, then, this variable must be
+    defined to point to the python executable in the non-standard
+    location.</li>
+    
+</ul>
+
+</li>
+
+</ul>
+<a name="N10117"></a><a name="Advanced+Configuration"></a>
+<h3 class="h4">Advanced Configuration</h3>
+<p> You can review other configuration options in the file and modify them to suit
+ your needs. Refer to the <a href="hod_config_guide.html">Configuration Guide</a> for information about the HOD
+ configuration.
+    </p>
+</div>
+
+  
+<a name="N10126"></a><a name="Running+HOD"></a>
+<h2 class="h3">Running HOD</h2>
+<div class="section">
+<p>You can now proceed to the <a href="hod_user_guide.html">HOD User Guide</a> for information about how to run HOD,
+    its various features and options, and for help with troubleshooting.</p>
+</div>
+
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>
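As a quick sanity check of the Torque prerequisites this admin guide lists, the node-property and job-submission steps can be exercised from a shell on the head node. A sketch, where cluster-name, the node name ("node") and the node count are placeholders taken from the guide's own examples:

$ qmgr -c "set node node properties=cluster-name"   # tag a node with the cluster name referenced by hod.cluster (repeat for each node)
$ echo "sleep 30" | qsub -l nodes=3                 # verify that a simple job can be submitted to 3 nodes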

File diff not shown because it is too large
+ 118 - 0
docs/hod_admin_guide.pdf


+ 444 - 0
docs/hod_config_guide.html

@@ -0,0 +1,444 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title> 
+      Hadoop On Demand: Configuration Guide
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.jpg" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.16 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">Quickstart</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">Cluster Setup</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS Architecture</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS User Guide</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Native Hadoop Libraries</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hod_config_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1> 
+      Hadoop On Demand: Configuration Guide
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#1.+Introduction">1. Introduction</a>
+</li>
+<li>
+<a href="#2.+Sections">2. Sections</a>
+</li>
+<li>
+<a href="#3.+Important+%2F+Commonly+Used+Configuration+Options">3. Important / Commonly Used Configuration Options</a>
+<ul class="minitoc">
+<li>
+<a href="#3.1+Common+configuration+options">3.1 Common configuration options</a>
+</li>
+<li>
+<a href="#3.2+hod+options">3.2 hod options</a>
+</li>
+<li>
+<a href="#3.3+resource_manager+options">3.3 resource_manager options</a>
+</li>
+<li>
+<a href="#3.4+ringmaster+options">3.4 ringmaster options</a>
+</li>
+<li>
+<a href="#3.5+gridservice-hdfs+options">3.5 gridservice-hdfs options</a>
+</li>
+<li>
+<a href="#3.6+gridservice-mapred+options">3.6 gridservice-mapred options</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+    
+<a name="N1000C"></a><a name="1.+Introduction"></a>
+<h2 class="h3">1. Introduction</h2>
+<div class="section">
+<p>Configuration options for HOD are organized as sections and options 
+      within them. They can be specified in two ways: a configuration file 
+      in the INI format, and as command line options to the HOD shell, 
+      specified in the format --section.option[=value]. If the same option is 
+      specified in both places, the value specified on the command line 
+      overrides the value in the configuration file.</p>
+<p>
+        To get a simple description of all configuration options, you can type
+      </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod --verbose-help</span></td>
+</tr>
+</table>
+<p>This document explains some of the most important or commonly used
+      configuration options in some more detail.</p>
+</div>
+    
+    
+<a name="N10024"></a><a name="2.+Sections"></a>
+<h2 class="h3">2. Sections</h2>
+<div class="section">
+<p>The following are the various sections in the HOD configuration:</p>
+<ul>
+        
+<li>  hod:                  Options for the HOD client</li>
+        
+<li>  resource_manager:     Options for specifying which resource manager
+         to use, and other parameters for using that resource manager</li>
+        
+<li>  ringmaster:           Options for the RingMaster process</li>
+        
+<li>  hodring:              Options for the HodRing processes</li>
+        
+<li>  gridservice-mapred:   Options for the MapReduce daemons</li>
+        
+<li>  gridservice-hdfs:     Options for the HDFS daemons.</li>
+      
+</ul>
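+<p>Since the configuration file is in the INI format, each of these sections
+        appears as a bracketed header in the hodrc. A bare skeleton of the file,
+        with the options themselves omitted, would therefore look like:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">[hod]</span><br>
+<span class="codefrag">[resource_manager]</span><br>
+<span class="codefrag">[ringmaster]</span><br>
+<span class="codefrag">[hodring]</span><br>
+<span class="codefrag">[gridservice-mapred]</span><br>
+<span class="codefrag">[gridservice-hdfs]</span></td>
+</tr>
+</table>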
+<p>The next section deals with some of the important options in the HOD 
+        configuration.</p>
+</div>
+    
+    
+<a name="N10046"></a><a name="3.+Important+%2F+Commonly+Used+Configuration+Options"></a>
+<h2 class="h3">3. Important / Commonly Used Configuration Options</h2>
+<div class="section">
+<a name="N1004C"></a><a name="3.1+Common+configuration+options"></a>
+<h3 class="h4">3.1 Common configuration options</h3>
+<p>Certain configuration options are defined in most of the sections of 
+        the HOD configuration. Options defined in a section are used by the
+        process to which that section applies. These options have the same
+        meaning in every section, but can take different values in each.
+        </p>
+<ul>
+          
+<li>temp-dir: Temporary directory for use by the HOD processes. Make 
+                      sure that the users who will run hod have rights to create 
+                      directories under the directory specified here.</li>
+          
+          
+<li>debug: A numeric value from 1-4. 4 produces the most log information,
+                   and 1 the least.</li>
+          
+          
+<li>log-dir: Directory where log files are stored. By default, this is
+                     &lt;install-location&gt;/logs/. The restrictions and notes for the
+                     temp-dir variable apply here too.
+          </li>
+          
+          
+<li>xrs-port-range: A range of ports from which an available port is
+                            picked to run an XML-RPC server.</li>
+          
+          
+<li>http-port-range: A range of ports from which an available port is
+                             picked to run an HTTP server.</li>
+          
+          
+<li>java-home: Location of Java to be used by Hadoop.</li>
+        
+</ul>
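+<p>As an illustration, these common options could appear in a section of the
+        hodrc as shown below; the paths, port ranges and values are hypothetical
+        and should be adapted to your installation:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">temp-dir = /tmp/hod</span><br>
+<span class="codefrag">debug = 3</span><br>
+<span class="codefrag">log-dir = /var/log/hod</span><br>
+<span class="codefrag">xrs-port-range = 32768-65536</span><br>
+<span class="codefrag">http-port-range = 32768-65536</span><br>
+<span class="codefrag">java-home = /usr/java/latest</span></td>
+</tr>
+</table>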
+<a name="N1006B"></a><a name="3.2+hod+options"></a>
+<h3 class="h4">3.2 hod options</h3>
+<ul>
+          
+<li>cluster: A descriptive name given to the cluster. For Torque, this is
+                     specified as a 'Node property' for every node in the cluster.
+                     HOD uses this value to compute the number of available nodes.</li>
+          
+          
+<li>client-params: A comma-separated list of hadoop config parameters
+                           specified as key-value pairs. These will be used to
+                           generate a hadoop-site.xml on the submit node that 
+                           should be used for running MapReduce jobs.</li>
+         
+</ul>
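+<p>For example, a hod section might look like the following; the cluster name
+        and hadoop parameters are purely illustrative:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">[hod]</span><br>
+<span class="codefrag">cluster = yellow</span><br>
+<span class="codefrag">client-params = mapred.reduce.tasks=2,io.sort.mb=100</span></td>
+</tr>
+</table>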
+<a name="N1007B"></a><a name="3.3+resource_manager+options"></a>
+<h3 class="h4">3.3 resource_manager options</h3>
+<ul>
+          
+<li>queue: Name of the queue configured in the resource manager to which
+                   jobs are to be submitted.</li>
+          
+          
+<li>batch-home: Install directory to which 'bin' is appended and under 
+                        which the executables of the resource manager can be 
+                        found.</li> 
+          
+          
+<li>env-vars: A comma-separated list of key-value pairs, 
+                      expressed as key=value, that is passed to the jobs 
+                      launched on the compute nodes. 
+                      For example, if the python installation is 
+                      in a non-standard location, one can set the environment
+                      variable 'HOD_PYTHON_HOME' to the path to the python 
+                      executable. The HOD processes launched on the compute nodes
+                      can then use this variable.</li>
+        
+</ul>
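+<p>For example, a resource_manager section for a Torque installation might
+        look like the following; all values are illustrative:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">[resource_manager]</span><br>
+<span class="codefrag">queue = batch</span><br>
+<span class="codefrag">batch-home = /usr/local/torque</span><br>
+<span class="codefrag">env-vars = HOD_PYTHON_HOME=/opt/python/bin/python</span></td>
+</tr>
+</table>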
+<a name="N1008E"></a><a name="3.4+ringmaster+options"></a>
+<h3 class="h4">3.4 ringmaster options</h3>
+<ul>
+          
+<li>work-dirs: A comma-separated list of paths that serve
+                       as the root for directories that HOD generates and passes
+                       to Hadoop for storing DFS / MapReduce data. For example,
+                       this is where DFS data blocks will be stored. Typically,
+                       as many paths are specified as there are disks available,
+                       to ensure all disks are utilized. The restrictions
+                       and notes for the temp-dir variable apply here too.</li>
+        
+</ul>
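+<p>For example, on compute nodes with two data disks mounted at the
+        hypothetical paths /disk1 and /disk2, one might specify:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">[ringmaster]</span><br>
+<span class="codefrag">work-dirs = /disk1/hod,/disk2/hod</span></td>
+</tr>
+</table>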
+<a name="N1009B"></a><a name="3.5+gridservice-hdfs+options"></a>
+<h3 class="h4">3.5 gridservice-hdfs options</h3>
+<ul>
+          
+<li>external: If false, this indicates that an HDFS cluster must be 
+                      brought up by the HOD system, on the nodes which it 
+                      allocates via the allocate command. Note that in that case,
+                      when the cluster is de-allocated, it will bring down the 
+                      HDFS cluster, and all the data will be lost.
+                      If true, HOD will try to connect to an externally configured
+                      HDFS system.
+                      Typically, because input for jobs is placed into HDFS
+                      before jobs are run, and the output from jobs in HDFS 
+                      is required to be persistent, an internal HDFS cluster is 
+                      of little value in a production system. However, it allows 
+                      for quick testing.</li>
+          
+          
+<li>host: Hostname of the externally configured NameNode, if any.</li>
+          
+          
+<li>fs_port: Port to which NameNode RPC server is bound.</li>
+          
+          
+<li>info_port: Port to which the NameNode web UI server is bound.</li>
+          
+          
+<li>pkgs: Installation directory, under which the bin/hadoop executable is 
+                  located. This can be used to point HOD at a pre-installed version of
+                  Hadoop on the cluster.</li>
+          
+          
+<li>server-params: A comma-separated list of hadoop config parameters
+                           specified as key-value pairs. These will be used to
+                           generate a hadoop-site.xml that will be used by the
+                           NameNode and DataNodes.</li>
+          
+          
+<li>final-server-params: Same as above, except they will be marked final.</li>
+        
+</ul>
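+<p>For example, a gridservice-hdfs section pointing at an externally
+        configured HDFS might look like the following; the hostname, ports and
+        install path are illustrative only:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">[gridservice-hdfs]</span><br>
+<span class="codefrag">external = true</span><br>
+<span class="codefrag">host = namenode.example.com</span><br>
+<span class="codefrag">fs_port = 50040</span><br>
+<span class="codefrag">info_port = 50070</span><br>
+<span class="codefrag">pkgs = /usr/local/hadoop</span></td>
+</tr>
+</table>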
+<a name="N100BA"></a><a name="3.6+gridservice-mapred+options"></a>
+<h3 class="h4">3.6 gridservice-mapred options</h3>
+<ul>
+          
+<li>external: If false, this indicates that a MapReduce cluster must be
+                      brought up by the HOD system on the nodes which it allocates
+                      via the allocate command.
+                      If true, HOD will try to connect to an externally 
+                      configured MapReduce system.</li>
+          
+          
+<li>host: Hostname of the externally configured JobTracker, if any.</li>
+          
+          
+<li>tracker_port: Port to which the JobTracker RPC server is bound.</li>
+          
+          
+<li>info_port: Port to which the JobTracker web UI server is bound.</li>
+          
+          
+<li>pkgs: Installation directory, under which the bin/hadoop executable is 
+                  located.</li>
+          
+          
+<li>server-params: A comma-separated list of hadoop config parameters
+                           specified as key-value pairs. These will be used to
+                           generate a hadoop-site.xml that will be used by the
+                           JobTracker and TaskTrackers.</li>
+          
+          
+<li>final-server-params: Same as above, except they will be marked final.</li>
+        
+</ul>
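+<p>Similarly, a gridservice-mapred section for a Map/Reduce cluster that HOD
+        itself brings up might look like the following; the values are
+        illustrative only:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">[gridservice-mapred]</span><br>
+<span class="codefrag">external = false</span><br>
+<span class="codefrag">pkgs = /usr/local/hadoop</span><br>
+<span class="codefrag">server-params = mapred.reduce.parallel.copies=20,io.sort.factor=100</span></td>
+</tr>
+</table>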
+</div>
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Diff not shown because of its large size
+ 129 - 0
docs/hod_config_guide.pdf


+ 1168 - 0
docs/hod_user_guide.html

@@ -0,0 +1,1168 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>
+      Hadoop On Demand 0.4 User Guide
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.jpg" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.16 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">Quickstart</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">Cluster Setup</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS Architecture</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS User Guide</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Native Hadoop Libraries</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hod_user_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>
+      Hadoop On Demand 0.4 User Guide
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Introduction-N1000C"> Introduction </a>
+</li>
+<li>
+<a href="#Getting+Started+Using+HOD+0.4"> Getting Started Using HOD 0.4 </a>
+<ul class="minitoc">
+<li>
+<a href="#HOD"> HOD Operation Mode </a>
+</li>
+<li>
+<a href="#HOD-N1013B"> HOD Script Mode </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#HOD+0.4+Features"> HOD 0.4 Features </a>
+<ul class="minitoc">
+<li>
+<a href="#Provisioning+and+Managing+Hadoop+Clusters"> Provisioning and Managing Hadoop Clusters </a>
+</li>
+<li>
+<a href="#Using+a+tarball+to+distribute+Hadoop"> Using a tarball to distribute Hadoop </a>
+</li>
+<li>
+<a href="#Using+an+external+HDFS"> Using an external HDFS </a>
+</li>
+<li>
+<a href="#Options+for+Configuring+Hadoop"> Options for Configuring Hadoop </a>
+</li>
+<li>
+<a href="#Viewing+Hadoop+Web-UIs"> Viewing Hadoop Web-UIs </a>
+</li>
+<li>
+<a href="#Collecting+and+Viewing+Hadoop+Logs"> Collecting and Viewing Hadoop Logs </a>
+</li>
+<li>
+<a href="#Auto-deallocation+of+Idle+Clusters"> Auto-deallocation of Idle Clusters </a>
+</li>
+<li>
+<a href="#Specifying+Additional+Job+Attributes"> Specifying Additional Job Attributes </a>
+</li>
+<li>
+<a href="#Capturing+HOD+exit+codes+in+Torque"> Capturing HOD exit codes in Torque </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Command+Line+Options"> Command Line Options </a>
+<ul class="minitoc">
+<li>
+<a href="#Options+Defining+Operations"> Options Defining Operations </a>
+</li>
+<li>
+<a href="#Options+Configuring+HOD"> Options Configuring HOD </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Troubleshooting-N1055A"> Troubleshooting </a>
+<ul class="minitoc">
+<li>
+<a href="#Hangs+During+Allocation">hod Hangs During Allocation </a>
+</li>
+<li>
+<a href="#Hangs+During+Deallocation">hod Hangs During Deallocation </a>
+</li>
+<li>
+<a href="#Fails+With+an+error+code+and+error+message">hod Fails With an error code and error message </a>
+</li>
+<li>
+<a href="#Hadoop+Jobs+Not+Running+on+a+Successfully+Allocated+Cluster"> Hadoop Jobs Not Running on a Successfully Allocated Cluster </a>
+</li>
+<li>
+<a href="#My+Hadoop+Job+Got+Killed"> My Hadoop Job Got Killed </a>
+</li>
+<li>
+<a href="#Hadoop+Job+Fails+with+Message%3A+%27Job+tracker+still+initializing%27"> Hadoop Job Fails with Message: 'Job tracker still initializing' </a>
+</li>
+<li>
+<a href="#The+Exit+Codes+For+HOD+Are+Not+Getting+Into+Torque"> The Exit Codes For HOD Are Not Getting Into Torque </a>
+</li>
+<li>
+<a href="#The+Hadoop+Logs+are+Not+Uploaded+to+DFS"> The Hadoop Logs are Not Uploaded to DFS </a>
+</li>
+<li>
+<a href="#Locating+Ringmaster+Logs"> Locating Ringmaster Logs </a>
+</li>
+<li>
+<a href="#Locating+Hodring+Logs"> Locating Hodring Logs </a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+  
+<a name="N1000C"></a><a name="Introduction-N1000C"></a>
+<h2 class="h3"> Introduction </h2>
+<div class="section">
+<a name="Introduction" id="Introduction"></a>
+<p>Hadoop On Demand (HOD) is a system for provisioning virtual Hadoop clusters over a large physical cluster. It uses the Torque resource manager to do node allocation. On the allocated nodes, it can start Hadoop Map/Reduce and HDFS daemons. It automatically generates the appropriate configuration files (hadoop-site.xml) for the Hadoop daemons and client. HOD also has the capability to distribute Hadoop to the nodes in the virtual cluster that it allocates. In short, HOD makes it easy for administrators and users to quickly set up and use Hadoop. It is also a very useful tool for Hadoop developers and testers who need to share a physical cluster for testing their own Hadoop versions.</p>
+<p>HOD 0.4 supports Hadoop from version 0.15 onwards.</p>
+<p>The rest of this documentation consists of a quick-start guide that helps you get started with HOD quickly, a more detailed guide to all HOD features, command line options, known issues and troubleshooting information.</p>
+</div>
+  
+<a name="N1001E"></a><a name="Getting+Started+Using+HOD+0.4"></a>
+<h2 class="h3"> Getting Started Using HOD 0.4 </h2>
+<div class="section">
+<a name="Getting_Started_Using_HOD_0_4" id="Getting_Started_Using_HOD_0_4"></a>
+<p>In this section, we shall see a step-by-step introduction to using HOD for the most basic operations. Before following these steps, it is assumed that HOD 0.4 and its dependent hardware and software components are set up and configured correctly. This is a step that is generally performed by system administrators of the cluster.</p>
+<p>The HOD 0.4 user interface is a command line utility called <span class="codefrag">hod</span>. It is driven by a configuration file that is typically set up for users by system administrators. Users can override this configuration when using <span class="codefrag">hod</span>, as described later in this documentation. The configuration file can be specified in two ways when using <span class="codefrag">hod</span>, as described below: </p>
+<ul>
+    
+<li> Specify it on the command line, using the -c option, for example <span class="codefrag">hod -c path-to-the-configuration-file other-options</span>
+</li>
+    
+<li> Set up an environment variable <em>HOD_CONF_DIR</em> in the environment where <span class="codefrag">hod</span> will be run. This should point to a directory on the local file system containing a file called <em>hodrc</em>. Note that this is analogous to the <em>HADOOP_CONF_DIR</em> and <em>hadoop-site.xml</em> file for Hadoop. If no configuration file is specified on the command line, <span class="codefrag">hod</span> shall look for the <em>HOD_CONF_DIR</em> environment variable and a <em>hodrc</em> file under that directory (see the example following this list).</li>
+    
+</ul>
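+<p>For example, assuming the hodrc is kept in a hypothetical directory <span class="codefrag">~/hod-conf-dir</span>, the environment variable approach would be:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ export HOD_CONF_DIR=~/hod-conf-dir</span>
+<br>
+<span class="codefrag">$ hod other-options</span></td>
+</tr>
+</table>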
+<p>In examples listed below, we shall not explicitly point to the configuration option, assuming it is correctly specified.</p>
+<p>
+<span class="codefrag">hod</span> can be used in two modes, the <em>operation</em> mode and the <em>script</em> mode. We shall describe the two modes in detail below.</p>
+<a name="N10066"></a><a name="HOD"></a>
+<h3 class="h4"> HOD Operation Mode </h3>
+<a name="HOD_Operation_Mode" id="HOD_Operation_Mode"></a>
+<p>A typical HOD session in this mode involves at least three steps: allocate, run hadoop jobs, deallocate. In order to use this mode, perform the following steps.</p>
+<p>
+<strong> Create a Cluster Directory </strong>
+</p>
+<a name="Create_a_Cluster_Directory" id="Create_a_Cluster_Directory"></a>
+<p>The <em>cluster directory</em> is a directory on the local file system where <span class="codefrag">hod</span> will generate the Hadoop configuration, <em>hadoop-site.xml</em>, corresponding to the cluster it allocates. Create this directory and pass it to the <span class="codefrag">hod</span> operations as stated below. Once a cluster is allocated, a user can utilize it to run Hadoop jobs by specifying the cluster directory as the Hadoop --config option. </p>
+<p>
+<strong> Operation <em>allocate</em></strong>
+</p>
+<a name="Operation_allocate" id="Operation_allocate"></a>
+<p>The <em>allocate</em> operation is used to allocate a set of nodes and install and provision Hadoop on them. It has the following syntax:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>If the command completes successfully, then <span class="codefrag">cluster_dir/hadoop-site.xml</span> will be generated and will contain information about the allocated cluster. It will also print out the information about the Hadoop web UIs.</p>
+<p>An example run of this command produces the following output. Note in this example that <span class="codefrag">~/hod-clusters/test</span> is the cluster directory, and we are allocating 5 nodes:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+    
+<tr>
+      
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "allocate ~/hod-clusters/test 5"</span>
+<br>
+      
+<span class="codefrag">INFO - HDFS UI on http://foo1.bar.com:53422</span>
+<br>
+      
+<span class="codefrag">INFO - Mapred UI on http://foo2.bar.com:55380</span>
+<br>
+</td>
+      
+</tr>
+   
+</table>
+<p>
+<strong> Running Hadoop jobs using the allocated cluster </strong>
+</p>
+<a name="Running_Hadoop_jobs_using_the_al" id="Running_Hadoop_jobs_using_the_al"></a>
+<p>Now, one can run Hadoop jobs using the allocated cluster in the usual manner. This assumes variables like <em>JAVA_HOME</em> and the path to the Hadoop installation are set up correctly:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hadoop --config cluster_dir hadoop_command hadoop_command_args</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>or</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ export HADOOP_CONF_DIR=cluster_dir</span> 
+<br>
+              
+<span class="codefrag">$ hadoop hadoop_command hadoop_command_args</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>Continuing our example, the following command will run a wordcount example on the allocated cluster:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hadoop --config ~/hod-clusters/test jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</span></td>
+</tr>
+</table>
+<p>or</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+    
+<td colspan="1" rowspan="1"><span class="codefrag">$ export HADOOP_CONF_DIR=~/hod-clusters/test</span>
+<br>
+    
+<span class="codefrag">$ hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</span></td>
+    
+</tr>
+  
+</table>
+<p>
+<strong> Operation <em>deallocate</em></strong>
+</p>
+<a name="Operation_deallocate" id="Operation_deallocate"></a>
+<p>The <em>deallocate</em> operation is used to release an allocated cluster. When finished with a cluster, deallocate must be run so that the nodes become free for others to use. The <em>deallocate</em> operation has the following syntax:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "deallocate cluster_dir"</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>Continuing our example, the following command will deallocate the cluster:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "deallocate ~/hod-clusters/test"</span></td>
+</tr>
+</table>
+<p>As can be seen, when used in the <em>operation</em> mode, HOD allows the users to allocate a cluster, and use it flexibly for running Hadoop jobs. For example, users can run multiple jobs in parallel on the same cluster, by running hadoop from multiple shells pointing to the same configuration.</p>
+<a name="N1013B"></a><a name="HOD-N1013B"></a>
+<h3 class="h4"> HOD Script Mode </h3>
+<a name="HOD_Script_Mode" id="HOD_Script_Mode"></a>
+<p>The HOD <em>script mode</em> combines the operations of allocating, using and deallocating a cluster into a single operation. This is very useful for users who want to run a script of hadoop jobs and let HOD handle the cleanup automatically once the script completes. In order to use <span class="codefrag">hod</span> in the script mode, do the following:</p>
+<p>
+<strong> Create a script file </strong>
+</p>
+<a name="Create_a_script_file" id="Create_a_script_file"></a>
+<p>This will be a regular shell script that will typically contain hadoop commands, such as:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hadoop jar jar_file options</span></td>
+  
+</tr>
+</table>
+<p>However, the user can add any valid commands as part of the script. HOD will execute this script setting <em>HADOOP_CONF_DIR</em> automatically to point to the allocated cluster. So users do not need to worry about this. They also do not need to create a cluster directory as in the <em>operation</em> mode.</p>
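+<p>For instance, a complete script file could be as simple as the following; the input, output and jar paths are illustrative, and the jobs run against the cluster that HOD allocates for the script:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">#!/bin/sh</span>
+<br>
+<span class="codefrag">hadoop dfs -put /path/to/local/input input</span>
+<br>
+<span class="codefrag">hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount input output</span>
+<br>
+<span class="codefrag">hadoop dfs -get output /path/to/local/output</span></td>
+</tr>
+</table>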
+<p>
+<strong> Running the script </strong>
+</p>
+<a name="Running_the_script" id="Running_the_script"></a>
+<p>The syntax for the <em>script mode</em> is as follows:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -m number_of_nodes -z script_file</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>Note that HOD will deallocate the cluster as soon as the script completes, and this means that the script must not complete until the hadoop jobs themselves are completed. Users must take care of this while writing the script. </p>
+</div>
+  
+<a name="N10186"></a><a name="HOD+0.4+Features"></a>
+<h2 class="h3"> HOD 0.4 Features </h2>
+<div class="section">
+<a name="HOD_0_4_Features" id="HOD_0_4_Features"></a><a name="N1018E"></a><a name="Provisioning+and+Managing+Hadoop+Clusters"></a>
+<h3 class="h4"> Provisioning and Managing Hadoop Clusters </h3>
+<a name="Provisioning_and_Managing_Hadoop" id="Provisioning_and_Managing_Hadoop"></a>
+<p>The primary feature of HOD is to provision Hadoop Map/Reduce and HDFS clusters. This is described above in the Getting Started section. Also, as long as nodes are available, and organizational policies allow, a user can use HOD to allocate multiple Map/Reduce clusters simultaneously. The user would need to specify different paths for the <span class="codefrag">cluster_dir</span> parameter mentioned above for each cluster he/she allocates. HOD provides the <em>list</em> and the <em>info</em> operations to enable managing multiple clusters.</p>
+<p>
+<strong> Operation <em>list</em></strong>
+</p>
+<a name="Operation_list" id="Operation_list"></a>
+<p>The list operation lists all the clusters allocated so far by a user. For each cluster, the cluster directory where the hadoop-site.xml is stored, and its status with respect to connectivity with the JobTracker and/or HDFS, are shown. The list operation has the following syntax:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "list"</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>
+<strong> Operation <em>info</em></strong>
+</p>
+<a name="Operation_info" id="Operation_info"></a>
+<p>The info operation shows information about a given cluster. The information shown includes the Torque job id, and locations of the important daemons like the HOD Ringmaster process, and the Hadoop JobTracker and NameNode daemons. The info operation has the following syntax:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "info cluster_dir"</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>The <span class="codefrag">cluster_dir</span> should be a valid cluster directory specified in an earlier <em>allocate</em> operation.</p>
+<a name="N101D9"></a><a name="Using+a+tarball+to+distribute+Hadoop"></a>
+<h3 class="h4"> Using a tarball to distribute Hadoop </h3>
+<a name="Using_a_tarball_to_distribute_Ha" id="Using_a_tarball_to_distribute_Ha"></a>
+<p>When provisioning Hadoop, HOD can either use a pre-installed Hadoop on the cluster nodes or distribute and install a Hadoop tarball as part of the provisioning operation. If the tarball option is used, there is no need to have Hadoop pre-installed on the cluster nodes, nor will a pre-installed Hadoop be used. This is especially useful in a development / QE environment where individual developers may have different versions of Hadoop to test on a shared cluster. </p>
+<p>In order to use a pre-installed Hadoop, you must specify, in the hodrc, the <span class="codefrag">pkgs</span> option in the <span class="codefrag">gridservice-hdfs</span> and <span class="codefrag">gridservice-mapred</span> sections. This must point to the path where Hadoop is installed on all nodes of the cluster.</p>
+<p>The tarball option can be used in both the <em>operation</em> and <em>script</em> modes. </p>
+<p>In the operation mode, the syntax is as follows:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -t hadoop_tarball_location -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+    
+</table>
+<p>For example, the following command allocates Hadoop provided by the tarball <span class="codefrag">~/share/hadoop.tar.gz</span>:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10"</span></td>
+</tr>
+</table>
+<p>In the script mode, the syntax is as follows:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -t hadoop_tarball_location -m number_of_nodes -z script_file</span></td>
+        
+</tr>
+    
+</table>
+<p>The hadoop_tarball specified in the syntax above should point to a path on a shared file system that is accessible from all the compute nodes. Currently, HOD supports only NFS-mounted file systems.</p>
+<p>
+<em>Note:</em>
+</p>
+<ul>
+    
+<li> For better distribution performance it is recommended that the Hadoop tarball contain only the libraries and binaries, and not the source or documentation.</li>
+    
+<li> When you want to run jobs against a cluster allocated using the tarball, you must use a compatible version of hadoop to submit your jobs. The best would be to untar and use the version that is present in the tarball itself.</li>
+  
+</ul>
+<a name="N10235"></a><a name="Using+an+external+HDFS"></a>
+<h3 class="h4"> Using an external HDFS </h3>
+<a name="Using_an_external_HDFS" id="Using_an_external_HDFS"></a>
+<p>In typical Hadoop clusters provisioned by HOD, HDFS is already set up statically (without using HOD). This allows data to persist in HDFS after the HOD-provisioned clusters are deallocated. To use a statically configured HDFS, your hodrc must point to an external HDFS. Specifically, set the following options to the correct values in the section <span class="codefrag">gridservice-hdfs</span> of the hodrc:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1">external = true</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">host = Hostname of the HDFS NameNode</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">fs_port = Port number of the HDFS NameNode</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">info_port = Port number of the HDFS NameNode web UI</td>
+</tr>
+</table>
+<p>
+<em>Note:</em> You can also enable this option from the command line. That is, to use a static HDFS, you will need to say: <br>
+    
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod --gridservice-hdfs.external -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+    
+</table>
+<p>HOD can be used to provision an HDFS cluster as well as a Map/Reduce cluster, if required. To do so, set the following option in the section <span class="codefrag">gridservice-hdfs</span> of the hodrc:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1">external = false</td>
+</tr>
+</table>
+<a name="N10279"></a><a name="Options+for+Configuring+Hadoop"></a>
+<h3 class="h4"> Options for Configuring Hadoop </h3>
+<a name="Options_for_Configuring_Hadoop" id="Options_for_Configuring_Hadoop"></a>
+<p>HOD provides a very convenient mechanism to configure both the Hadoop daemons that it provisions and also the hadoop-site.xml that it generates on the client side. This is done by specifying Hadoop configuration parameters in either the HOD configuration file, or from the command line when allocating clusters.</p>
+<p>
+<strong> Configuring Hadoop Daemons </strong>
+</p>
+<a name="Configuring_Hadoop_Daemons" id="Configuring_Hadoop_Daemons"></a>
+<p>For configuring the Hadoop daemons, you can do the following:</p>
+<p>For Map/Reduce, specify the options as a comma separated list of key-value pairs to the <span class="codefrag">server-params</span> option in the <span class="codefrag">gridservice-mapred</span> section. Likewise for a dynamically provisioned HDFS cluster, specify the options in the <span class="codefrag">server-params</span> option in the <span class="codefrag">gridservice-hdfs</span> section. If these parameters should be marked as <em>final</em>, then include these in the <span class="codefrag">final-server-params</span> option of the appropriate section.</p>
+<p>For example:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">server-params = mapred.reduce.parallel.copies=20,io.sort.factor=100,io.sort.mb=128,io.file.buffer.size=131072</span></td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">final-server-params = mapred.child.java.opts=-Xmx512m,dfs.block.size=134217728,fs.inmemory.size.mb=128</span></td>
+  
+</tr>
+</table>
+<p>In order to provide the options from command line, you can use the following syntax:</p>
+<p>For configuring the Map/Reduce daemons use:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -Mmapred.reduce.parallel.copies=20 -Mio.sort.factor=100 -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+    
+</table>
+<p>In the example above, the <em>mapred.reduce.parallel.copies</em> parameter and the <em>io.sort.factor</em> parameter will be appended to the other <span class="codefrag">server-params</span>, or, if they already exist in <span class="codefrag">server-params</span>, will override them. In order to specify that these are <em>final</em> parameters, you can use:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -Fmapred.reduce.parallel.copies=20 -Fio.sort.factor=100 -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+    
+</table>
+<p>However, note that final parameters cannot be overwritten from the command line. They can only be appended if not already specified.</p>
+<p>Similar options exist for configuring dynamically provisioned HDFS daemons. For doing so, replace -M with -H and -F with -S.</p>
+<p>
+<strong> Configuring Hadoop Job Submission (Client) Programs </strong>
+</p>
+<a name="Configuring_Hadoop_Job_Submissio" id="Configuring_Hadoop_Job_Submissio"></a>
+<p>As mentioned above, if the allocation operation completes successfully then <span class="codefrag">cluster_dir/hadoop-site.xml</span> will be generated and will contain information about the allocated cluster's JobTracker and NameNode. This configuration is used when submitting jobs to the cluster. HOD provides an option to include additional Hadoop configuration parameters into this file. The syntax for doing so is as follows:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -Cmapred.userlog.limit.kb=200 -Cmapred.child.java.opts=-Xmx512m -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+    
+</table>
+<p>In this example, the <em>mapred.userlog.limit.kb</em> and <em>mapred.child.java.opts</em> options will be included into the hadoop-site.xml that is generated by HOD.</p>
+<a name="N1030B"></a><a name="Viewing+Hadoop+Web-UIs"></a>
+<h3 class="h4"> Viewing Hadoop Web-UIs </h3>
+<a name="Viewing_Hadoop_Web_UIs" id="Viewing_Hadoop_Web_UIs"></a>
+<p>The HOD allocation operation prints the JobTracker and NameNode web UI URLs. For example:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -c ~/hod-conf-dir/hodrc -o "allocate ~/hadoop-cluster 10"</span>
+<br>
+    
+<span class="codefrag">INFO - HDFS UI on http://host242.foo.com:55391</span>
+<br>
+    
+<span class="codefrag">INFO - Mapred UI on http://host521.foo.com:54874</span>
+    </td>
+</tr>
+</table>
+<p>The same information is also available via the <em>info</em> operation described above.</p>
+<a name="N1032D"></a><a name="Collecting+and+Viewing+Hadoop+Logs"></a>
+<h3 class="h4"> Collecting and Viewing Hadoop Logs </h3>
+<a name="Collecting_and_Viewing_Hadoop_Lo" id="Collecting_and_Viewing_Hadoop_Lo"></a>
+<p>To get the Hadoop logs of the daemons running on one of the allocated nodes: </p>
+<ul>
+    
+<li> Log into the node of interest. If you want to look at the logs of the JobTracker or NameNode, then you can find the node running these by using the <em>list</em> and <em>info</em> operations mentioned above.</li>
+    
+<li> Get the process information of the daemon of interest (for example, <span class="codefrag">ps ux | grep TaskTracker</span>)</li>
+    
+<li> In the process information, search for the value of the variable <span class="codefrag">-Dhadoop.log.dir</span>. Typically this will be a descendant directory of the <span class="codefrag">hodring.temp-dir</span> value from the hod configuration file.</li>
+    
+<li> Change to the <span class="codefrag">hadoop.log.dir</span> directory to view daemon and user logs.</li>
+  
+</ul>
+<p>HOD also provides a mechanism to collect logs when a cluster is being deallocated and persist them into a file system, or an externally configured HDFS. By doing so, these logs can be viewed after the jobs are completed and the nodes are released. In order to do so, configure the log-destination-uri to a URI as follows:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">log-destination-uri = hdfs://host123:45678/user/hod/logs</span> or</td>
+</tr>
+    
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">log-destination-uri = file://path/to/store/log/files</span></td>
+</tr>
+    
+</table>
+<p>Under the root directory specified above in the path, HOD will create a path user_name/torque_jobid and store gzipped log files for each node that was part of the job.</p>
+<p>Note that to store the files to HDFS, you may need to configure the <span class="codefrag">hodring.pkgs</span> option with the Hadoop version that matches the HDFS mentioned. If not, HOD will try to use the Hadoop version that it is using to provision the Hadoop cluster itself.</p>
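+<p>For example, a hodring section enabling log collection to an externally configured HDFS might look like the following; the URI is the same illustrative one used above and the pkgs path is hypothetical:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">[hodring]</span>
+<br>
+<span class="codefrag">log-destination-uri = hdfs://host123:45678/user/hod/logs</span>
+<br>
+<span class="codefrag">pkgs = /usr/local/hadoop</span></td>
+</tr>
+</table>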
+<a name="N10376"></a><a name="Auto-deallocation+of+Idle+Clusters"></a>
+<h3 class="h4"> Auto-deallocation of Idle Clusters </h3>
+<a name="Auto_deallocation_of_Idle_Cluste" id="Auto_deallocation_of_Idle_Cluste"></a>
+<p>HOD automatically deallocates clusters that are not running Hadoop jobs for a given period of time. Each HOD allocation includes a monitoring facility that constantly checks for running Hadoop jobs. If it detects no running Hadoop jobs for a given period, it will automatically deallocate its own cluster and thus free up nodes which are not being used effectively.</p>
+<p>
+<em>Note:</em> When a cluster is auto-deallocated, the <em>cluster directory</em> is not cleaned up automatically. The user must still deallocate the cluster through the regular <em>deallocate</em> operation to clean it up.</p>
+<a name="N1038C"></a><a name="Specifying+Additional+Job+Attributes"></a>
+<h3 class="h4"> Specifying Additional Job Attributes </h3>
+<a name="Specifying_Additional_Job_Attrib" id="Specifying_Additional_Job_Attrib"></a>
+<p>HOD allows the user to specify a wallclock time and a name (or title) for a Torque job. </p>
+<p>The wallclock time is the estimated amount of time for which the Torque job will be valid. After this time has expired, Torque will automatically delete the job and free up the nodes. Specifying the wallclock time can also help the job scheduler to better schedule jobs, and help improve utilization of cluster resources.</p>
+<p>To specify the wallclock time, use the following syntax:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -l time_in_seconds -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+    
+</table>
+<p>The name or title of a Torque job helps in user-friendly identification of the job. The string specified here will show up in all information where Torque job attributes are displayed, including the <span class="codefrag">qstat</span> command.</p>
+<p>To specify the name or title, use the following syntax:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -N name_of_job -o "allocate cluster_dir number_of_nodes"</span></td>
+        
+</tr>
+    
+</table>
+<p>
+<em>Note:</em> Due to a restriction in the underlying Torque resource manager, names that do not start with an alphabetic character or that contain a space will cause the job to fail. The failure message points to the problem being in the specified job name.</p>
+<a name="N103C3"></a><a name="Capturing+HOD+exit+codes+in+Torque"></a>
+<h3 class="h4"> Capturing HOD exit codes in Torque </h3>
+<a name="Capturing_HOD_exit_codes_in_Torq" id="Capturing_HOD_exit_codes_in_Torq"></a>
+<p>HOD exit codes are captured in the Torque exit_status field. This helps users and system administrators distinguish successful runs of HOD from unsuccessful ones. The exit code is 0 if allocation succeeded and all hadoop jobs ran on the allocated cluster correctly. It is non-zero if allocation failed or some of the hadoop jobs failed on the allocated cluster. The possible exit codes are listed in the table below. <em>Note: Hadoop job status is captured only if the version of Hadoop used is 0.16 or above.</em>
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+    
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> Exit Code </td>
+        <td colspan="1" rowspan="1"> Meaning </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 6 </td>
+        <td colspan="1" rowspan="1"> Ringmaster failure </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 7 </td>
+        <td colspan="1" rowspan="1"> DFS failure </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 8 </td>
+        <td colspan="1" rowspan="1"> Job tracker failure </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 10 </td>
+        <td colspan="1" rowspan="1"> Cluster dead </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 12 </td>
+        <td colspan="1" rowspan="1"> Cluster already allocated </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 13 </td>
+        <td colspan="1" rowspan="1"> HDFS dead </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 14 </td>
+        <td colspan="1" rowspan="1"> Mapred dead </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 16 </td>
+        <td colspan="1" rowspan="1"> All Map/Reduce jobs that ran on the cluster failed. Refer to hadoop logs for more details. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 17 </td>
+        <td colspan="1" rowspan="1"> Some of the Map/Reduce jobs that ran on the cluster failed. Refer to hadoop logs for more details. </td>
+      
+</tr>
+    
+  
+</table>
+</div>
+  
+<a name="N10456"></a><a name="Command+Line+Options"></a>
+<h2 class="h3"> Command Line Options </h2>
+<div class="section">
+<a name="Command_Line_Options" id="Command_Line_Options"></a>
+<p>Command line options for the <span class="codefrag">hod</span> command are used for two purposes: defining an operation that HOD must perform, and defining configuration options for customizing HOD that override options defined in the default configuration file. This section covers both types of options. </p>
+<a name="N10464"></a><a name="Options+Defining+Operations"></a>
+<h3 class="h4"> Options Defining Operations </h3>
+<a name="Options_Defining_Operations" id="Options_Defining_Operations"></a>
+<p>
+<em>--help</em>
+<br>
+    Prints out the help message to see the basic options.</p>
+<p>
+<em>--verbose-help</em>
+<br>
+    All configuration options provided in the hodrc file can be passed on the command line, using the syntax <span class="codefrag">--section_name.option_name[=value]</span>. When provided this way, the value provided on command line overrides the option provided in hodrc. The verbose-help command lists all the available options in the hodrc file. This is also a nice way to see the meaning of the configuration options.</p>
+<p>
+<em>-o "operation_name options"</em>
+<br>
+    This class of options is used to define the <em>operation</em> mode of HOD. <em>Note:</em> The operation_name and other options must be specified within double quotes.</p>
+<p>
+<em>-o "help"</em>
+<br>
+    Lists the operations available in the <em>operation</em> mode.</p>
+<p>
+<em>-o "allocate cluster_dir number_of_nodes"</em>
+<br>
+    Allocates a cluster on the given number of cluster nodes, and stores the allocation information in cluster_dir for use with subsequent <span class="codefrag">hadoop</span> commands. Note that the <span class="codefrag">cluster_dir</span> must exist before running the command.</p>
+<p>
+<em>-o "list"</em>
+<br>
+    Lists the clusters allocated by this user. Information provided includes the Torque job id corresponding to the cluster, the cluster directory where the allocation information is stored, and whether the Map/Reduce daemon is still active or not.</p>
+<p>
+<em>-o "info cluster_dir"</em>
+<br>
+    Lists information about the cluster whose allocation information is stored in the specified cluster directory.</p>
+<p>
+<em>-o "deallocate cluster_dir"</em>
+<br>
+    Deallocates the cluster whose allocation information is stored in the specified cluster directory.</p>
+<p>
+<em>-z script_file</em>
+<br>
+    Runs HOD in <em>script mode</em>. Provisions Hadoop on a given number of nodes, executes the given script from the submitting node, and deallocates the cluster when the script completes. Refer to option <em>-m</em>.
+</p>
+<a name="N104B9"></a><a name="Options+Configuring+HOD"></a>
+<h3 class="h4"> Options Configuring HOD </h3>
+<a name="Options_Configuring_HOD" id="Options_Configuring_HOD"></a>
+<p>As described above, HOD is configured using a configuration file that is usually set up by system administrators. This is an INI-style configuration file that is divided into sections, with options inside each section. Each section relates to one of the HOD processes: client, ringmaster, hodring, mapreduce or hdfs. Each option inside a section consists of an option name and a value. </p>
+<p>Users can override the configuration defined in the default configuration in two ways: </p>
+<ul>
+    
+<li> Users can supply their own configuration file to HOD in each of the commands, using the <span class="codefrag">-c</span> option</li>
+    
+<li> Users can supply specific configuration options to HOD. Options provided on the command line <em>override</em> the values provided in the configuration file being used.</li>
+  
+</ul>
+<p>This section describes some of the most commonly used configuration options. These commonly used options are provided with a <em>short</em> option for convenience of specification. All other options can be specified using a <em>long</em> option that is also described below.</p>
+<p>
+<em>-c config_file</em>
+<br>
+    Provides the configuration file to use. Can be used with all other options of HOD. Alternatively, the <span class="codefrag">HOD_CONF_DIR</span> environment variable can be defined to specify a directory that contains a file named <span class="codefrag">hodrc</span>, alleviating the need to specify the configuration file in each HOD command.</p>
+<p>
+<em>-b 1|2|3|4</em>
+<br>
+    Enables the given debug level. Can be used with all other options of HOD. 4 is most verbose.</p>
+<p>
+<em>-t hadoop_tarball</em>
+<br>
+    Provisions Hadoop from the given tar.gz file. This option is only applicable to the <em>allocate</em> operation. For better distribution performance it is strongly recommended that the Hadoop tarball be created <em>after</em> removing the source and documentation.</p>
+<p>
+<em>-m number_of_nodes</em>
+<br>
+    When used in the <em>script</em> mode, this specifies the number of nodes to allocate. Note that this option is useful only in the script mode.</p>
+<p>
+<em>-N job-name</em>
+<br>
+    The name to give to the resource manager job that HOD uses underneath. For example, in the case of Torque, this translates to the <span class="codefrag">qsub -N</span> option, and can be seen as the job name using the <span class="codefrag">qstat</span> command.</p>
+<p>
+<em>-l wall-clock-time</em>
+<br>
+    The amount of time for which the user expects to have work on the allocated cluster. This is passed to the resource manager underneath HOD, and can be used for more efficient scheduling and utilization of the cluster. Note that in the case of Torque, the cluster is automatically deallocated after this time expires.</p>
+<p>
+<em>-j java-home</em>
+<br>
+    Path to be set to the JAVA_HOME environment variable. This is used in the <em>script</em> mode. HOD sets the JAVA_HOME environment variable to this value and launches the user script in that environment.</p>
+<p>
+<em>-A account-string</em>
+<br>
+    Accounting information to pass to the underlying resource manager.</p>
+<p>
+<em>-Q queue-name</em>
+<br>
+    Name of the queue in the underlying resource manager to which the job must be submitted.</p>
+<p>
+<em>-Mkey1=value1 -Mkey2=value2</em>
+<br>
+    Provides configuration parameters for the provisioned Map/Reduce daemons (JobTracker and TaskTrackers). A hadoop-site.xml is generated with these values on the cluster nodes. <br>
+    
+<em>Note:</em> Values which have the following characters: space, comma, equal-to, semi-colon need to be escaped with a '\' character, and need to be enclosed within quotes. You can escape a '\' with a '\' too. </p>
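+<p>As an illustration (the parameter names and values are hypothetical), the following command passes two Map/Reduce parameters, one of whose values contains commas that are escaped and the whole pair enclosed in quotes as described above:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "allocate ~/hadoop-cluster 10" -Mmapred.reduce.tasks=5 -M"io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec\,org.apache.hadoop.io.compress.DefaultCodec"</span></td>
+</tr>
+</table>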
+<p>
+<em>-Hkey1=value1 -Hkey2=value2</em>
+<br>
+    Provides configuration parameters for the provisioned HDFS daemons (NameNode and DataNodes). A hadoop-site.xml is generated with these values on the cluster nodes. <br>
+    
+<em>Note:</em> Values which have the following characters: space, comma, equal-to, semi-colon need to be escaped with a '\' character, and need to be enclosed within quotes. You can escape a '\' with a '\' too. </p>
+<p>
+<em>-Ckey1=value1 -Ckey2=value2</em>
+<br>
+    Provides configuration parameters for the client from where jobs can be submitted. A hadoop-site.xml is generated with these values on the submit node. <br>
+    
+<em>Note:</em> Values which have the following characters: space, comma, equal-to, semi-colon need to be escaped with a '\' character, and need to be enclosed within quotes. You can escape a '\' with a '\' too. </p>
+<p>
+<em>--section-name.option-name=value</em>
+<br>
+    This is the method to provide options using the <em>long</em> format. For example, you could say <em>--hod.script-wait-time=20</em>
+</p>
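+<p>As a further illustration, long-format options can be combined with any operation. The following hypothetical command raises the ringmaster debug level while allocating a 3-node cluster:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod -o "allocate ~/hadoop-cluster 3" --ringmaster.debug=4</span></td>
+</tr>
+</table>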
+</div>
+	
+<a name="N1055A"></a><a name="Troubleshooting-N1055A"></a>
+<h2 class="h3"> Troubleshooting </h2>
+<div class="section">
+<a name="Troubleshooting" id="Troubleshooting"></a>
+<p>The following section identifies some of the most likely error conditions users can run into when using HOD, and ways to troubleshoot them.</p>
+<a name="N10565"></a><a name="Hangs+During+Allocation"></a>
+<h3 class="h4">hod Hangs During Allocation </h3>
+<a name="_hod_Hangs_During_Allocation" id="_hod_Hangs_During_Allocation"></a><a name="hod_Hangs_During_Allocation" id="hod_Hangs_During_Allocation"></a>
+<p>
+<em>Possible Cause:</em> One of the HOD or Hadoop components has failed to come up. In such a case, the <span class="codefrag">hod</span> command will return after a few minutes (typically 2-3 minutes) with an error code of either 7 or 8 as defined in the Error Codes section. Refer to that section for further details. </p>
+<p>
+<em>Possible Cause:</em> A large allocation is fired with a tarball. Sometimes, due to load in the network or on the allocated nodes, the tarball distribution might be significantly slow and the command can take a few minutes to return. Wait for it to complete. Also check that the tarball does not include the Hadoop sources or documentation.</p>
+<p>
+<em>Possible Cause:</em> A Torque-related problem. If the cause is Torque related, the <span class="codefrag">hod</span> command will not return for more than 5 minutes. Running <span class="codefrag">hod</span> in debug mode may show the <span class="codefrag">qstat</span> command being executed repeatedly. Executing the <span class="codefrag">qstat</span> command from a separate shell may show that the job is in the <span class="codefrag">Q</span> (Queued) state. This usually indicates a problem with Torque. Possible causes could include some nodes being down, or new nodes added that Torque is not aware of. Generally, system administrator help is needed to resolve this problem.</p>
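+<p>To check the state of the Torque job from a separate shell, one might run the following (the job id is hypothetical); Torque's full job listing includes a <span class="codefrag">job_state</span> field:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ qstat -f 123456 | grep job_state</span></td>
+</tr>
+</table>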
+<a name="N10592"></a><a name="Hangs+During+Deallocation"></a>
+<h3 class="h4">hod Hangs During Deallocation </h3>
+<a name="_hod_Hangs_During_Deallocation" id="_hod_Hangs_During_Deallocation"></a><a name="hod_Hangs_During_Deallocation" id="hod_Hangs_During_Deallocation"></a>
+<p>
+<em>Possible Cause:</em> A Torque related problem, usually load on the Torque server, or the allocation is very large. Generally, waiting for the command to complete is the only option.</p>
+<a name="N105A3"></a><a name="Fails+With+an+error+code+and+error+message"></a>
+<h3 class="h4">hod Fails With an error code and error message </h3>
+<a name="hod_Fails_With_an_error_code_and" id="hod_Fails_With_an_error_code_and"></a><a name="_hod_Fails_With_an_error_code_an" id="_hod_Fails_With_an_error_code_an"></a>
+<p>If the exit code of the <span class="codefrag">hod</span> command is not <span class="codefrag">0</span>, then refer to the following table of error exit codes to determine why the error may have occurred and how to debug the situation.</p>
+<p>
+<strong> Error Codes </strong>
+</p>
+<a name="Error_Codes" id="Error_Codes"></a>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+    
+      
+<tr>
+        
+<th colspan="1" rowspan="1">Error Code</th>
+        <th colspan="1" rowspan="1">Meaning</th>
+        <th colspan="1" rowspan="1">Possible Causes and Remedial Actions</th>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 1 </td>
+        <td colspan="1" rowspan="1"> Configuration error </td>
+        <td colspan="1" rowspan="1"> Incorrect configuration values specified in hodrc, or other errors related to HOD configuration. The error messages in this case must be sufficient to debug and fix the problem. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 2 </td>
+        <td colspan="1" rowspan="1"> Invalid operation </td>
+        <td colspan="1" rowspan="1"> Do <span class="codefrag">hod -o "help"</span> for the list of valid operations. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 3 </td>
+        <td colspan="1" rowspan="1"> Invalid operation arguments </td>
+        <td colspan="1" rowspan="1"> Do <span class="codefrag">hod -o "help"</span> for the list of valid operations. Note that for an <em>allocate</em> operation, the directory argument must specify an existing directory. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 4 </td>
+        <td colspan="1" rowspan="1"> Scheduler failure </td>
+        <td colspan="1" rowspan="1"> 1. Requested more resources than available. Run <span class="codefrag">checknodes cluster_name</span> to see if enough nodes are available. <br>
+          2. Torque is misconfigured, the path to Torque binaries is misconfigured, or other Torque problems. Contact system administrator. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 5 </td>
+        <td colspan="1" rowspan="1"> Job execution failure </td>
+        <td colspan="1" rowspan="1"> 1. Torque Job was deleted from outside. Execute the Torque <span class="codefrag">qstat</span> command to see if you have any jobs in the <span class="codefrag">R</span> (Running) state. If none exist, try re-executing HOD. <br>
+          2. Torque problems such as the server momentarily going down, or becoming unresponsive. Contact system administrator. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 6 </td>
+        <td colspan="1" rowspan="1"> Ringmaster failure </td>
+        <td colspan="1" rowspan="1"> 1. Invalid configuration in the <span class="codefrag">ringmaster</span> section,<br>
+          2. invalid <span class="codefrag">pkgs</span> option in <span class="codefrag">gridservice-mapred or gridservice-hdfs</span> section,<br>
+          3. an invalid hadoop tarball,<br>
+          4. mismatched version in Hadoop between the MapReduce and an external HDFS.<br>
+          The Torque <span class="codefrag">qstat</span> command will most likely show a job in the <span class="codefrag">C</span> (Completed) state. Refer to the section <em>Locating Ringmaster Logs</em> below for more information. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 7 </td>
+        <td colspan="1" rowspan="1"> DFS failure </td>
+        <td colspan="1" rowspan="1"> 1. Problem in starting Hadoop clusters. Review the Hadoop related configuration. Look at the Hadoop logs using information specified in <em>Getting Hadoop Logs</em> section above. <br>
+          2. Invalid configuration in the <span class="codefrag">hodring</span> section of hodrc. <span class="codefrag">ssh</span> to all allocated nodes (determined by <span class="codefrag">qstat -f torque_job_id</span>) and grep for <span class="codefrag">ERROR</span> or <span class="codefrag">CRITICAL</span> in hodring logs. Refer to the section <em>Locating Hodring Logs</em> below for more information. <br>
+          3. Invalid tarball specified which is not packaged correctly. <br>
+          4. Cannot communicate with an externally configured HDFS. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 8 </td>
+        <td colspan="1" rowspan="1"> Job tracker failure </td>
+        <td colspan="1" rowspan="1"> Similar to the causes in <em>DFS failure</em> case. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 10 </td>
+        <td colspan="1" rowspan="1"> Cluster dead </td>
+        <td colspan="1" rowspan="1"> 1. Cluster was auto-deallocated because it was idle for a long time. <br>
+          2. Cluster was auto-deallocated because the wallclock time specified by the system administrator or user was exceeded. <br>
+          3. Cannot communicate with the JobTracker and HDFS NameNode which were successfully allocated. Deallocate the cluster, and allocate again. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 12 </td>
+        <td colspan="1" rowspan="1"> Cluster already allocated </td>
+        <td colspan="1" rowspan="1"> The cluster directory specified has been used in a previous allocate operation and is not yet deallocated. Specify a different directory, or deallocate the previous allocation first. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 13 </td>
+        <td colspan="1" rowspan="1"> HDFS dead </td>
+        <td colspan="1" rowspan="1"> Cannot communicate with the HDFS NameNode. HDFS NameNode went down. </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 14 </td>
+        <td colspan="1" rowspan="1"> Mapred dead </td>
+        <td colspan="1" rowspan="1"> 1. Cluster was auto-deallocated because it was idle for a long time. <br>
+          2. Cluster was auto-deallocated because the wallclock time specified by the system administrator or user was exceeded. <br>
+          3. Cannot communicate with the Map/Reduce JobTracker. JobTracker node went down. <br>
+          
+</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 15 </td>
+        <td colspan="1" rowspan="1"> Cluster not allocated </td>
+        <td colspan="1" rowspan="1"> An operation which requires an allocated cluster is given a cluster directory with no state information. </td>
+      
+</tr>
+    
+  
+</table>
+<a name="N10715"></a><a name="Hadoop+Jobs+Not+Running+on+a+Successfully+Allocated+Cluster"></a>
+<h3 class="h4"> Hadoop Jobs Not Running on a Successfully Allocated Cluster </h3>
+<a name="Hadoop_Jobs_Not_Running_on_a_Suc" id="Hadoop_Jobs_Not_Running_on_a_Suc"></a>
+<p>This scenario generally occurs when a cluster is allocated and left inactive for some time, and Hadoop jobs are then run on it. The Hadoop jobs fail with the following exception:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">08/01/25 16:31:40 INFO ipc.Client: Retrying connect to server: foo.bar.com/1.1.1.1:53567. Already tried 1 time(s).</span></td>
+</tr>
+</table>
+<p>
+<em>Possible Cause:</em> No Hadoop jobs were run for a significant portion of time. The cluster would therefore have been deallocated as described in the section <em>Auto-deallocation of Idle Clusters</em>. Deallocate the cluster and allocate it again.</p>
+<p>
+<em>Possible Cause:</em> The wallclock limit specified by the Torque administrator or the <span class="codefrag">-l</span> option defined in the section <em>Specifying Additional Job Attributes</em> was exceeded since allocation time. The cluster would therefore have been released. Deallocate the cluster and allocate it again.</p>
+<p>
+<em>Possible Cause:</em> There is a version mismatch between the version of the hadoop being used in provisioning (typically via the tarball option) and the external HDFS. Ensure compatible versions are being used.</p>
+<p>
+<em>Possible Cause:</em> There is a version mismatch between the version of the hadoop client being used to submit jobs and the hadoop used in provisioning (typically via the tarball option). Ensure compatible versions are being used.</p>
+<p>
+<em>Possible Cause:</em> You used one of the options for specifying Hadoop configuration <span class="codefrag">-M or -H</span>, which had special characters like space or comma that were not escaped correctly. Refer to the section <em>Options Configuring HOD</em> for checking how to specify such options correctly.</p>
+<a name="N10750"></a><a name="My+Hadoop+Job+Got+Killed"></a>
+<h3 class="h4"> My Hadoop Job Got Killed </h3>
+<a name="My_Hadoop_Job_Got_Killed" id="My_Hadoop_Job_Got_Killed"></a>
+<p>
+<em>Possible Cause:</em> The wallclock limit specified by the Torque administrator or the <span class="codefrag">-l</span> option defined in the section <em>Specifying Additional Job Attributes</em> was exceeded since allocation time. The cluster would therefore have been released. Deallocate the cluster and allocate it again, this time with a larger wallclock time.</p>
+<p>
+<em>Possible Cause:</em> Problems with the JobTracker node. Refer to the section in <em>Collecting and Viewing Hadoop Logs</em> to get more information.</p>
+<a name="N1076B"></a><a name="Hadoop+Job+Fails+with+Message%3A+%27Job+tracker+still+initializing%27"></a>
+<h3 class="h4"> Hadoop Job Fails with Message: 'Job tracker still initializing' </h3>
+<a name="Hadoop_Job_Fails_with_Message_Jo" id="Hadoop_Job_Fails_with_Message_Jo"></a>
+<p>
+<em>Possible Cause:</em> The hadoop job was being run as part of the HOD script command, and it started before the JobTracker could come up fully. Allocate the cluster using a large value for the configuration option <span class="codefrag">--hod.script-wait-time</span>. Typically a value of 120 should work, though it is usually unnecessary to be that large.</p>
+<a name="N1077B"></a><a name="The+Exit+Codes+For+HOD+Are+Not+Getting+Into+Torque"></a>
+<h3 class="h4"> The Exit Codes For HOD Are Not Getting Into Torque </h3>
+<a name="The_Exit_Codes_For_HOD_Are_Not_G" id="The_Exit_Codes_For_HOD_Are_Not_G"></a>
+<p>
+<em>Possible Cause:</em> Version 0.16 of hadoop is required for this functionality to work. The version of Hadoop used does not match. Use the required version of Hadoop.</p>
+<p>
+<em>Possible Cause:</em> The deallocation was done without using the <span class="codefrag">hod</span> command; for example, by directly using <span class="codefrag">qdel</span>. When the cluster is deallocated in this manner, the HOD processes are terminated using signals. This results in the exit code being based on the signal number, rather than the exit code of the program.</p>
+<a name="N10793"></a><a name="The+Hadoop+Logs+are+Not+Uploaded+to+DFS"></a>
+<h3 class="h4"> The Hadoop Logs are Not Uploaded to DFS </h3>
+<a name="The_Hadoop_Logs_are_Not_Uploaded" id="The_Hadoop_Logs_are_Not_Uploaded"></a>
+<p>
+<em>Possible Cause:</em> There is a version mismatch between the version of the hadoop being used for uploading the logs and the external HDFS. Ensure that the correct version is specified in the <span class="codefrag">hodring.pkgs</span> option.</p>
+<a name="N107A3"></a><a name="Locating+Ringmaster+Logs"></a>
+<h3 class="h4"> Locating Ringmaster Logs </h3>
+<a name="Locating_Ringmaster_Logs" id="Locating_Ringmaster_Logs"></a>
+<p>To locate the ringmaster logs, follow these steps: </p>
+<ul>
+    
+<li> Execute hod in the debug mode using the -b option. This will print the Torque job id for the current run.</li>
+    
+<li> Execute <span class="codefrag">qstat -f torque_job_id</span> and look up the value of the <span class="codefrag">exec_host</span> parameter in the output. The first host in this list is the ringmaster node.</li>
+    
+<li> Login to this node.</li>
+    
+<li> The ringmaster log location is specified by the <span class="codefrag">ringmaster.log-dir</span> option in the hodrc. The name of the log file will be <span class="codefrag">username.torque_job_id/ringmaster-main.log</span>. An illustrative sequence of commands is shown after this list.</li>
+    
+<li> If you don't get enough information, you may want to set the ringmaster debug level to 4. This can be done by passing <span class="codefrag">--ringmaster.debug 4</span> to the hod command line.</li>
+  
+</ul>
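+<p>For example, assuming a Torque job id of 123456, a <span class="codefrag">ringmaster.log-dir</span> of /var/log/hod, and a user name of foo (all hypothetical values), the sequence might look like:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ qstat -f 123456 | grep exec_host</span></td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ ssh first-host-from-exec_host</span></td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ less /var/log/hod/foo.123456/ringmaster-main.log</span></td>
+</tr>
+</table>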
+<a name="N107CF"></a><a name="Locating+Hodring+Logs"></a>
+<h3 class="h4"> Locating Hodring Logs </h3>
+<a name="Locating_Hodring_Logs" id="Locating_Hodring_Logs"></a>
+<p>To locate hodring logs, follow the steps below: </p>
+<ul>
+    
+<li> Execute hod in the debug mode using the -b option. This will print the Torque job id for the current run.</li>
+    
+<li> Execute <span class="codefrag">qstat -f torque_job_id</span> and look up the value of the <span class="codefrag">exec_host</span> parameter in the output. All nodes in this list should have a hodring on them.</li>
+    
+<li> Login to any of these nodes.</li>
+    
+<li> The hodring log location is specified by the <span class="codefrag">hodring.log-dir</span> option in the hodrc. The name of the log file will be <span class="codefrag">username.torque_job_id/hodring-main.log</span>.</li>
+    
+<li> If you don't get enough information, you may want to set the hodring debug level to 4. This can be done by passing <span class="codefrag">--hodring.debug 4</span> to the hod command line.</li>
+  
+</ul>
+</div>
+
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

The file diff is not shown because of its large size
+ 358 - 0
docs/hod_user_guide.pdf


+ 6 - 0
docs/linkmap.html

@@ -241,6 +241,12 @@ document.write("Last Published: " + document.lastModified);
 <li>
 <a href="hod.html">Hadoop On Demand</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>hod</em>
 </li>
+<ul>
+      
+      
+      
+    
+</ul>
 </ul>
     
 <ul>

+ 7 - 626
src/docs/src/documentation/content/xdocs/hod.xml

@@ -31,636 +31,17 @@
     <section>
       <title> Introduction </title>
       <p>
-      The Hadoop On Demand (<acronym title="Hadoop On Demand">HOD</acronym>) project is a system for provisioning and managing independent Hadoop MapReduce instances on a shared cluster of nodes. HOD uses a resource manager for allocation. At present it supports <a href="http://www.clusterresources.com/pages/products/torque-resource-manager.php">Torque</a> out of the box.
+Hadoop On Demand (HOD) is a system for provisioning virtual Hadoop clusters over a large physical cluster. It uses the Torque resource manager to do node allocation. On the allocated nodes, it can start Hadoop Map/Reduce and HDFS daemons. It automatically generates the appropriate configuration files (hadoop-site.xml) for the Hadoop daemons and client. HOD also has the capability to distribute Hadoop to the nodes in the virtual cluster that it allocates. In short, HOD makes it easy for administrators and users to quickly set up and use Hadoop. It is also a very useful tool for Hadoop developers and testers who need to share a physical cluster for testing their own Hadoop versions.
       </p>
-    </section>
-
-    <section> 
-      <title> Feature List </title>
-
-      <section> 
-        <title> Simplified Interface for Provisioning Hadoop Clusters </title>
-        <p>
-        By far, the biggest advantage of HOD is to quickly setup a Hadoop cluster. The user interacts with the cluster through a simple command line interface, the HOD client. HOD brings up a virtual MapReduce cluster with the required number of nodes, which the user can use for running Hadoop jobs. When done, HOD will automatically clean up the resources and make the nodes available again.
-        </p>
-      </section>
- 
-      <section> 
-        <title> Automatic installation of Hadoop </title>
-        <p>
-        With HOD, Hadoop does not need to be even installed on the cluster. The user can provide a Hadoop tarball that HOD will automatically distribute to all the nodes in the cluster.
-        </p>
-      </section>
-
-      <section> 
-        <title> Configuring Hadoop </title>
-        <p>
-        Dynamic parameters of Hadoop configuration, such as the NameNode and JobTracker addresses and ports, and file system temporary directories are generated and distributed by HOD automatically to all nodes in the cluster. In addition, HOD allows the user to configure Hadoop parameters at both the server (for e.g. JobTracker) and client (for e.g. JobClient) level, including 'final' parameters, that were introduced with Hadoop 0.15.
-        </p>
-      </section>
- 
-      <section> 
-        <title> Auto-cleanup of Unused Clusters </title>
-        <p>
-        HOD has an automatic timeout so that users cannot misuse resources they aren't using. The timeout applies only when there is no MapReduce job running. 
-        </p>
-      </section>
- 
-      <section> 
-        <title> Log Services </title>
-        <p>
-        HOD can be used to collect all MapReduce logs to a central location for archiving and inspection after the job is completed.
-        </p>
-      </section>
-    </section>
-
-    <section>
-      
-      <title> HOD Components </title>
-      <p>
-      This is a brief overview of the various components of HOD and how they interact to provision Hadoop.
-      </p>
-
-      <section>
-        <title> HOD Client </title>
-        <p>
-        The HOD client is a Unix command that users use to allocate Hadoop MapReduce clusters. The command provides other options to list allocated clusters and deallocate them. The HOD client generates the <em>hadoop-site.xml</em> in a user specified directory. The user can point to this configuration file while running Map/Reduce jobs on the allocated cluster.
-        </p>
-        <p>
-        The nodes from where the HOD Client is run are called <em>submit nodes</em> because jobs are submitted to the resource manager system for allocating and running clusters from these nodes.
-        </p>
       </section>
-
-      <section>
-        <title> RingMaster </title>
-        <p>
-        The RingMaster is a HOD process that is started on one node per every allocated cluster. It is submitted as a 'job' to the resource manager by the HOD client. It controls which Hadoop daemons start on which nodes. It provides this information to other HOD processes, such as the HOD client, so users can also determine this information. The RingMaster is responsible for hosting and distributing the Hadoop tarball to all nodes in the cluster. It also automatically cleans up unused clusters.
-        </p>
-        <p>
-        </p>
-      </section>
-
       <section>
-        <title> HodRing </title>
-        <p>
-        The HodRing is a HOD process that runs on every allocated node in the cluster. These processes are run by the RingMaster through the resource manager, using a facility of parallel execution. The HodRings are responsible for launching Hadoop commands on the nodes to bring up the Hadoop daemons. They get the command to launch from the RingMaster.
-        </p>
-      </section>
-
-      <section>
-        <title> Hodrc / HOD configuration file </title>
-        <p>
-        An INI style configuration file where the users configure various options for the HOD system, including install locations of different software, resource manager parameters, log and temp file directories, parameters for their MapReduce jobs, etc.
-        </p>
-      </section>
-
-      <section>
-        <title> Submit Nodes and Compute Nodes </title>
-        <p>
-        The nodes from where the <em>HOD Client</em> is run are referred as <em>submit nodes</em> because jobs are submitted to the resource manager system for allocating and running clusters from these nodes.
-        </p>
-        <p>
-        The nodes where the <em>Ringmaster</em> and <em>HodRings</em> run are called the Compute nodes. These are the nodes that get allocated by a resource manager, and on which the Hadoop daemons are provisioned and started.
-        </p>
-      </section>
-    </section>
-
-    <section>
-      <title> Getting Started with HOD </title>
-
-      <section>
-        <title> Pre-Requisites </title>
-
-        <section>
-          <title> Hardware </title>
-          <p>
-          HOD requires a minimum of 3 nodes configured through a resource manager.
-          </p>          
-        </section>
-
-        <section>
-          <title> Software </title>
-          <p>
-          The following components are assumed to be installed before using HOD:
-          </p>
-          <ul>
-            <li>
-              <em>Torque:</em> Currently HOD supports Torque out of the box. We assume that you are familiar with configuring Torque. You can get information about this from <a href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki">here</a>.
-            </li>
-            <li>
-              <em>Python:</em> We require version 2.5.1, which can be downloaded from <a href="http://www.python.org/">here</a>.
-            </li>
-          </ul>
-          <p>
-          The following components can be optionally installed for getting better functionality from HOD:
-          </p>
-          <ul>
-            <li>
-              <em>Twisted Python:</em> This can be used for improving the scalability of HOD. Twisted Python is available <a href="http://twistedmatrix.com/trac/">here</a>.
-            </li>
-            <li>
-            <em>Hadoop:</em> HOD can automatically distribute Hadoop to all nodes in the cluster. However, it can also use a pre-installed version of Hadoop, if it is available on all nodes in the cluster. HOD currently supports Hadoop 0.15 and above.
-            </li>
-          </ul>
-          <p>
-          HOD configuration requires the location of installs of these components to be the same on all nodes in the cluster. It will also make the configuration simpler to have the same location on the submit nodes.
-          </p>
-        </section>
-   
-        <section>
-          <title>Resource Manager Configuration Pre-requisites</title>
-          <p>
-          For using HOD with Torque:
-          </p>
-          <ul>
-            <li>
-            Install Torque components: pbs_server on a head node, pbs_moms on all compute nodes, and PBS client tools on all compute nodes and submit nodes.
-            </li>
-            <li>
-            Create a queue for submitting jobs on the pbs_server.
-            </li>
-            <li>
-            Specify a name for all nodes in the cluster, by setting a 'node property' to all the nodes. This can be done by using the 'qmgr' command. For example:
-            <em>qmgr -c "set node node properties=cluster-name"</em>
-            </li>
-            <li>
-            Ensure that jobs can be submitted to the nodes. This can be done by using the 'qsub' command. For example:
-            <em>echo "sleep 30" | qsub -l nodes=3</em>
-            </li>
-          </ul>
-          <p>
-          More information about setting up Torque can be found by referring to the documentation <a href="http://www.clusterresources.com/pages/products/torque-resource-manager.php">here.</a>
-          </p>
-        </section>
-      </section>
-
-      <section>
-        <title>Setting up HOD</title>
-        <ul>
-          <li>
-          HOD is available in the 'contrib' section of Hadoop under the root directory 'hod'. Distribute the files under this directory to all the nodes in the cluster.
-          </li>
-          <li>
-          On the node from where you want to run hod, edit the file hodrc which can be found in the <em>install dir/conf</em> directory. This file contains the minimal set of values required for running hod.
-          </li>
-          <li>
-          Specify values suitable to your environment for the following variables defined in the configuration file. Note that some of these variables are defined at more than one place in the file.
-          </li>
+        <title>Documentation</title>
+      <p>Please go through the following documents to learn more about using HOD:</p>
+      <ul>
+        <li><a href="hod_admin_guide.html">Hod Admin Guide</a> : This guide will walk you through an overview of architecture of HOD, prerequisites, installing various components and dependent software, and configuring HOD to get it up and running.</li>
+        <li><a href="hod_user_guide.html">Hod User Guide</a> : This guide will let you know about how to get started on running hod, its various features, command line options and help on troubleshooting in detail.</li>
+        <li><a href="hod_config_guide.html">Hod Configuration Guide</a> : This guide discusses about onfiguring HOD, describing various configuration sections, parameters and their purpose in detail.</li>
       </ul>
-        <table>
-          <tr>
-            <th> Variable Name </th>
-            <th> Meaning </th>
-          </tr>
-          <tr>
-            <td> ${JAVA_HOME} </td>
-            <td> Location of Java for Hadoop. Hadoop supports Sun JDK 1.5.x </td>
-          </tr>
-          <tr>
-            <td> ${CLUSTER_NAME} </td>
-            <td> Name of the cluster which is specified in the 'node property' as mentioned in resource manager configuration. </td>
-          </tr>
-          <tr>
-            <td> ${HADOOP_HOME} </td>
-            <td> Location of Hadoop installation on the compute and submit nodes. </td>
-          </tr>
-          <tr>
-            <td> ${RM_QUEUE} </td>
-            <td> Queue configured for submiting jobs in the resource manager configuration. </td>
-          </tr>
-          <tr>
-            <td> ${RM_HOME} </td>
-            <td> Location of the resource manager installation on the compute and submit nodes. </td>
-          </tr>
-        </table>
-        <ul>
-          <li>
-          The following environment variables *may* need to be set depending on your environment. These variables must be defined where you run the HOD client, and also be specified in the HOD configuration file as the value of the key resource_manager.env-vars. Multiple variables can be specified as a comma separated list of key=value pairs.
-          </li>
-        </ul>
-        <table>
-          <tr>
-            <th> Variable Name </th>
-            <th> Meaning </th>
-          </tr>
-          <tr>
-            <td>HOD_PYTHON_HOME</td>
-            <td>
-            If you install python to a non-default location of the compute nodes, or submit nodes, then, this variable must be defined to point to the python executable in the non-standard   location.
-            </td>
-          </tr>
-        </table>
-        <p>
-        You can also review other configuration options in the file and modify them to suit your needs. Refer to the the section on configuration below for information about the HOD configuration.
-        </p>
-      </section>
-    </section>
-
-    <section>
-      <title>Running HOD</title>
-
-      <section>
-        <title>Overview</title>
-        <p>
-        A typical session of HOD will involve atleast three steps: allocate, run hadoop jobs, deallocate.
-        </p>
-        <section>
-          <title>Operation allocate</title>
-          <p>
-          The allocate operation is used to allocate a set of nodes and install and provision Hadoop on them. It has the following syntax:
-          </p>
-          <table>
-            <tr>
-              <td>hod -c config_file -t hadoop_tarball_location -o "allocate                 cluster_dir number_of_nodes"</td>
-            </tr>
-          </table>
-          <p>
-          The hadoop_tarball_location must be a location on a shared file system accesible from all nodes in the cluster. Note, the cluster_dir must exist before running the command. If the command completes successfully then cluster_dir/hadoop-site.xml will be generated and will contain information about the allocated cluster's JobTracker and NameNode.
-          </p>
-          <p>
-          For example, the following command uses a hodrc file in ~/hod-config/hodrc and allocates Hadoop (provided by the tarball ~/share/hadoop.tar.gz) on 10 nodes, storing the generated Hadoop configuration in a directory named <em>~/hadoop-cluster</em>:
-          </p>
-          <table>
-            <tr>
-              <td>$ hod -c ~/hod-config/hodrc -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10"</td>
-            </tr>
-          </table>
-          <p>
-          HOD also supports an environment variable called <em>HOD_CONF_DIR</em>. If this is defined, HOD will look for a default hodrc file at $HOD_CONF_DIR/hodrc. Defining this allows the above command to also be run as follows:
-          </p>
-          <table>
-            <tr>
-              <td>
-                <p>$ export HOD_CONF_DIR=~/hod-config</p>
-                <p>$ hod -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10"</p>
-              </td>
-            </tr>
-          </table>
-        </section>
-        
-        <section>
-          <title>Running Hadoop jobs using the allocated cluster</title>
-          <p>
-          Now, one can run Hadoop jobs using the allocated cluster in the usual manner:
-          </p>
-          <table>
-            <tr>
-              <td>hadoop --config cluster_dir hadoop_command hadoop_command_args</td>
-            </tr>
-          </table>
-          <p>
-          Continuing our example, the following command will run a wordcount example on the allocated cluster:
-          </p>
-          <table>
-            <tr>
-              <td>$ hadoop --config ~/hadoop-cluster jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</td>
-            </tr>
-          </table>
-        </section>
-
-        <section>
-          <title>Operation deallocate</title>
-          <p>
-          The deallocate operation is used to release an allocated cluster. When finished with a cluster, deallocate must be run so that the nodes become free for others to use. The deallocate operation has the following syntax:
-          </p>
-          <table>
-            <tr>
-              <td>hod -o "deallocate cluster_dir"</td>
-            </tr>
-          </table>
-          <p>
-          Continuing our example, the following command will deallocate the cluster:
-          </p>
-          <table>
-            <tr>
-              <td>$ hod -o "deallocate ~/hadoop-cluster"</td>
-            </tr>
-          </table>
-        </section>
-      </section>
-
-      <section>
-        <title>Command Line Options</title>
-        <p>
-        This section covers the major command line options available via the hod command:
-        </p>
-
-        <p>
-        <em>--help</em>
-        </p>
-        <p>
-        Prints out the help message to see the basic options.
-        </p>
-
-        <p>
-        <em>--verbose-help</em>
-        </p>
-        <p>
-        All configuration options provided in the hodrc file can be passed on the command line, using the syntax --section_name.option_name[=value]. When provided this way, the value provided on command line overrides the option provided in hodrc. The verbose-help command lists all the available options in the hodrc file. This is also a nice way to see the meaning of the configuration options.
-        </p>
-
-        <p>
-        <em>-c config_file</em>
-        </p>
-        <p>
-        Provides the configuration file to use. Can be used with all other options of HOD. Alternatively, the HOD_CONF_DIR environment variable can be defined to specify a directory that contains a file named hodrc, alleviating the need to specify the configuration file in each HOD command.
-        </p>
-
-        <p>
-        <em>-b 1|2|3|4</em>
-        </p>
-        <p>
-        Enables the given debug level. Can be used with all other options of HOD. 4 is most verbose.
-        </p>
-
-        <p>
-        <em>-o "help"</em>
-        </p>
-        <p>
-        Lists the operations available in the operation mode.
-        </p>
-
-        <p>
-        <em>-o "allocate cluster_dir number_of_nodes"</em>
-        </p>
-        <p>
-        Allocates a cluster on the given number of cluster nodes, and store the allocation information in cluster_dir for use with subsequent hadoop commands. Note that the cluster_dir must exist before running the command.
-        </p>
-
-        <p>
-        <em>-o "list"</em>
-        </p>
-        <p>
-        Lists the clusters allocated by this user. Information provided includes the Torque job id corresponding to the cluster, the cluster directory where the allocation information is stored, and whether the Map/Reduce daemon is still active or not.
-        </p>
-
-        <p>
-        <em>-o "info cluster_dir"</em>
-        </p>
-        <p>
-        Lists information about the cluster whose allocation information is stored in the specified cluster directory.
-        </p>
-
-        <p>
-        <em>-o "deallocate cluster_dir"</em>
-        </p>
-        <p>
-       Deallocates the cluster whose allocation information is stored in the specified cluster directory.
-        </p>
-
-        <p>
-        <em>-t hadoop_tarball</em>
-        </p>
-        <p>
-        Provisions Hadoop from the given tar.gz file. This option is only applicable to the allocate operation. For better distribution performance it is recommended that the Hadoop tarball contain only the libraries and binaries, and not the source or documentation. 
-        </p>
-
-        <p>
-        <em>-Mkey1=value1 -Mkey2=value2</em>
-        </p>
-        <p>
-        Provides configuration parameters for the provisioned Map/Reduce daemons (JobTracker and TaskTrackers). A hadoop-site.xml is generated with these values on the cluster nodes
-        </p>
-
-        <p>
-        <em>-Hkey1=value1 -Hkey2=value2</em>
-        </p>
-        <p>
-        Provides configuration parameters for the provisioned HDFS daemons (NameNode and DataNodes). A hadoop-site.xml is generated with these values on the cluster nodes
-        </p>
-
-        <p>
-        <em>-Ckey1=value1 -Ckey2=value2</em>
-        </p>
-        <p>
-        Provides configuration parameters for the client from where jobs can be submitted. A hadoop-site.xml is generated with these values on the submit node.
-        </p>
-
-      </section>
-    </section>
-    <section>
-      <title> HOD Configuration </title>
-      <section>
-        <title> Introduction to HOD Configuration </title>
-        <p>
-        Configuration options for HOD are organized as sections and options within them. They can be specified in two ways: a configuration file in the INI format, and as command line options to the HOD shell, specified in the format --section.option[=value]. If the same option is specified in both places, the value specified on the command line overrides the value in the configuration file.
-        </p>
-        <p>
-        To get a simple description of all configuration options, you can type <em>hod --verbose-help</em>
-        </p>
-        <p>
-        This section explains some of the most important or commonly used configuration options in some more detail.
-        </p>
-      </section>
-      <section>
-        <title> Categories / Sections in HOD Configuration </title>
-        <p>
-        The following are the various sections in the HOD configuration:
-        </p>
-        <table>
-          <tr>
-            <th> Section Name </th>
-            <th> Description </th>
-          </tr>
-          <tr>
-            <td>hod</td>
-            <td>Options for the HOD client</td>
-          </tr>
-          <tr>
-            <td>resource_manager</td>
-            <td>Options for specifying which resource manager to use, and other parameters for using that resource manager</td>
-          </tr>
-          <tr>
-            <td>ringmaster</td>
-            <td>Options for the RingMaster process</td>
-          </tr>
-          <tr>
-            <td>hodring</td>
-            <td>Options for the HodRing process</td>
-          </tr>
-          <tr>
-            <td>gridservice-mapred</td>
-            <td>Options for the MapReduce daemons</td>
-          </tr>
-          <tr>
-            <td>gridservice-hdfs</td>
-            <td>Options for the HDFS daemons</td>
-          </tr>
-        </table>
-      </section>
-
-      <section>
-        <title> Important and Commonly Used Configuration Options </title>
-        
-        <section>
-          <title> Common configuration options </title>
-          <p>
-          Certain configuration options are defined in most of the sections of the HOD configuration. Options defined in a section, are used by the process for which that section applies. These options have the same meaning, but can have different values in each section.
-          </p>
-          <table>
-            <tr>
-              <th> Option Name </th>
-              <th> Description </th>
-            </tr>
-            <tr>
-              <td>temp-dir</td>
-              <td>Temporary directory for usage by the HOD processes. Make sure that the users who will run hod have rights to create directories under the directory specified here.</td>
-            </tr>
-            <tr>
-              <td>debug</td>
-              <td>A numeric value from 1-4. 4 produces the most log information, and 1 the least.</td>
-            </tr>
-            <tr>
-              <td>log-dir</td>
-              <td>Directory where log files are stored. By default, this is <em>install-location/logs/</em>. The restrictions and notes for the temp-dir variable apply here too.</td>
-            </tr>
-            <tr>
-              <td>xrs-port-range</td>
-              <td>A range of ports, among which an available port shall be picked for use to run any XML-RPC based server daemon processes of HOD.</td>
-            </tr>
-            <tr>
-              <td>http-port-range</td>
-              <td>A range of ports, among which an available port shall be picked for use to run any HTTP based server daemon processes of HOD.</td>
-            </tr>
-          </table>
-        </section>
-
-        <section>
-          <title> hod options </title>
-          <table>
-            <tr>
-              <th> Option Name </th>
-              <th> Description </th>
-            </tr>
-            <tr>
-              <td>cluster</td>
-              <td>A descriptive name given to the cluster. For Torque, this is specified as a 'Node property' for every node in the cluster. HOD uses this value to compute the number of available nodes.</td>
-            </tr>
-            <tr>
-              <td>client-params</td>
-              <td>A comma-separated list of hadoop config parameters specified as key-value pairs. These will be used to generate a hadoop-site.xml on the submit node that should be used for running MapReduce jobs.</td>
-            </tr>
-          </table>
-        </section>
-
-        <section>
-          <title> resource_manager options </title>
-          <table>
-            <tr>
-              <th> Option Name </th>
-              <th> Description </th>
-            </tr>
-            <tr>
-              <td>queue</td>
-              <td>Name of the queue configured in the resource manager to which jobs are to be submitted.</td>
-            </tr>
-            <tr>
-              <td>batch-home</td>
-              <td>Install directory to which 'bin' is appended and under which the executables of the resource manager can be found. </td>
-            </tr>
-            <tr>
-              <td>env-vars</td>
-              <td>This is a comma separated list of key-value pairs, expressed as key=value, which would be passed to the jobs launched on the compute nodes. For example, if the python installation is in a non-standard location, one can set the environment variable 'HOD_PYTHON_HOME' to the path to the python executable. The HOD processes launched on the compute nodes can then use this variable.</td>
-            </tr>
-          </table>
-        </section>
-
-        <section>
-          <title> ringmaster options </title>
-          <table>
-            <tr>
-              <th> Option Name </th>
-              <th> Description </th>
-            </tr>
-            <tr>
-              <td>work-dirs</td>
-              <td>These are a list of comma separated paths that will serve as the root for directories that HOD generates and passes to Hadoop for use to store DFS / MapReduce data. For e.g. this is where DFS data blocks will be stored. Typically, as many paths are specified as there are disks available to ensure all disks are being utilized. The restrictions and notes for the temp-dir variable apply here too.</td>
-            </tr>
-          </table>
-        </section>
-
-        <section>
-          <title> gridservice-hdfs options </title>
-          <table>
-            <tr>
-              <th> Option Name </th>
-              <th> Description </th>
-            </tr>
-            <tr>
-              <td>external</td>
-              <td>
-              <p> If false, this indicates that a HDFS cluster must be bought up by the HOD system, on the nodes which it allocates via the allocate command. Note that in that case, when the cluster is de-allocated, it will bring down the HDFS cluster, and all the data will be lost. If true, it will try and connect to an externally configured HDFS system. </p>
-              <p>Typically, because input for jobs are placed into HDFS before jobs are run, and also the output from jobs in HDFS is required to be persistent, an internal HDFS cluster is of little value in a production system. However, it allows for quick testing.</p>
-              </td>
-            </tr>
-            <tr>
-              <td>host</td>
-              <td>Hostname of the externally configured NameNode, if any.</td>
-            </tr>
-            <tr>
-              <td>fs_port</td>
-              <td>Port to which NameNode RPC server is bound.</td>
-            </tr>
-            <tr>
-              <td>info_port</td>
-              <td>Port to which the NameNode web UI server is bound.</td>
-            </tr>
-            <tr>
-              <td>pkgs</td>
-              <td>Installation directory, under which bin/hadoop executable is located. This can be used to use a pre-installed version of Hadoop on the cluster.</td>
-            </tr>
-            <tr>
-              <td>server-params</td>
-              <td>A comma-separated list of hadoop config parameters specified key-value pairs. These will be used to generate a hadoop-site.xml that will be used by the NameNode and DataNodes.</td>
-            </tr>
-            <tr>
-              <td>final-server-params</td>
-              <td>Same as above, except they will be marked final.</td>
-            </tr>
-          </table>
-        </section>
-
-        <section>
-          <title> gridservice-mapred options </title>
-          <table>
-            <tr>
-              <th> Option Name </th>
-              <th> Description </th>
-            </tr>
-            <tr>
-              <td>external</td>
-              <td>
-              <p> If false, this indicates that a MapReduce cluster must be bought up by the HOD system on the nodes which it allocates via the allocate command. If true, if will try and connect to an externally configured MapReduce system.</p>
-              </td>
-            </tr>
-            <tr>
-              <td>host</td>
-              <td>Hostname of the externally configured JobTracker, if any.</td>
-            </tr>
-            <tr>
-              <td>tracker_port</td>
-              <td>Port to which the JobTracker RPC server is bound.</td>
-            </tr>
-            <tr>
-              <td>info_port</td>
-              <td>Port to which the JobTracker web UI server is bound.</td>
-            </tr>
-            <tr>
-              <td>pkgs</td>
-              <td>Installation directory, under which bin/hadoop executable is located. This can be used to use a pre-installed version of Hadoop on the cluster.</td>
-            </tr>
-            <tr>
-              <td>server-params</td>
-              <td>A comma-separated list of hadoop config parameters specified key-value pairs. These will be used to generate a hadoop-site.xml that will be used by the JobTracker and TaskTrackers.</td>
-            </tr>
-            <tr>
-              <td>final-server-params</td>
-              <td>Same as above, except they will be marked final.</td>
-            </tr>
-          </table>
-        </section>
-      </section>
     </section>
   </body>
 </document>
-

+ 238 - 0
src/docs/src/documentation/content/xdocs/hod_admin_guide.xml

@@ -0,0 +1,238 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+  <header>
+    <title> 
+      Hadoop On Demand
+    </title>
+  </header>
+
+  <body>
+<section>
+<title>Overview</title>
+
+<p>The Hadoop On Demand (HOD) project is a system for provisioning and
+managing independent Hadoop MapReduce and HDFS instances on a shared cluster 
+of nodes. HOD is a tool that makes it easy for administrators and users to 
+quickly set up and use Hadoop. It is also a very useful tool for Hadoop developers 
+and testers who need to share a physical cluster for testing their own Hadoop 
+versions.
+</p>
+
+<p>HOD relies on a resource manager (RM) for allocation of nodes that it can use for
+running Hadoop instances. At present it runs with the <a href="ext:hod/torque">Torque
+resource manager</a>.
+</p>
+
+<p>
+The basic system architecture of HOD includes components from:</p>
+<ul>
+  <li>A Resource manager (possibly together with a scheduler),</li>
+  <li>HOD components, and </li>
+  <li>Hadoop Map/Reduce and HDFS daemons.</li>
+</ul>
+
+<p>
+HOD provisions and maintains Hadoop Map/Reduce and, optionally, HDFS instances 
+through interaction with the above components on a given cluster of nodes. A cluster of
+nodes can be thought of as comprising two sets of nodes:</p>
+<ul>
+  <li>Submit nodes: Users use the HOD client on these nodes to allocate clusters, and then
+use the Hadoop client to submit Hadoop jobs. </li>
+  <li>Compute nodes: Using the resource manager, HOD components are run on these nodes to 
+provision the Hadoop daemons. After that Hadoop jobs run on them.</li>
+</ul>
+
+<p>
+Here is a brief description of the sequence of operations in allocating a cluster and
+running jobs on it.
+</p>
+
+<ul>
+  <li>The user uses the HOD client on the Submit node to allocate a required number of
+cluster nodes, and provision Hadoop on them.</li>
+  <li>The HOD client uses a Resource Manager interface, (qsub, in Torque), to submit a HOD
+process, called the RingMaster, as a Resource Manager job, requesting the user's desired number 
+of nodes. This job is submitted to the central server of the Resource Manager (pbs_server, in Torque).</li>
+  <li>On the compute nodes, the resource manager slave daemons, (pbs_moms in Torque), accept
+and run jobs that they are given by the central server (pbs_server in Torque). The RingMaster 
+process is started on one of the compute nodes (mother superior, in Torque).</li>
+  <li>The Ringmaster then uses another Resource Manager interface, (pbsdsh, in Torque), to run
+the second HOD component, HodRing, as distributed tasks on each of the compute
+nodes allocated.</li>
+  <li>The Hodrings, after initializing, communicate with the Ringmaster to get Hadoop commands, 
+and run them accordingly. Once the Hadoop commands are started, they register with the RingMaster,
+giving information about the daemons.</li>
+  <li>All the configuration files needed for Hadoop instances are generated by HOD itself, 
+some of them derived from options given by the user in the HOD configuration file.</li>
+  <li>The HOD client keeps communicating with the RingMaster to find out the location of the 
+JobTracker and HDFS daemons.</li>
+</ul>
+
+<p>The rest of the document deals with the steps needed to setup HOD on a physical cluster of nodes.</p>
+
+</section>
+
+<section>
+<title>Pre-requisites</title>
+
+<p>Operating System: HOD is currently tested on RHEL4.<br/>
+Nodes: HOD requires a minimum of 3 nodes configured through a resource manager.<br/></p>
+
+<p> Software </p>
+<p>The following components are to be installed on *ALL* the nodes before using HOD:</p>
+<ul>
+ <li>Torque: Resource manager</li>
+ <li><a href="ext:hod/python">Python</a> : HOD requires version 2.5.1 of Python.</li>
+</ul>
+
+<p>The following components can be optionally installed for getting better
+functionality from HOD:</p>
+<ul>
+ <li><a href="ext:hod/twisted-python">Twisted Python</a>: This can be
+  used for improving the scalability of HOD. If this module is detected to be
+  installed, HOD uses it, else it falls back to default modules.</li>
+ <li><a href="ext:site">Hadoop</a>: HOD can automatically
+ distribute Hadoop to all nodes in the cluster. However, it can also use a
+ pre-installed version of Hadoop, if it is available on all nodes in the cluster.
+  HOD currently supports Hadoop 0.15 and above.</li>
+</ul>
+
+<p>NOTE: HOD configuration requires the location of installs of these
+components to be the same on all nodes in the cluster. It will also
+make the configuration simpler to have the same location on the submit
+nodes.
+</p>
+</section>
+
+<section>
+<title>Resource Manager</title>
+<p>  Currently HOD works with the Torque resource manager, which it uses for its node
+  allocation and job submission. Torque is an open source resource manager from
+  <a href="ext:hod/cluster-resources">Cluster Resources</a>, a community effort
+  based on the PBS project. It provides control over batch jobs and distributed compute nodes. Torque is
+  freely available for download from <a href="ext:hod/torque-download">here</a>.
+  </p>
+
+<p>  All documentation related to torque can be seen under
+  the section TORQUE Resource Manager <a
+  href="ext:hod/torque-docs">here</a>. You can
+  get wiki documentation from <a
+  href="ext:hod/torque-wiki">here</a>.
+  Users may wish to subscribe to TORQUE’s mailing list or view the archive for questions
+  and comments <a
+  href="ext:hod/torque-mailing-list">here</a>.
+</p>
+
+<p>For using HOD with Torque:</p>
+<ul>
+ <li>Install Torque components: pbs_server on one node (the head node), pbs_mom on all
+  compute nodes, and PBS client tools on all compute nodes and submit
+  nodes. Perform at least a basic configuration so that the Torque system is up and
+  running, i.e. pbs_server knows which machines to talk to. Look <a
+  href="ext:hod/torque-basic-config">here</a>
+  for basic configuration.
+
+  For advanced configuration, see <a
+  href="ext:hod/torque-advanced-config">here</a></li>
+ <li>Create a queue for submitting jobs on the pbs_server. The name of the queue is the
+  same as the HOD configuration parameter, resource_manager.queue. The HOD client uses this queue to
+  submit the RingMaster process as a Torque job.</li>
+ <li>Specify a 'cluster name' as a 'property' for all nodes in the cluster.
+  This can be done by using the 'qmgr' command. For example:
+  qmgr -c "set node node properties=cluster-name". The name of the cluster is the same as
+  the HOD configuration parameter, hod.cluster.</li>
+ <li>Ensure that jobs can be submitted to the nodes. This can be done by
+  using the 'qsub' command. For example:
+  echo "sleep 30" | qsub -l nodes=3. A consolidated example of these commands follows this list.</li>
+</ul>
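+
+<p>For illustration only, assuming a queue named 'batch', a cluster property named
+'hadoop' and a node named 'node1' (all hypothetical values), the queue creation,
+node property and job submission steps above might look like the following:</p>
+<table>
+ <tr><td><code>$ qmgr -c "create queue batch queue_type=execution"</code></td></tr>
+ <tr><td><code>$ qmgr -c "set queue batch enabled=true"</code></td></tr>
+ <tr><td><code>$ qmgr -c "set queue batch started=true"</code></td></tr>
+ <tr><td><code>$ qmgr -c "set node node1 properties=hadoop"</code></td></tr>
+ <tr><td><code>$ echo "sleep 30" | qsub -l nodes=3 -q batch</code></td></tr>
+</table>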
+
+</section>
+
+<section>
+<title>Installing HOD</title>
+
+<p>Now that the resource manager setup is done, we proceed to obtaining and
+installing HOD.</p>
+<ul>
+ <li>If you are getting HOD from the Hadoop tarball, it is available under the 
+  'contrib' section of Hadoop, under the root directory 'hod'.</li>
+ <li>If you are building from source, you can run 'ant tar' from the Hadoop root
+  directory to generate the Hadoop tarball, and then pick up HOD from there,
+  as described in the point above.</li>
+ <li>Distribute the files under this directory to all the nodes in the
+  cluster. Note that the location where the files are copied should be
+  the same on all the nodes.</li>
+  <li>Note that building Hadoop also builds HOD, with appropriate permissions 
+  set on all the required script files in HOD.</li>
+</ul>
+</section>
+
+<section>
+<title>Configuring HOD</title>
+
+<p>After HOD is installed, it has to be configured before we can start using
+it.</p>
+<section>
+  <title>Minimal Configuration to get started</title>
+<ul>
+ <li>On the node from where you want to run hod, edit the file hodrc
+  which can be found in the &lt;install dir&gt;/conf directory. This file
+  contains the minimal set of values required for running hod.</li>
+ <li>
+<p>Specify values suitable to your environment for the following
+  variables defined in the configuration file. Note that some of these
+  variables are defined at more than one place in the file. An illustrative
+  hodrc fragment with these values filled in follows this list.</p>
+
+  <ul>
+   <li>${JAVA_HOME}: Location of Java for Hadoop. Hadoop supports Sun JDK
+    1.5.x and above.</li>
+   <li>${CLUSTER_NAME}: Name of the cluster which is specified in the
+    'node property' as mentioned in resource manager configuration.</li>
+   <li>${HADOOP_HOME}: Location of Hadoop installation on the compute and
+    submit nodes.</li>
+   <li>${RM_QUEUE}: Queue configured for submitting jobs in the resource
+    manager configuration.</li>
+   <li>${RM_HOME}: Location of the resource manager installation on the
+    compute and submit nodes.</li>
+    </ul>
+</li>
+
+<li>
+<p>The following environment variables *may* need to be set depending on
+  your environment. These variables must be defined where you run the
+  HOD client, and also be specified in the HOD configuration file as the
+  value of the key resource_manager.env-vars. Multiple variables can be
+  specified as a comma separated list of key=value pairs.</p>
+
+  <ul>
+   <li>HOD_PYTHON_HOME: If you install python to a non-default location
+    on the compute nodes or submit nodes, then this variable must be
+    defined to point to the python executable in the non-standard
+    location.</li>
+    </ul>
+</li>
+</ul>
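+
+<p>As an illustration only, after substitution the relevant entries of a hodrc
+might read as below. All paths and names here are hypothetical, and the actual
+file contains more options, which are described in the
+<a href="hod_config_guide.html">Configuration Guide</a>:</p>
+<table>
+ <tr><td><code>[hod]</code></td></tr>
+ <tr><td><code>java-home = /usr/java/jdk1.5</code></td></tr>
+ <tr><td><code>cluster = hadoop</code></td></tr>
+ <tr><td><code>[resource_manager]</code></td></tr>
+ <tr><td><code>queue = batch</code></td></tr>
+ <tr><td><code>batch-home = /usr/torque</code></td></tr>
+ <tr><td><code>env-vars = HOD_PYTHON_HOME=/usr/local/bin/python</code></td></tr>
+ <tr><td><code>[gridservice-mapred]</code></td></tr>
+ <tr><td><code>pkgs = /usr/local/hadoop</code></td></tr>
+ <tr><td><code>[gridservice-hdfs]</code></td></tr>
+ <tr><td><code>pkgs = /usr/local/hadoop</code></td></tr>
+</table>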
+</section>
+
+  <section>
+    <title>Advanced Configuration</title>
+    <p> You can review other configuration options in the file and modify them to suit
+ your needs. Refer to the <a href="hod_config_guide.html">Configuration Guide</a> for information about the HOD
+ configuration.
+    </p>
+  </section>
+</section>
+
+  <section>
+    <title>Running HOD</title>
+    <p>You can now proceed to <a href="hod_user_guide.html">HOD User Guide</a> for information about how to run HOD,
+    what are the various features, options and for help in trouble-shooting.</p>
+  </section>
+</body>
+</document>

+ 207 - 0
src/docs/src/documentation/content/xdocs/hod_config_guide.xml

@@ -0,0 +1,207 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+  <header>
+    <title> 
+      Hadoop On Demand: Configuration Guide
+    </title>
+  </header>
+
+  <body>
+    <section>
+      <title>1. Introduction</title>
+    
+      <p>Configuration options for HOD are organized as sections and options 
+      within them. They can be specified in two ways: in a configuration file 
+      in the INI format, and as command line options to the HOD shell, 
+      specified in the format --section.option[=value]. If the same option is 
+      specified in both places, the value specified on the command line 
+      overrides the value in the configuration file.</p>
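+
+      <p>For example, an option set in the file can be overridden for a single
+      run by specifying it on the command line; the values below are purely
+      illustrative:</p>
+      <table><tr><td><code>$ hod --ringmaster.debug=4 -o "allocate ~/hod-clusters/test 5"</code></td></tr></table>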
+      
+      <p>
+        To get a simple description of all configuration options, you can type
+      </p>
+      <table><tr><td><code>$ hod --verbose-help</code></td></tr></table>
+      
+      <p>This document explains some of the most important or commonly used
+      configuration options in some more detail.</p>
+    </section>
+    
+    <section>
+      <title>2. Sections</title>
+    
+      <p>The following are the various sections in the HOD configuration:</p>
+      
+      <ul>
+        <li>  hod:                  Options for the HOD client</li>
+        <li>  resource_manager:     Options for specifying which resource manager
+         to use, and other parameters for using that resource manager</li>
+        <li>  ringmaster:           Options for the RingMaster process</li>
+        <li>  hodring:              Options for the HodRing processes</li>
+        <li>  gridservice-mapred:   Options for the MapReduce daemons</li>
+        <li>  gridservice-hdfs:     Options for the HDFS daemons.</li>
+      </ul>
+    
+      
+      <p>The next section deals with some of the important options in the HOD 
+        configuration.</p>
+    </section>
+    
+    <section>
+      <title>3. Important / Commonly Used Configuration Options</title>
+  
+  
+      <section> 
+        <title>3.1 Common configuration options</title>
+        
+        <p>Certain configuration options are defined in most of the sections of 
+        the HOD configuration. Options defined in a section are used by the
+        process to which that section applies. These options have the same
+        meaning, but can have different values in each section. An illustrative
+        sketch follows the list below.
+        </p>
+        
+        <ul>
+          <li>temp-dir: Temporary directory for usage by the HOD processes. Make 
+                      sure that the users who will run hod have rights to create 
+                      directories under the directory specified here.</li>
+          
+          <li>debug: A numeric value from 1-4. 4 produces the most log information,
+                   and 1 the least.</li>
+          
+          <li>log-dir: Directory where log files are stored. By default, this is
+                     &lt;install-location&gt;/logs/. The restrictions and notes for the
+                     temp-dir variable apply here too.
+          </li>
+          
+          <li>xrs-port-range: A range of ports, among which an available port shall
+                            be picked for use to run an XML-RPC server.</li>
+          
+          <li>http-port-range: A range of ports, among which an available port shall
+                             be picked for use to run an HTTP server.</li>
+          
+          <li>java-home: Location of Java to be used by Hadoop.</li>
+        </ul>
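+
+        <p>As a purely illustrative sketch, these common options might appear in
+        the ringmaster section of the configuration file as follows:</p>
+        <table>
+         <tr><td><code>[ringmaster]</code></td></tr>
+         <tr><td><code>temp-dir = /tmp/hod</code></td></tr>
+         <tr><td><code>debug = 3</code></td></tr>
+         <tr><td><code>log-dir = /var/log/hod</code></td></tr>
+         <tr><td><code>xrs-port-range = 32768-65536</code></td></tr>
+         <tr><td><code>http-port-range = 40000-50000</code></td></tr>
+         <tr><td><code>java-home = /usr/java/jdk1.5</code></td></tr>
+        </table>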
+      </section>
+      
+      <section>
+        <title>3.2 hod options</title>
+        
+        <ul>
+          <li>cluster: A descriptive name given to the cluster. For Torque, this is
+                     specified as a 'Node property' for every node in the cluster.
+                     HOD uses this value to compute the number of available nodes.</li>
+          
+          <li>client-params: A comma-separated list of hadoop config parameters
+                           specified as key-value pairs. These will be used to
+                           generate a hadoop-site.xml on the submit node that 
+                           should be used for running MapReduce jobs.</li>
+         </ul>
+      </section>
+      
+      <section>
+        <title>3.3 resource_manager options</title>
+      
+        <ul>
+          <li>queue: Name of the queue configured in the resource manager to which
+                   jobs are to be submitted.</li>
+          
+          <li>batch-home: Install directory to which 'bin' is appended and under 
+                        which the executables of the resource manager can be 
+                        found.</li> 
+          
+          <li>env-vars: This is a comma separated list of key-value pairs, 
+                      expressed as key=value, which would be passed to the jobs 
+                      launched on the compute nodes. 
+                      For example, if the python installation is 
+                      in a non-standard location, one can set the environment
+                      variable 'HOD_PYTHON_HOME' to the path to the python 
+                      executable. The HOD processes launched on the compute nodes
+                      can then use this variable.</li>
+        </ul>
+      </section>
+      
+      <section>
+        <title>3.4 ringmaster options</title>
+        
+        <ul>
+          <li>work-dirs: This is a comma-separated list of paths that will serve
+                       as the root for directories that HOD generates and passes
+                       to Hadoop for use to store DFS / MapReduce data. For example,
+                       this is where DFS data blocks will be stored. Typically,
+                       as many paths are specified as there are disks available
+                       to ensure all disks are being utilized. The restrictions
+                       and notes for the temp-dir variable apply here too. See
+                       the example after this list.</li>
+        </ul>
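+
+        <p>For example, on compute nodes with two data disks mounted at
+        hypothetical locations, one might write:</p>
+        <table><tr><td><code>work-dirs = /mnt/d1/hod,/mnt/d2/hod</code></td></tr></table>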
+      </section>
+      
+      <section>
+        <title>3.5 gridservice-hdfs options</title>
+        
+        <ul>
+          <li>external: If false, this indicates that an HDFS cluster must be 
+                      brought up by the HOD system, on the nodes which it 
+                      allocates via the allocate command. Note that in that case,
+                      when the cluster is de-allocated, it will bring down the 
+                      HDFS cluster, and all the data will be lost.
+                      If true, HOD will try to connect to an externally configured
+                      HDFS system.
+                      Typically, because input for jobs is placed into HDFS
+                      before jobs are run, and the output from jobs also needs
+                      to persist in HDFS, an internal HDFS cluster is 
+                      of little value in a production system. However, it allows 
+                      for quick testing. See the example after this list.</li>
+          
+          <li>host: Hostname of the externally configured NameNode, if any</li>
+          
+          <li>fs_port: Port to which NameNode RPC server is bound.</li>
+          
+          <li>info_port: Port to which the NameNode web UI server is bound.</li>
+          
+          <li>pkgs: Installation directory, under which bin/hadoop executable is 
+                  located. This can be used to use a pre-installed version of
+                  Hadoop on the cluster.</li>
+          
+          <li>server-params: A comma-separated list of hadoop config parameters
+                           specified as key-value pairs. These will be used to
+                           generate a hadoop-site.xml that will be used by the
+                           NameNode and DataNodes.</li>
+          
+          <li>final-server-params: Same as above, except they will be marked final.</li>
+        </ul>
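+
+        <p>As an illustration, pointing HOD at an externally managed HDFS might
+        look like the following; the hostname and port numbers are hypothetical:</p>
+        <table>
+         <tr><td><code>[gridservice-hdfs]</code></td></tr>
+         <tr><td><code>external = true</code></td></tr>
+         <tr><td><code>host = namenode.example.com</code></td></tr>
+         <tr><td><code>fs_port = 50040</code></td></tr>
+         <tr><td><code>info_port = 50070</code></td></tr>
+        </table>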
+      </section>
+      
+      <section>
+        <title>3.6 gridservice-mapred options</title>
+        
+        <ul>
+          <li>external: If false, this indicates that a MapReduce cluster must be
+                      brought up by the HOD system on the nodes which it allocates
+                      via the allocate command.
+                      If true, HOD will try to connect to an externally 
+                      configured MapReduce system. See the sketch after this list.</li>
+          
+          <li>host: Hostname of the externally configured JobTracker, if any</li>
+          
+          <li>tracker_port: Port to which the JobTracker RPC server is bound</li>
+          
+          <li>info_port: Port to which the JobTracker web UI server is bound.</li>
+          
+          <li>pkgs: Installation directory, under which bin/hadoop executable is 
+                  located</li>
+          
+          <li>server-params: A comma-separated list of hadoop config parameters
+                           specified as key-value pairs. These will be used to
+                           generate a hadoop-site.xml that will be used by the
+                           JobTracker and TaskTrackers.</li>
+          
+          <li>final-server-params: Same as above, except they will be marked final.</li>
+        </ul>
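+
+        <p>As a sketch, a MapReduce cluster brought up by HOD with a couple of
+        daemon-side parameters might be described as follows; the path and
+        parameter values are purely illustrative:</p>
+        <table>
+         <tr><td><code>[gridservice-mapred]</code></td></tr>
+         <tr><td><code>external = false</code></td></tr>
+         <tr><td><code>pkgs = /usr/local/hadoop</code></td></tr>
+         <tr><td><code>server-params = mapred.reduce.parallel.copies=20,io.sort.factor=100</code></td></tr>
+        </table>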
+      </section>
+    </section>
+  </body>
+</document>

+ 506 - 0
src/docs/src/documentation/content/xdocs/hod_user_guide.xml

@@ -0,0 +1,506 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+  <header>
+    <title>
+      Hadoop On Demand 0.4 User Guide
+    </title>
+  </header>
+
+<body>
+  <section>
+    <title> Introduction </title><anchor id="Introduction"></anchor>
+  <p>Hadoop On Demand (HOD) is a system for provisioning virtual Hadoop clusters over a large physical cluster. It uses the Torque resource manager to do node allocation. On the allocated nodes, it can start Hadoop Map/Reduce and HDFS daemons. It automatically generates the appropriate configuration files (hadoop-site.xml) for the Hadoop daemons and client. HOD also has the capability to distribute Hadoop to the nodes in the virtual cluster that it allocates. In short, HOD makes it easy for administrators and users to quickly set up and use Hadoop. It is also a very useful tool for Hadoop developers and testers who need to share a physical cluster for testing their own Hadoop versions.</p>
+  <p>HOD 0.4 supports Hadoop from version 0.15 onwards.</p>
+  <p>The rest of the documentation comprises a quick-start guide that helps you get started with HOD quickly, a more detailed guide to all HOD features, command line options, known issues and trouble-shooting information.</p>
+  </section>
+  <section>
+		<title> Getting Started Using HOD 0.4 </title><anchor id="Getting_Started_Using_HOD_0_4"></anchor>
+  <p>In this section, we shall see a step-by-step introduction on how to use HOD for the most basic operations. Before following these steps, it is assumed that HOD 0.4 and its dependent hardware and software components are set up and configured correctly. This is a step that is generally performed by system administrators of the cluster.</p>
+  <p>The HOD 0.4 user interface is a command line utility called <code>hod</code>. It is driven by a configuration file that is typically set up for users by system administrators. Users can override this configuration when using <code>hod</code>, as described later in this documentation. The configuration file can be specified in two ways when using <code>hod</code>, as described below: </p>
+  <ul>
+    <li> Specify it on command line, using the -c option. Such as <code>hod -c path-to-the-configuration-file other-options</code></li>
+    <li> Set up an environment variable <em>HOD_CONF_DIR</em> in the environment where <code>hod</code> will be run. This should point to a directory on the local file system containing a file called <em>hodrc</em>. Note that this is analogous to the <em>HADOOP_CONF_DIR</em> and <em>hadoop-site.xml</em> file for Hadoop. If no configuration file is specified on the command line, <code>hod</code> shall look for the <em>HOD_CONF_DIR</em> environment variable and a <em>hodrc</em> file under that (see the example after this list).</li>
+    </ul>
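+  <p>For instance, if the system administrator has placed a <em>hodrc</em> under the hypothetical directory <code>~/hod-conf-dir</code>, the variable can be set once per shell and all subsequent <code>hod</code> commands will pick it up:</p>
+  <table><tr><td><code>$ export HOD_CONF_DIR=~/hod-conf-dir</code></td></tr></table>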
+  <p>In examples listed below, we shall not explicitly point to the configuration option, assuming it is correctly specified.</p>
+  <p><code>hod</code> can be used in two modes, the <em>operation</em> mode and the <em>script</em> mode. We shall describe the two modes in detail below.</p>
+  <section><title> HOD <em>Operation</em> Mode </title><anchor id="HOD_Operation_Mode"></anchor>
+  <p>A typical session of HOD in this mode will involve at least three steps: allocate, run hadoop jobs, deallocate. In order to use this mode, perform the following steps.</p>
+  <p><strong> Create a Cluster Directory </strong></p><anchor id="Create_a_Cluster_Directory"></anchor>
+  <p>The <em>cluster directory</em> is a directory on the local file system where <code>hod</code> will generate the Hadoop configuration, <em>hadoop-site.xml</em>, corresponding to the cluster it allocates. Create this directory and pass it to the <code>hod</code> operations as stated below. Once a cluster is allocated, a user can utilize it to run Hadoop jobs by specifying the cluster directory as the Hadoop --config option. </p>
+  <p><strong> Operation <em>allocate</em></strong></p><anchor id="Operation_allocate"></anchor>
+  <p>The <em>allocate</em> operation is used to allocate a set of nodes and install and provision Hadoop on them. It has the following syntax:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+      
+    </table>
+  <p>If the command completes successfully, then <code>cluster_dir/hadoop-site.xml</code> will be generated and will contain information about the allocated cluster. It will also print out the information about the Hadoop web UIs.</p>
+  <p>An example run of this command produces the following output. Note in this example that <code>~/hod-clusters/test</code> is the cluster directory, and we are allocating 5 nodes:</p>
+  <table>
+    <tr>
+      <td><code>$ hod -o "allocate ~/hod-clusters/test 5"</code><br/>
+      <code>INFO - HDFS UI on http://foo1.bar.com:53422</code><br/>
+      <code>INFO - Mapred UI on http://foo2.bar.com:55380</code><br/></td>
+      </tr>
+   </table>
+  <p><strong> Running Hadoop jobs using the allocated cluster </strong></p><anchor id="Running_Hadoop_jobs_using_the_al"></anchor>
+  <p>Now, one can run Hadoop jobs using the allocated cluster in the usual manner. This assumes variables like <em>JAVA_HOME</em> and the path to the Hadoop installation are set up correctly:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hadoop --config cluster_dir hadoop_command hadoop_command_args</code></td>
+        </tr>
+      
+    </table>
+  <p>or</p>
+    <table>
+      
+        <tr>
+          <td><code>$ export HADOOP_CONF_DIR=cluster_dir</code> <br />
+              <code>$ hadoop hadoop_command hadoop_command_args</code></td>
+        </tr>
+      
+    </table>
+  <p>Continuing our example, the following command will run a wordcount example on the allocated cluster:</p>
+  <table><tr><td><code>$ hadoop --config ~/hod-clusters/test jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code></td></tr></table>
+  <p>or</p>
+  <table><tr>
+    <td><code>$ export HADOOP_CONF_DIR=~/hod-clusters/test</code><br />
+    <code>$ hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code></td>
+    </tr>
+  </table>
+  <p><strong> Operation <em>deallocate</em></strong></p><anchor id="Operation_deallocate"></anchor>
+  <p>The <em>deallocate</em> operation is used to release an allocated cluster. When finished with a cluster, deallocate must be run so that the nodes become free for others to use. The <em>deallocate</em> operation has the following syntax:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod -o "deallocate cluster_dir"</code></td>
+        </tr>
+      
+    </table>
+  <p>Continuing our example, the following command will deallocate the cluster:</p>
+  <table><tr><td><code>$ hod -o "deallocate ~/hod-clusters/test"</code></td></tr></table>
+  <p>As can be seen, when used in the <em>operation</em> mode, HOD allows the users to allocate a cluster, and use it flexibly for running Hadoop jobs. For example, users can run multiple jobs in parallel on the same cluster, by running hadoop from multiple shells pointing to the same configuration.</p>
+	</section>
+  <section><title> HOD <em>Script</em> Mode </title><anchor id="HOD_Script_Mode"></anchor>
+  <p>The HOD <em>script mode</em> combines the operations of allocating, using and deallocating a cluster into a single operation. This is very useful for users who want to run a script of hadoop jobs and let HOD handle the cleanup automatically once the script completes. In order to use <code>hod</code> in the script mode, do the following:</p>
+  <p><strong> Create a script file </strong></p><anchor id="Create_a_script_file"></anchor>
+  <p>This will be a regular shell script that will typically contain hadoop commands, such as:</p>
+  <table><tr><td><code>$ hadoop jar jar_file options</code></td>
+  </tr></table>
+  <p>However, the user can add any valid commands as part of the script. HOD will execute this script, automatically setting <em>HADOOP_CONF_DIR</em> to point to the allocated cluster, so users do not need to worry about this. Nor do they need to create a cluster directory as in the <em>operation</em> mode.</p>
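+  <p>As an example, a minimal script that runs the wordcount job shown earlier might contain only the following line; the paths are illustrative:</p>
+  <table><tr><td><code>hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code></td></tr></table>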
+  <p><strong> Running the script </strong></p><anchor id="Running_the_script"></anchor>
+  <p>The syntax for the <em>script mode</em> is as follows:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod -m number_of_nodes -z script_file</code></td>
+        </tr>
+      
+    </table>
+  <p>Note that HOD will deallocate the cluster as soon as the script completes, and this means that the script must not complete until the hadoop jobs themselves are completed. Users must take care of this while writing the script. </p>
+   </section>
+  </section>
+  <section>
+		<title> HOD 0.4 Features </title><anchor id="HOD_0_4_Features"></anchor>
+  <section><title> Provisioning and Managing Hadoop Clusters </title><anchor id="Provisioning_and_Managing_Hadoop"></anchor>
+  <p>The primary feature of HOD is to provision Hadoop Map/Reduce and HDFS clusters. This is described above in the Getting Started section. Also, as long as nodes are available, and organizational policies allow, a user can use HOD to allocate multiple Map/Reduce clusters simultaneously. The user would need to specify different paths for the <code>cluster_dir</code> parameter mentioned above for each cluster he/she allocates. HOD provides the <em>list</em> and the <em>info</em> operations to enable managing multiple clusters.</p>
+  <p><strong> Operation <em>list</em></strong></p><anchor id="Operation_list"></anchor>
+  <p>The list operation lists all the clusters allocated so far by a user. For each cluster, the cluster directory where the hadoop-site.xml is stored, and its status vis-a-vis connectivity with the JobTracker and/or HDFS, are shown. The list operation has the following syntax:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod -o "list"</code></td>
+        </tr>
+      
+    </table>
+  <p><strong> Operation <em>info</em></strong></p><anchor id="Operation_info"></anchor>
+  <p>The info operation shows information about a given cluster. The information shown includes the Torque job id, and locations of the important daemons like the HOD Ringmaster process, and the Hadoop JobTracker and NameNode daemons. The info operation has the following syntax:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod -o "info cluster_dir"</code></td>
+        </tr>
+      
+    </table>
+  <p>The <code>cluster_dir</code> should be a valid cluster directory specified in an earlier <em>allocate</em> operation.</p>
+  </section>
+  <section><title> Using a tarball to distribute Hadoop </title><anchor id="Using_a_tarball_to_distribute_Ha"></anchor>
+  <p>When provisioning Hadoop, HOD can use either a pre-installed Hadoop on the cluster nodes or distribute and install a Hadoop tarball as part of the provisioning operation. If the tarball option is used, there is no need to have Hadoop pre-installed on the cluster nodes, nor to use a pre-installed version even if one exists. This is especially useful in a development / QE environment where individual developers may have different versions of Hadoop to test on a shared cluster. </p>
+  <p>In order to use a pre-installed Hadoop, you must specify, in the hodrc, the <code>pkgs</code> option in the <code>gridservice-hdfs</code> and <code>gridservice-mapred</code> sections. This must point to the path where Hadoop is installed on all nodes of the cluster.</p>
+  <p>The tarball option can be used in both the <em>operation</em> and <em>script</em> modes. </p>
+  <p>In the operation mode, the syntax is as follows:</p>
+    <table>
+        <tr>
+          <td><code>$ hod -t hadoop_tarball_location -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+    </table>
+  <p>For example, the following command allocates Hadoop provided by the tarball <code>~/share/hadoop.tar.gz</code>:</p>
+  <table><tr><td><code>$ hod -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10"</code></td></tr></table>
+  <p>In the script mode, the syntax is as follows:</p>
+    <table>
+        <tr>
+          <td><code>$ hod -t hadoop_tarball_location -m number_of_nodes -z script_file</code></td>
+        </tr>
+    </table>
+  <p>The hadoop_tarball specified in the syntax above should point to a path on a shared file system that is accessible from all the compute nodes. Currently, HOD only supports NFS mounted file systems.</p>
+  <p><em>Note:</em></p>
+  <ul>
+    <li> For better distribution performance it is recommended that the Hadoop tarball contain only the libraries and binaries, and not the source or documentation.</li>
+    <li> When you want to run jobs against a cluster allocated using the tarball, you must use a compatible version of hadoop to submit your jobs. The best would be to untar and use the version that is present in the tarball itself.</li>
+  </ul>
+  </section>
+  <section><title> Using an external HDFS </title><anchor id="Using_an_external_HDFS"></anchor>
+  <p>In typical Hadoop clusters provisioned by HOD, HDFS is already set up statically (without using HOD). This allows data to persist in HDFS after the HOD-provisioned clusters are deallocated. To use a statically configured HDFS, your hodrc must point to an external HDFS. Specifically, set the following options to the correct values in the section <code>gridservice-hdfs</code> of the hodrc:</p>
+   <table><tr><td>external = true</td></tr><tr><td>host = Hostname of the HDFS NameNode</td></tr><tr><td>fs_port = Port number of the HDFS NameNode</td></tr><tr><td>info_port = Port number of the HDFS NameNode web UI</td></tr></table>
+  <p><em>Note:</em> You can also enable this option from command line. That is, to use a static HDFS, you will need to say: <br />
+    </p>
+    <table>
+        <tr>
+          <td><code>$ hod --gridservice-hdfs.external -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+    </table>
+  <p>HOD can be used to provision an HDFS cluster as well as a Map/Reduce cluster, if required. To do so, set the following option in the section <code>gridservice-hdfs</code> of the hodrc:</p>
+  <table><tr><td>external = false</td></tr></table>
+  </section>
+  <section><title> Options for Configuring Hadoop </title><anchor id="Options_for_Configuring_Hadoop"></anchor>
+  <p>HOD provides a very convenient mechanism to configure both the Hadoop daemons that it provisions and also the hadoop-site.xml that it generates on the client side. This is done by specifying Hadoop configuration parameters in either the HOD configuration file, or from the command line when allocating clusters.</p>
+  <p><strong> Configuring Hadoop Daemons </strong></p><anchor id="Configuring_Hadoop_Daemons"></anchor>
+  <p>For configuring the Hadoop daemons, you can do the following:</p>
+  <p>For Map/Reduce, specify the options as a comma separated list of key-value pairs to the <code>server-params</code> option in the <code>gridservice-mapred</code> section. Likewise for a dynamically provisioned HDFS cluster, specify the options in the <code>server-params</code> option in the <code>gridservice-hdfs</code> section. If these parameters should be marked as <em>final</em>, then include these in the <code>final-server-params</code> option of the appropriate section.</p>
+  <p>For example:</p>
+  <table><tr><td><code>server-params = mapred.reduce.parallel.copies=20,io.sort.factor=100,io.sort.mb=128,io.file.buffer.size=131072</code></td></tr><tr><td><code>final-server-params = mapred.child.java.opts=-Xmx512m,dfs.block.size=134217728,fs.inmemory.size.mb=128</code></td>
+  </tr></table>
+  <p>In order to provide the options from command line, you can use the following syntax:</p>
+  <p>For configuring the Map/Reduce daemons use:</p>
+    <table>
+        <tr>
+          <td><code>$ hod -Mmapred.reduce.parallel.copies=20 -Mio.sort.factor=100 -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+    </table>
+  <p>In the example above, the <em>mapred.reduce.parallel.copies</em> parameter and the <em>io.sort.factor</em> parameter will be appended to the other <code>server-params</code> or if they already exist in <code>server-params</code>, will override them. In order to specify these are <em>final</em> parameters, you can use:</p>
+    <table>
+        <tr>
+          <td><code>$ hod -Fmapred.reduce.parallel.copies=20 -Fio.sort.factor=100 -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+    </table>
+  <p>However, note that final parameters cannot be overwritten from command line. They can only be appended if not already specified.</p>
+  <p>Similar options exist for configuring dynamically provisioned HDFS daemons. For doing so, replace -M with -H and -F with -S.</p>
+  <p><strong> Configuring Hadoop Job Submission (Client) Programs </strong></p><anchor id="Configuring_Hadoop_Job_Submissio"></anchor>
+  <p>As mentioned above, if the allocation operation completes successfully then <code>cluster_dir/hadoop-site.xml</code> will be generated and will contain information about the allocated cluster's JobTracker and NameNode. This configuration is used when submitting jobs to the cluster. HOD provides an option to include additional Hadoop configuration parameters into this file. The syntax for doing so is as follows:</p>
+    <table>
+        <tr>
+          <td><code>$ hod -Cmapred.userlog.limit.kb=200 -Cmapred.child.java.opts=-Xmx512m -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+    </table>
+  <p>In this example, the <em>mapred.userlog.limit.kb</em> and <em>mapred.child.java.opts</em> options will be included into the hadoop-site.xml that is generated by HOD.</p>
+  </section>
+  <section><title> Viewing Hadoop Web-UIs </title><anchor id="Viewing_Hadoop_Web_UIs"></anchor>
+  <p>The HOD allocation operation prints the JobTracker and NameNode web UI URLs. For example:</p>
+   <table><tr><td><code>$ hod -c ~/hod-conf-dir/hodrc -o "allocate ~/hadoop-cluster 10"</code><br/>
+    <code>INFO - HDFS UI on http://host242.foo.com:55391</code><br/>
+    <code>INFO - Mapred UI on http://host521.foo.com:54874</code>
+    </td></tr></table>
+  <p>The same information is also available via the <em>info</em> operation described above.</p>
+  </section>
+  <section><title> Collecting and Viewing Hadoop Logs </title><anchor id="Collecting_and_Viewing_Hadoop_Lo"></anchor>
+  <p>To get the Hadoop logs of the daemons running on one of the allocated nodes: </p>
+  <ul>
+    <li> Log into the node of interest. If you want to look at the logs of the JobTracker or NameNode, then you can find the node running these by using the <em>list</em> and <em>info</em> operations mentioned above.</li>
+    <li> Get the process information of the daemon of interest (for example, <code>ps ux | grep TaskTracker</code>)</li>
+    <li> In the process information, search for the value of the variable <code>-Dhadoop.log.dir</code>. Typically this will be a descendant directory of the <code>hodring.temp-dir</code> value from the hod configuration file.</li>
+    <li> Change to the <code>hadoop.log.dir</code> directory to view daemon and user logs.</li>
+  </ul>
+  <p>HOD also provides a mechanism to collect logs when a cluster is being deallocated and persist them into a file system, or an externally configured HDFS. By doing so, these logs can be viewed after the jobs are completed and the nodes are released. In order to do so, configure the log-destination-uri to a URI as follows:</p>
+   <table><tr><td><code>log-destination-uri = hdfs://host123:45678/user/hod/logs</code> or</td></tr>
+    <tr><td><code>log-destination-uri = file://path/to/store/log/files</code></td></tr>
+    </table>
+  <p>Under the root directory specified above in the path, HOD will create a path user_name/torque_jobid and store gzipped log files for each node that was part of the job.</p>
+  <p>Note that to store the files to HDFS, you may need to configure the <code>hodring.pkgs</code> option with the Hadoop version that matches the HDFS mentioned. If not, HOD will try to use the Hadoop version that it is using to provision the Hadoop cluster itself.</p>
+  </section>
+  <section><title> Auto-deallocation of Idle Clusters </title><anchor id="Auto_deallocation_of_Idle_Cluste"></anchor>
+  <p>HOD automatically deallocates clusters that are not running Hadoop jobs for a given period of time. Each HOD allocation includes a monitoring facility that constantly checks for running Hadoop jobs. If it detects no running Hadoop jobs for a given period, it will automatically deallocate its own cluster and thus free up nodes which are not being used effectively.</p>
+  <p><em>Note:</em> While the cluster is deallocated, the <em>cluster directory</em> is not cleaned up automatically. The user must deallocate this cluster through the regular <em>deallocate</em> operation to clean this up.</p>
+	</section>
+  <section><title> Specifying Additional Job Attributes </title><anchor id="Specifying_Additional_Job_Attrib"></anchor>
+  <p>HOD allows the user to specify a wallclock time and a name (or title) for a Torque job. </p>
+  <p>The wallclock time is the estimated amount of time for which the Torque job will be valid. After this time has expired, Torque will automatically delete the job and free up the nodes. Specifying the wallclock time can also help the job scheduler to better schedule jobs, and help improve utilization of cluster resources.</p>
+  <p>To specify the wallclock time, use the following syntax:</p>
+    <table>
+        <tr>
+          <td><code>$ hod -l time_in_seconds -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+    </table>
+  <p>The name or title of a Torque job helps in user friendly identification of the job. The string specified here will show up in all information where Torque job attributes are displayed, including the <code>qstat</code> command.</p>
+  <p>To specify the name or title, use the following syntax:</p>
+    <table>
+        <tr>
+          <td><code>$ hod -N name_of_job -o "allocate cluster_dir number_of_nodes"</code></td>
+        </tr>
+    </table>
+  <p><em>Note:</em> Due to a restriction in the underlying Torque resource manager, names which do not start with an alphabetic character, or which contain a 'space', will cause the job to fail. The failure message points to the problem being in the specified job name.</p>
+  </section>
+  <section><title> Capturing HOD exit codes in Torque </title><anchor id="Capturing_HOD_exit_codes_in_Torq"></anchor>
+  <p>HOD exit codes are captured in the Torque exit_status field. This will help users and system administrators to distinguish successful runs from unsuccessful runs of HOD. The exit codes are 0 if allocation succeeded and all hadoop jobs ran on the allocated cluster correctly. They are non-zero if allocation failed or some of the hadoop jobs failed on the allocated cluster. The exit codes that are possible are mentioned in the table below. <em>Note: Hadoop job status is captured only if the version of Hadoop used is 0.16 or above.</em></p>
+  <table>
+    
+      <tr>
+        <td> Exit Code </td>
+        <td> Meaning </td>
+      </tr>
+      <tr>
+        <td> 6 </td>
+        <td> Ringmaster failure </td>
+      </tr>
+      <tr>
+        <td> 7 </td>
+        <td> DFS failure </td>
+      </tr>
+      <tr>
+        <td> 8 </td>
+        <td> Job tracker failure </td>
+      </tr>
+      <tr>
+        <td> 10 </td>
+        <td> Cluster dead </td>
+      </tr>
+      <tr>
+        <td> 12 </td>
+        <td> Cluster already allocated </td>
+      </tr>
+      <tr>
+        <td> 13 </td>
+        <td> HDFS dead </td>
+      </tr>
+      <tr>
+        <td> 14 </td>
+        <td> Mapred dead </td>
+      </tr>
+      <tr>
+        <td> 16 </td>
+        <td> All Map/Reduce jobs that ran on the cluster failed. Refer to hadoop logs for more details. </td>
+      </tr>
+      <tr>
+        <td> 17 </td>
+        <td> Some of the Map/Reduce jobs that ran on the cluster failed. Refer to hadoop logs for more details. </td>
+      </tr>
+    
+  </table>
+  </section>
+	</section>
+  <section>
+		<title> Command Line Options </title><anchor id="Command_Line_Options"></anchor>
+  <p>Command line options for the <code>hod</code> command are used for two purposes: defining an operation that HOD must perform, and defining configuration options for customizing HOD that override options defined in the default configuration file. This section covers both types of options. </p>
+  <section><title> Options Defining Operations </title><anchor id="Options_Defining_Operations"></anchor>
+  <p><em>--help</em><br />
+    Prints out the help message to see the basic options.</p>
+  <p><em>--verbose-help</em><br />
+    All configuration options provided in the hodrc file can be passed on the command line, using the syntax <code>--section_name.option_name[=value]</code>. When provided this way, the value provided on command line overrides the option provided in hodrc. The verbose-help command lists all the available options in the hodrc file. This is also a nice way to see the meaning of the configuration options.</p>
+  <p><em>-o "operation_name options"</em><br />
+    This class of options are used to define the <em>operation</em> mode of HOD. <em>Note:</em> The operation_name and other options must be specified within double quotes.</p>
+  <p><em>-o "help"</em><br />
+    Lists the operations available in the <em>operation</em> mode.</p>
+  <p><em>-o "allocate cluster_dir number_of_nodes"</em><br />
+    Allocates a cluster on the given number of cluster nodes, and stores the allocation information in cluster_dir for use with subsequent <code>hadoop</code> commands. Note that the <code>cluster_dir</code> must exist before running the command.</p>
+  <p><em>-o "list"</em><br />
+    Lists the clusters allocated by this user. Information provided includes the Torque job id corresponding to the cluster, the cluster directory where the allocation information is stored, and whether the Map/Reduce daemon is still active or not.</p>
+  <p><em>-o "info cluster_dir"</em><br />
+    Lists information about the cluster whose allocation information is stored in the specified cluster directory.</p>
+  <p><em>-o "deallocate cluster_dir"</em><br />
+    Deallocates the cluster whose allocation information is stored in the specified cluster directory.</p>
+  <p><em>-z script_file</em><br />
+    Runs HOD in <em>script mode</em>. Provisions Hadoop on a given number of nodes, executes the given script from the submitting node, and deallocates the cluster when the script completes. Refer to option <em>-m</em>.</p>
+  </section>
+  <section><title> Options Configuring HOD </title><anchor id="Options_Configuring_HOD"></anchor>
+  <p>As described above, HOD is configured using a configuration file that is usually set up by system administrators. This is an INI-style configuration file that is divided into sections, and options inside each section. Each section relates to one of the HOD processes: client, ringmaster, hodring, mapreduce or hdfs. The options inside a section consist of an option name and value. </p>
+  <p>Users can override the configuration defined in the default configuration in two ways: </p>
+  <ul>
+    <li> Users can supply their own configuration file to HOD in each of the commands, using the <code>-c</code> option</li>
+    <li> Users can supply specific configuration options to HOD on the command line. Options provided on the command line <em>override</em> the values provided in the configuration file being used.</li>
+  </ul>
+  <p>This section describes some of the most commonly used configuration options. These commonly used options are provided with a <em>short</em> option for convenience of specification. All other options can be specified using a <em>long</em> option that is also described below.</p>
+  <p><em>-c config_file</em><br />
+    Provides the configuration file to use. Can be used with all other options of HOD. Alternatively, the <code>HOD_CONF_DIR</code> environment variable can be defined to specify a directory that contains a file named <code>hodrc</code>, alleviating the need to specify the configuration file in each HOD command.</p>
+  <p><em>-b 1|2|3|4</em><br />
+    Enables the given debug level. Can be used with all other options of HOD. 4 is most verbose.</p>
+  <p><em>-t hadoop_tarball</em><br />
+    Provisions Hadoop from the given tar.gz file. This option is only applicable to the <em>allocate</em> operation. For better distribution performance it is strongly recommended that the Hadoop tarball is created <em>after</em> removing the source or documentation.</p>
+  <p><em>-m number_of_nodes</em><br />
+    When used in the <em>script</em> mode, this specifies the number of nodes to allocate. Note that this option is useful only in the script mode.</p>
+  <p><em>-N job-name</em><br />
+    The name to give to the resource manager job that HOD uses underneath. For example, in the case of Torque, this translates to the <code>qsub -N</code> option, and can be seen as the job name using the <code>qstat</code> command.</p>
+  <p><em>-l wall-clock-time</em><br />
+    The amount of time for which the user expects to have work on the allocated cluster. This is passed to the resource manager underneath HOD, and can be used in more efficient scheduling and utilization of the cluster. Note that in the case of Torque, the cluster is automatically deallocated after this time expires.</p>
+  <p><em>-j java-home</em><br />
+    Path to be set to the JAVA_HOME environment variable. This is used in the <em>script</em> mode. HOD sets the JAVA_HOME environment variable to this value and launches the user script in that environment.</p>
+  <p><em>-A account-string</em><br />
+    Accounting information to pass to underlying resource manager.</p>
+  <p><em>-Q queue-name</em><br />
+    Name of the queue in the underlying resource manager to which the job must be submitted.</p>
+  <p><em>-Mkey1=value1 -Mkey2=value2</em><br />
+    Provides configuration parameters for the provisioned Map/Reduce daemons (JobTracker and TaskTrackers). A hadoop-site.xml is generated with these values on the cluster nodes. <br />
+    <em>Note:</em> Values which have the following characters: space, comma, equal-to, semi-colon need to be escaped with a '\' character, and need to be enclosed within quotes. You can escape a '\' with a '\' too. </p>
+  <p><em>-Hkey1=value1 -Hkey2=value2</em><br />
+    Provides configuration parameters for the provisioned HDFS daemons (NameNode and DataNodes). A hadoop-site.xml is generated with these values on the cluster nodes <br />
+    <em>Note:</em> Values which have the following characters: space, comma, equal-to, semi-colon need to be escaped with a '\' character, and need to be enclosed within quotes. You can escape a '\' with a '\' too. </p>
+  <p><em>-Ckey1=value1 -Ckey2=value2</em><br />
+    Provides configuration parameters for the client from where jobs can be submitted. A hadoop-site.xml is generated with these values on the submit node. <br />
+    <em>Note:</em> Values which have the following characters: space, comma, equal-to, semi-colon need to be escaped with a '\' character, and need to be enclosed within quotes. You can escape a '\' with a '\' too. </p>
+  <p><em>--section-name.option-name=value</em><br />
+    This is the method to provide options using the <em>long</em> format. For example, you could say <em>--hod.script-wait-time=20</em></p>
+		</section>
+	</section>
+	<section>
+	  <title> Troubleshooting </title><anchor id="Troubleshooting"></anchor>
+  <p>The following section identifies some of the most likely error conditions users can run into when using HOD, and ways to trouble-shoot them.</p>
+  <section><title><code>hod</code> Hangs During Allocation </title><anchor id="_hod_Hangs_During_Allocation"></anchor><anchor id="hod_Hangs_During_Allocation"></anchor>
+  <p><em>Possible Cause:</em> One of the HOD or Hadoop components have failed to come up. In such a case, the <code>hod</code> command will return after a few minutes (typically 2-3 minutes) with an error code of either 7 or 8 as defined in the Error Codes section. Refer to that section for further details. </p>
+  <p><em>Possible Cause:</em> A large allocation is fired with a tarball. Sometimes due to load in the network, or on the allocated nodes, the tarball distribution might be significantly slow and take a couple of minutes to come back. Wait for completion. Also check that the tarball does not have the Hadoop sources or documentation.</p>
+  <p><em>Possible Cause:</em> A Torque related problem. If the cause is Torque related, the <code>hod</code> command will not return for more than 5 minutes. Running <code>hod</code> in debug mode may show the <code>qstat</code> command being executed repeatedly. Executing the <code>qstat</code> command from a separate shell may show that the job is in the <code>Q</code> (Queued) state. This usually indicates a problem with Torque. Possible causes could include some nodes being down, or new nodes added that Torque is not aware of. Generally, system administrator help is needed to resolve this problem.</p>
+    </section>
+  <section><title><code>hod</code> Hangs During Deallocation </title><anchor id="_hod_Hangs_During_Deallocation"></anchor><anchor id="hod_Hangs_During_Deallocation"></anchor>
+  <p><em>Possible Cause:</em> A Torque related problem, usually load on the Torque server, or the allocation is very large. Generally, waiting for the command to complete is the only option.</p>
+  </section>
+  <section><title><code>hod</code> Fails With an error code and error message </title><anchor id="hod_Fails_With_an_error_code_and"></anchor><anchor id="_hod_Fails_With_an_error_code_an"></anchor>
+  <p>If the exit code of the <code>hod</code> command is not <code>0</code>, then refer to the following table of error exit codes to determine why the code may have occurred and how to debug the situation.</p>
+  <p><strong> Error Codes </strong></p><anchor id="Error_Codes"></anchor>
+  <table>
+    
+      <tr>
+        <th>Error Code</th>
+        <th>Meaning</th>
+        <th>Possible Causes and Remedial Actions</th>
+      </tr>
+      <tr>
+        <td> 1 </td>
+        <td> Configuration error </td>
+        <td> Incorrect configuration values specified in hodrc, or other errors related to HOD configuration. The error messages in this case must be sufficient to debug and fix the problem. </td>
+      </tr>
+      <tr>
+        <td> 2 </td>
+        <td> Invalid operation </td>
+        <td> Do <code>hod -o "help"</code> for the list of valid operations. </td>
+      </tr>
+      <tr>
+        <td> 3 </td>
+        <td> Invalid operation arguments </td>
+        <td> Do <code>hod -o "help"</code> for the list of valid operations. Note that for an <em>allocate</em> operation, the directory argument must specify an existing directory. </td>
+      </tr>
+      <tr>
+        <td> 4 </td>
+        <td> Scheduler failure </td>
+        <td> 1. Requested more resources than available. Run <code>checknodes cluster_name</code> to see if enough nodes are available. <br />
+          2. Torque is misconfigured, the path to Torque binaries is misconfigured, or other Torque problems. Contact system administrator. </td>
+      </tr>
+      <tr>
+        <td> 5 </td>
+        <td> Job execution failure </td>
+        <td> 1. Torque Job was deleted from outside. Execute the Torque <code>qstat</code> command to see if you have any jobs in the <code>R</code> (Running) state. If none exist, try re-executing HOD. <br />
+          2. Torque problems such as the server momentarily going down, or becoming unresponsive. Contact system administrator. </td>
+      </tr>
+      <tr>
+        <td> 6 </td>
+        <td> Ringmaster failure </td>
+        <td> 1. Invalid configuration in the <code>ringmaster</code> section,<br />
+          2. invalid <code>pkgs</code> option in <code>gridservice-mapred or gridservice-hdfs</code> section,<br />
+          3. an invalid hadoop tarball,<br />
+          4. mismatched version in Hadoop between the MapReduce and an external HDFS.<br />
+          The Torque <code>qstat</code> command will most likely show a job in the <code>C</code> (Completed) state. Refer to the section <em>Locating Ringmaster Logs</em> below for more information. </td>
+      </tr>
+      <tr>
+        <td> 7 </td>
+        <td> DFS failure </td>
+        <td> 1. Problem in starting Hadoop clusters. Review the Hadoop related configuration. Look at the Hadoop logs using information specified in <em>Getting Hadoop Logs</em> section above. <br />
+          2. Invalid configuration in the <code>hodring</code> section of hodrc. <code>ssh</code> to all allocated nodes (determined by <code>qstat -f torque_job_id</code>) and grep for <code>ERROR</code> or <code>CRITICAL</code> in hodring logs. Refer to the section <em>Locating Hodring Logs</em> below for more information. <br />
+          3. Invalid tarball specified which is not packaged correctly. <br />
+          4. Cannot communicate with an externally configured HDFS. </td>
+      </tr>
+      <tr>
+        <td> 8 </td>
+        <td> Job tracker failure </td>
+        <td> Similar to the causes in <em>DFS failure</em> case. </td>
+      </tr>
+      <tr>
+        <td> 10 </td>
+        <td> Cluster dead </td>
+        <td> 1. Cluster was auto-deallocated because it was idle for a long time. <br />
+          2. Cluster was auto-deallocated because the wallclock time specified by the system administrator or user was exceeded. <br />
+          3. Cannot communicate with the JobTracker and HDFS NameNode which were successfully allocated. Deallocate the cluster, and allocate again. </td>
+      </tr>
+      <tr>
+        <td> 12 </td>
+        <td> Cluster already allocated </td>
+        <td> The cluster directory specified has been used in a previous allocate operation and is not yet deallocated. Specify a different directory, or deallocate the previous allocation first. </td>
+      </tr>
+      <tr>
+        <td> 13 </td>
+        <td> HDFS dead </td>
+        <td> Cannot communicate with the HDFS NameNode. HDFS NameNode went down. </td>
+      </tr>
+      <tr>
+        <td> 14 </td>
+        <td> Mapred dead </td>
+        <td> 1. Cluster was auto-deallocated because it was idle for a long time. <br />
+          2. Cluster was auto-deallocated because the wallclock time specified by the system administrator or user was exceeded. <br />
+          3. Cannot communicate with the Map/Reduce JobTracker. JobTracker node went down. <br />
+          </td>
+      </tr>
+      <tr>
+        <td> 15 </td>
+        <td> Cluster not allocated </td>
+        <td> An operation which requires an allocated cluster is given a cluster directory with no state information. </td>
+      </tr>
+    
+  </table>
+    </section>
+  <section><title> Hadoop Jobs Not Running on a Successfully Allocated Cluster </title><anchor id="Hadoop_Jobs_Not_Running_on_a_Suc"></anchor>
+  <p>This scenario generally occurs when a cluster is allocated, left inactive for some time, and then Hadoop jobs are attempted on it. The Hadoop jobs then fail with the following exception:</p>
+  <table><tr><td><code>08/01/25 16:31:40 INFO ipc.Client: Retrying connect to server: foo.bar.com/1.1.1.1:53567. Already tried 1 time(s).</code></td></tr></table>
+  <p><em>Possible Cause:</em> No Hadoop jobs were run for a significant portion of time. Thus the cluster would have been deallocated as described in the section <em>Auto-deallocation of Idle Clusters</em>. Deallocate the cluster and allocate it again.</p>
+  <p><em>Possible Cause:</em> The wallclock limit specified by the Torque administrator or the <code>-l</code> option defined in the section <em>Specifying Additional Job Attributes</em> was exceeded since allocation time. Thus the cluster would have been released. Deallocate the cluster and allocate it again.</p>
+  <p><em>Possible Cause:</em> There is a version mismatch between the version of the hadoop being used in provisioning (typically via the tarball option) and the external HDFS. Ensure compatible versions are being used.</p>
+  <p><em>Possible Cause:</em> There is a version mismatch between the version of the hadoop client being used to submit jobs and the hadoop used in provisioning (typically via the tarball option). Ensure compatible versions are being used.</p>
+  <p><em>Possible Cause:</em> You used one of the options for specifying Hadoop configuration <code>-M or -H</code>, which had special characters like space or comma that were not escaped correctly. Refer to the section <em>Options Configuring HOD</em> for checking how to specify such options correctly.</p>
+    </section>
+  <section><title> My Hadoop Job Got Killed </title><anchor id="My_Hadoop_Job_Got_Killed"></anchor>
+  <p><em>Possible Cause:</em> The wallclock limit specified by the Torque administrator or the <code>-l</code> option defined in the section <em>Specifying Additional Job Attributes</em> was exceeded since allocation time. Thus the cluster would have been released. Deallocate the cluster and allocate it again, this time with a larger wallclock time.</p>
+  <p><em>Possible Cause:</em> Problems with the JobTracker node. Refer to the section in <em>Collecting and Viewing Hadoop Logs</em> to get more information.</p>
+    </section>
+  <section><title> Hadoop Job Fails with Message: 'Job tracker still initializing' </title><anchor id="Hadoop_Job_Fails_with_Message_Jo"></anchor>
+  <p><em>Possible Cause:</em> The hadoop job was being run as part of the HOD script command, and it started before the JobTracker could come up fully. Allocate the cluster using a large value for the configuration option <code>--hod.script-wait-time</code>. A value of 120 should typically work, though it is usually unnecessary to be that large.</p>
+    </section>
+  <section><title> The Exit Codes For HOD Are Not Getting Into Torque </title><anchor id="The_Exit_Codes_For_HOD_Are_Not_G"></anchor>
+  <p><em>Possible Cause:</em> Version 0.16 of hadoop is required for this functionality to work. The version of Hadoop used does not match. Use the required version of Hadoop.</p>
+  <p><em>Possible Cause:</em> The deallocation was done without using the <code>hod</code> command; for e.g. directly using <code>qdel</code>. When the cluster is deallocated in this manner, the HOD processes are terminated using signals. This results in the exit code to be based on the signal number, rather than the exit code of the program.</p>
+    </section>
+  <section><title> The Hadoop Logs are Not Uploaded to DFS </title><anchor id="The_Hadoop_Logs_are_Not_Uploaded"></anchor>
+  <p><em>Possible Cause:</em> There is a version mismatch between the version of the hadoop being used for uploading the logs and the external HDFS. Ensure that the correct version is specified in the <code>hodring.pkgs</code> option.</p>
+    </section>
+  <section><title> Locating Ringmaster Logs </title><anchor id="Locating_Ringmaster_Logs"></anchor>
+  <p>To locate the ringmaster logs, follow these steps; an example command sequence is sketched after the list:</p>
+  <ul>
+    <li> Execute hod in debug mode using the <code>-b</code> option. This will print the Torque job id for the current run.</li>
+    <li> Execute <code>qstat -f torque_job_id</code> and look up the value of the <code>exec_host</code> parameter in the output. The first host in this list is the ringmaster node.</li>
+    <li> Login to this node.</li>
+    <li> The ringmaster log location is specified by the <code>ringmaster.log-dir</code> option in the hodrc. The name of the log file will be <code>username.torque_job_id/ringmaster-main.log</code>.</li>
+    <li> If you don't get enough information, you may want to set the ringmaster debug level to 4. This can be done by passing <code>--ringmaster.debug 4</code> to the hod command line.</li>
+  </ul>
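+  <p>A rough sketch of the above steps; the Torque job id, ringmaster node, user name, and the value of <code>ringmaster.log-dir</code> are placeholders:</p>
+  <source>
+$ hod &lt;your usual allocation options&gt; -b                   # debug mode prints the Torque job id
+$ qstat -f &lt;torque_job_id&gt;                                  # look up exec_host; the first host is the ringmaster node
+$ ssh &lt;first_host_in_exec_host&gt;
+$ less &lt;ringmaster.log-dir&gt;/&lt;username&gt;.&lt;torque_job_id&gt;/ringmaster-main.log
+  </source>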
+  </section>
+  <section><title> Locating Hodring Logs </title><anchor id="Locating_Hodring_Logs"></anchor>
+  <p>To locate the hodring logs, follow the steps below; an example of raising the debug levels is sketched after the list:</p>
+  <ul>
+    <li> Execute hod in debug mode using the <code>-b</code> option. This will print the Torque job id for the current run.</li>
+    <li> Execute <code>qstat -f torque_job_id</code> and look up the value of the <code>exec_host</code> parameter in the output. All nodes in this list should have a hodring on them.</li>
+    <li> Login to any of these nodes.</li>
+    <li> The hodring log location is specified by the <code>hodring.log-dir</code> option in the hodrc. The name of the log file will be <code>username.torque_job_id/hodring-main.log</code>.</li>
+    <li> If you don't get enough information, you may want to set the hodring debug level to 4. This can be done by passing <code>--hodring.debug 4</code> to the hod command line.</li>
+  </ul>
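+  <p>For example, to raise both debug levels on a subsequent run and then inspect a hodring log (the allocation options, user name, Torque job id, and the value of <code>hodring.log-dir</code> are placeholders):</p>
+  <source>
+$ hod &lt;your usual allocation options&gt; --ringmaster.debug 4 --hodring.debug 4
+$ less &lt;hodring.log-dir&gt;/&lt;username&gt;.&lt;torque_job_id&gt;/hodring-main.log
+  </source>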
+  </section>
+	</section>
+</body>
+</document>

+ 17 - 1
src/docs/src/documentation/content/xdocs/site.xml

@@ -41,7 +41,11 @@ See http://forrest.apache.org/docs/linking.html for more info.
     <mapred    label="Map-Reduce Tutorial" href="mapred_tutorial.html" />
     <mapred    label="Native Hadoop Libraries" href="native_libraries.html" />
     <streaming label="Streaming"          href="streaming.html" />
-    <hod       label="Hadoop On Demand"   href="hod.html" />
+    <hod       label="Hadoop On Demand" href="hod.html">
+      <hod-user-guide href="hod_user_guide.html"/>
+      <hod-admin-guide href="hod_admin_guide.html"/>
+      <hod-config-guide href="hod_config_guide.html"/>
+    </hod>
     <api       label="API Docs"           href="ext:api/index" />
     <wiki      label="Wiki"               href="ext:wiki" />
     <faq       label="FAQ"                href="ext:faq" />
@@ -63,6 +67,18 @@ See http://forrest.apache.org/docs/linking.html for more info.
     <gzip      href="http://www.gzip.org/" />
     <cygwin    href="http://www.cygwin.com/" />
     <osx       href="http://www.apple.com/macosx" />
+    <hod href="">
+      <cluster-resources href="http://www.clusterresources.com" />
+      <torque href="http://www.clusterresources.com/pages/products/torque-resource-manager.php" />
+      <torque-download href="http://www.clusterresources.com/downloads/torque/" />
+      <torque-docs href="http://www.clusterresources.com/pages/resources/documentation.php" />
+      <torque-wiki href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki" />
+      <torque-mailing-list href="http://www.clusterresources.com/pages/resources/mailing-lists.php" />
+      <torque-basic-config href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.2_basic_configuration" />
+      <torque-advanced-config href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.3_advanced_configuration" />
+      <python href="http://www.python.org" />
+      <twisted-python href="http://twistedmatrix.com/trac/" />
+    </hod>
     <api href="api/">
       <index href="index.html" />
       <org href="org/">

Some files were not shown due to the large number of changed files