123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494 |
- <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head>
- <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
- <meta content="Apache Forrest" name="Generator">
- <meta name="Forrest-version" content="0.8">
- <meta name="Forrest-skin-name" content="pelt">
- <title>
- Hadoop On Demand: Configuration Guide
- </title>
- <link type="text/css" href="skin/basic.css" rel="stylesheet">
- <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
- <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
- <link type="text/css" href="skin/profile.css" rel="stylesheet">
- <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
- <link rel="shortcut icon" href="images/favicon.ico">
- </head>
- <body onload="init()">
- <script type="text/javascript">ndeSetTextSize();</script>
- <div id="top">
- <!--+
- |breadtrail
- +-->
- <div class="breadtrail">
- <a href="http://www.apache.org/">Apache</a> > <a href="http://hadoop.apache.org/">Hadoop</a> > <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
- </div>
- <!--+
- |header
- +-->
- <div class="header">
- <!--+
- |start group logo
- +-->
- <div class="grouplogo">
- <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
- </div>
- <!--+
- |end group logo
- +-->
- <!--+
- |start Project Logo
- +-->
- <div class="projectlogo">
- <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.jpg" title="Scalable Computing Platform"></a>
- </div>
- <!--+
- |end Project Logo
- +-->
- <!--+
- |start Search
- +-->
- <div class="searchbox">
- <form action="http://www.google.com/search" method="get" class="roundtopsmall">
- <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">
- <input name="Search" value="Search" type="submit">
- </form>
- </div>
- <!--+
- |end search
- +-->
- <!--+
- |start Tabs
- +-->
- <ul id="tabs">
- <li>
- <a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
- </li>
- <li>
- <a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
- </li>
- <li class="current">
- <a class="selected" href="index.html">Hadoop 0.16 Documentation</a>
- </li>
- </ul>
- <!--+
- |end Tabs
- +-->
- </div>
- </div>
- <div id="main">
- <div id="publishedStrip">
- <!--+
- |start Subtabs
- +-->
- <div id="level2tabs"></div>
- <!--+
- |end Endtabs
- +-->
- <script type="text/javascript"><!--
- document.write("Last Published: " + document.lastModified);
- // --></script>
- </div>
- <!--+
- |breadtrail
- +-->
- <div class="breadtrail">
-
- </div>
- <!--+
- |start Menu, mainarea
- +-->
- <!--+
- |start Menu
- +-->
- <div id="menu">
- <div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
- <div id="menu_1.1" class="menuitemgroup">
- <div class="menuitem">
- <a href="index.html">Overview</a>
- </div>
- <div class="menuitem">
- <a href="quickstart.html">Quickstart</a>
- </div>
- <div class="menuitem">
- <a href="cluster_setup.html">Cluster Setup</a>
- </div>
- <div class="menuitem">
- <a href="hdfs_design.html">HDFS Architecture</a>
- </div>
- <div class="menuitem">
- <a href="hdfs_user_guide.html">HDFS User Guide</a>
- </div>
- <div class="menuitem">
- <a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
- </div>
- <div class="menuitem">
- <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
- </div>
- <div class="menuitem">
- <a href="native_libraries.html">Native Hadoop Libraries</a>
- </div>
- <div class="menuitem">
- <a href="streaming.html">Streaming</a>
- </div>
- <div class="menuitem">
- <a href="hod.html">Hadoop On Demand</a>
- </div>
- <div class="menuitem">
- <a href="api/index.html">API Docs</a>
- </div>
- <div class="menuitem">
- <a href="http://wiki.apache.org/hadoop/">Wiki</a>
- </div>
- <div class="menuitem">
- <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
- </div>
- <div class="menuitem">
- <a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
- </div>
- </div>
- <div id="credit"></div>
- <div id="roundbottom">
- <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
- <!--+
- |alternative credits
- +-->
- <div id="credit2"></div>
- </div>
- <!--+
- |end Menu
- +-->
- <!--+
- |start content
- +-->
- <div id="content">
- <div title="Portable Document Format" class="pdflink">
- <a class="dida" href="hod_config_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
- PDF</a>
- </div>
- <h1>
- Hadoop On Demand: Configuration Guide
- </h1>
- <div id="minitoc-area">
- <ul class="minitoc">
- <li>
- <a href="#1.+Introduction">1. Introduction</a>
- </li>
- <li>
- <a href="#2.+Sections">2. Sections</a>
- </li>
- <li>
- <a href="#3.+Important+%2F+Commonly+Used+Configuration+Options">3. Important / Commonly Used Configuration Options</a>
- <ul class="minitoc">
- <li>
- <a href="#3.1+Common+configuration+options">3.1 Common configuration options</a>
- </li>
- <li>
- <a href="#3.2+hod+options">3.2 hod options</a>
- </li>
- <li>
- <a href="#3.3+resource_manager+options">3.3 resource_manager options</a>
- </li>
- <li>
- <a href="#3.4+ringmaster+options">3.4 ringmaster options</a>
- </li>
- <li>
- <a href="#3.5+gridservice-hdfs+options">3.5 gridservice-hdfs options</a>
- </li>
- <li>
- <a href="#3.6+gridservice-mapred+options">3.6 gridservice-mapred options</a>
- </li>
- <li>
- <a href="#3.7+hodring+options">3.7 hodring options</a>
- </li>
- </ul>
- </li>
- </ul>
- </div>
-
- <a name="N1000C"></a><a name="1.+Introduction"></a>
- <h2 class="h3">1. Introduction</h2>
- <div class="section">
- <p>Configuration options for HOD are organized as sections and options
- within them. They can be specified in two ways: a configuration file
- in the INI format, and as command line options to the HOD shell,
- specified in the format --section.option[=value]. If the same option is
- specified in both places, the value specified on the command line
- overrides the value in the configuration file.</p>
- <p>
- To get a simple description of all configuration options, you can type
- </p>
- <table class="ForrestTable" cellspacing="1" cellpadding="4">
- <tr>
- <td colspan="1" rowspan="1"><span class="codefrag">$ hod --verbose-help</span></td>
- </tr>
- </table>
- <p>This document explains some of the most important or commonly used
- configuration options in some more detail.</p>
- </div>
-
-
- <a name="N10024"></a><a name="2.+Sections"></a>
- <h2 class="h3">2. Sections</h2>
- <div class="section">
- <p>The following are the various sections in the HOD configuration:</p>
- <ul>
-
- <li> hod: Options for the HOD client</li>
-
- <li> resource_manager: Options for specifying which resource manager
- to use, and other parameters for using that resource manager</li>
-
- <li> ringmaster: Options for the RingMaster process</li>
-
- <li> hodring: Options for the HodRing processes</li>
-
- <li> gridservice-mapred: Options for the MapReduce daemons</li>
-
- <li> gridservice-hdfs: Options for the HDFS daemons.</li>
-
- </ul>
- <p>The next section deals with some of the important options in the HOD
- configuration.</p>
- </div>
-
-
- <a name="N10046"></a><a name="3.+Important+%2F+Commonly+Used+Configuration+Options"></a>
- <h2 class="h3">3. Important / Commonly Used Configuration Options</h2>
- <div class="section">
- <a name="N1004C"></a><a name="3.1+Common+configuration+options"></a>
- <h3 class="h4">3.1 Common configuration options</h3>
- <p>Certain configuration options are defined in most of the sections of
- the HOD configuration. Options defined in a section are used by the
- process to which that section applies. These options have the same
- meaning, but can have different values in each section.
- </p>
- <ul>
-
- <li>temp-dir: Temporary directory for usage by the HOD processes. Make
- sure that the users who will run hod have rights to create
- directories under the directory specified here.</li>
-
-
- <li>debug: A numeric value from 1-4. 4 produces the most log information,
- and 1 the least.</li>
-
-
- <li>log-dir: Directory where log files are stored. By default, this is
- &lt;install-location&gt;/logs/. The restrictions and notes for the
- temp-dir variable apply here too.
- </li>
-
-
- <li>xrs-port-range: A range of ports, among which an available port shall
- be picked for use to run an XML-RPC server.</li>
-
-
- <li>http-port-range: A range of ports, among which an available port shall
- be picked for use to run an HTTP server.</li>
-
-
- <li>java-home: Location of Java to be used by Hadoop.</li>
-
- <li>syslog-address: Address to which a syslog daemon is bound. The format
- of the value is host:port. If configured, HOD log messages
- will be logged to syslog using this value.</li>
-
-
- </ul>
- <a name="N1006E"></a><a name="3.2+hod+options"></a>
- <h3 class="h4">3.2 hod options</h3>
- <ul>
-
- <li>cluster: A descriptive name given to the cluster. For Torque, this is
- specified as a 'Node property' for every node in the cluster.
- HOD uses this value to compute the number of available nodes.</li>
-
-
- <li>client-params: A comma-separated list of hadoop config parameters
- specified as key-value pairs. These will be used to
- generate a hadoop-site.xml on the submit node that
- should be used for running MapReduce jobs.</li>
-
- </ul>
- <a name="N1007E"></a><a name="3.3+resource_manager+options"></a>
- <h3 class="h4">3.3 resource_manager options</h3>
- <ul>
-
- <li>queue: Name of the queue configured in the resource manager to which
- jobs are to be submitted.</li>
-
-
- <li>batch-home: Install directory to which 'bin' is appended and under
- which the executables of the resource manager can be
- found.</li>
-
-
- <li>env-vars: This is a comma separated list of key-value pairs,
- expressed as key=value, which would be passed to the jobs
- launched on the compute nodes.
- For example, if the python installation is
- in a non-standard location, one can set the environment
- variable 'HOD_PYTHON_HOME' to the path to the python
- executable. The HOD processes launched on the compute nodes
- can then use this variable.</li>
-
- </ul>
- <a name="N10091"></a><a name="3.4+ringmaster+options"></a>
- <h3 class="h4">3.4 ringmaster options</h3>
- <ul>
-
- <li>work-dirs: This is a list of comma-separated paths that will serve
- as the root for directories that HOD generates and passes
- to Hadoop for use to store DFS / MapReduce data. For example,
- this is where DFS data blocks will be stored. Typically,
- as many paths are specified as there are disks available
- to ensure all disks are being utilized. The restrictions
- and notes for the temp-dir variable apply here too.</li>
-
- </ul>
- <a name="N1009E"></a><a name="3.5+gridservice-hdfs+options"></a>
- <h3 class="h4">3.5 gridservice-hdfs options</h3>
- <ul>
-
- <li>external: If false, this indicates that a HDFS cluster must be
- brought up by the HOD system, on the nodes which it
- allocates via the allocate command. Note that in that case,
- when the cluster is de-allocated, it will bring down the
- HDFS cluster, and all the data will be lost.
- If true, it will try and connect to an externally configured
- HDFS system.
- Typically, because input for jobs is placed into HDFS
- before jobs are run, and also the output from jobs in HDFS
- is required to be persistent, an internal HDFS cluster is
- of little value in a production system. However, it allows
- for quick testing.</li>
-
-
- <li>host: Hostname of the externally configured NameNode, if any</li>
-
-
- <li>fs_port: Port to which NameNode RPC server is bound.</li>
-
-
- <li>info_port: Port to which the NameNode web UI server is bound.</li>
-
-
- <li>pkgs: Installation directory, under which bin/hadoop executable is
- located. This can be used to use a pre-installed version of
- Hadoop on the cluster.</li>
-
-
- <li>server-params: A comma-separated list of hadoop config parameters
- specified as key-value pairs. These will be used to
- generate a hadoop-site.xml that will be used by the
- NameNode and DataNodes.</li>
-
-
- <li>final-server-params: Same as above, except they will be marked final.</li>
-
- </ul>
- <a name="N100BD"></a><a name="3.6+gridservice-mapred+options"></a>
- <h3 class="h4">3.6 gridservice-mapred options</h3>
- <ul>
-
- <li>external: If false, this indicates that a MapReduce cluster must be
- brought up by the HOD system on the nodes which it allocates
- via the allocate command.
- If true, it will try and connect to an externally
- configured MapReduce system.</li>
-
-
- <li>host: Hostname of the externally configured JobTracker, if any</li>
-
-
- <li>tracker_port: Port to which the JobTracker RPC server is bound</li>
-
-
- <li>info_port: Port to which the JobTracker web UI server is bound.</li>
-
-
- <li>pkgs: Installation directory, under which bin/hadoop executable is
- located</li>
-
-
- <li>server-params: A comma-separated list of hadoop config parameters
- specified as key-value pairs. These will be used to
- generate a hadoop-site.xml that will be used by the
- JobTracker and TaskTrackers.</li>
-
-
- <li>final-server-params: Same as above, except they will be marked final.</li>
-
- </ul>
- <a name="N100DC"></a><a name="3.7+hodring+options"></a>
- <h3 class="h4">3.7 hodring options</h3>
- <ul>
-
- <li>mapred-system-dir-root: Directory in the DFS under which HOD will
- generate sub-directory names and pass the full path
- as the value of the 'mapred.system.dir' configuration
- parameter to Hadoop daemons. The format of the full
- path will be value-of-this-option/userid/mapredsystem/cluster-id.
- Note that the directory specified here should be such
- that all users can create directories under this, if
- permissions are enabled in HDFS. Setting the value of
- this option to /user will make HOD use the user's
- home directory to generate the mapred.system.dir value.</li>
-
- <li>log-destination-uri: URL describing a path in an external, static DFS or the
- cluster node's local file system where HOD will upload
- Hadoop logs when a cluster is deallocated. To specify a
- DFS path, use the format 'hdfs://path'. To specify a
- cluster node's local file path, use the format 'file://path'.
- When clusters are deallocated by HOD, the hadoop logs will
- be deleted as part of HOD's cleanup process. In order to
- persist these logs, you can use this configuration option.
- The format of the path is
- value-of-this-option/userid/hod-logs/cluster-id.
- Note that the directory you specify here must be such that all
- users can create sub-directories under this. Setting this value
- to hdfs://user will make the logs come in the user's home directory
- in DFS.</li>
-
- <li>pkgs: Installation directory, under which bin/hadoop executable is located. This will
- be used by HOD to upload logs if a HDFS URL is specified in log-destination-uri
- option. Note that this is useful if the users are using a tarball whose version
- may differ from the external, static HDFS version.</li>
-
-
- </ul>
- </div>
-
- </div>
- <!--+
- |end content
- +-->
- <div class="clearboth"> </div>
- </div>
- <div id="footer">
- <!--+
- |start bottomstrip
- +-->
- <div class="lastmodified">
- <script type="text/javascript"><!--
- document.write("Last Published: " + document.lastModified);
- // --></script>
- </div>
- <div class="copyright">
- Copyright ©
- 2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
- </div>
- <!--+
- |end bottomstrip
- +-->
- </div>
- </body>
- </html>
|