
Merge -r 735776:735777 from trunk onto 0.20 branch. Fixes HADOOP-4828.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20@735779 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das authored 16 years ago
commit 17dfa17133

+ 3 - 0
CHANGES.txt

@@ -307,6 +307,9 @@ Release 0.20.0 - Unreleased
 
     HADOOP-5030. Build Chukwa RPM to install into configured directory. (Eric
     Yang via cdouglas)
+    
+    HADOOP-4828. Updates documents to do with configuration (HADOOP-4631).
+    (Sharad Agarwal via ddas)
 
   OPTIMIZATIONS
 

+ 2 - 2
src/c++/libhdfs/hdfs.h

@@ -98,7 +98,7 @@ extern  "C" {
      * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
      * you want to connect to local filesystem. 'host' should be passed as
      * 'default' (and port as 0) to use the 'configured' filesystem
-     * (hadoop-site/hadoop-default.xml).
+     * (core-site/core-default.xml).
      * @param port The port on which the server is listening.
      * @param user the user name (this is a hadoop domain user). NULL is equivalent to hdfsConnect(host, port).
      * @param groups the groups (these are hadoop domain groups)
@@ -114,7 +114,7 @@ extern  "C" {
      * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
      * you want to connect to local filesystem. 'host' should be passed as
      * 'default' (and port as 0) to use the 'configured' filesystem
-     * (hadoop-site/hadoop-default.xml).
+     * (core-site/core-default.xml).
      * @param port The port on which the server is listening.
      * @return Returns a handle to the filesystem or NULL on error.
      */

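As context for the comment change above: passing 'default' and port 0 makes libhdfs fall back to the configured filesystem, now resolved from core-site.xml/core-default.xml. A minimal Java sketch of the equivalent lookup on the Java side (illustrative, not part of this commit):

// Illustrative sketch: FileSystem.get() resolves fs.default.name from
// core-default.xml and core-site.xml, the same lookup the revised
// hdfs.h comment points at for hdfsConnect("default", 0).
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class ConnectDefault {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(); // loads core-default.xml, then core-site.xml
    FileSystem fs = FileSystem.get(conf);     // the 'configured' filesystem
    System.out.println("connected to " + fs.getUri());
  }
}
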
+ 5 - 3
src/c++/libhdfs/tests/conf/hadoop-site.xml

@@ -1,9 +1,11 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
 
-<!-- Values used when running libhdfs unit tests. -->
-<!-- This is mostly empty, to use the default values, overriding the -->
-<!-- potentially user-editted hadoop-site.xml in the conf/ directory.  -->
+<!-- DO NOT PUT ANY PROPERTY IN THIS FILE. INSTEAD USE -->
+<!-- core-site.xml, mapred-site.xml OR hdfs-site.xml -->
+<!-- This empty script is to avoid picking properties from  -->
+<!-- conf/hadoop-site.xml This would be removed once support  -->
+<!-- for hadoop-site.xml is removed.  -->
 
 <configuration>
 

+ 1 - 2
src/c++/libhdfs/tests/test-libhdfs.sh

@@ -30,8 +30,7 @@ HDFS_TEST=hdfs_test
 HADOOP_LIB_DIR=$HADOOP_HOME/lib
 HADOOP_BIN_DIR=$HADOOP_HOME/bin
 
-# Manipulate HADOOP_CONF_DIR so as to include 
-# HADOOP_HOME/conf/hadoop-default.xml too
+# Manipulate HADOOP_CONF_DIR too
 # which is necessary to circumvent bin/hadoop
 HADOOP_CONF_DIR=$HADOOP_CONF_DIR:$HADOOP_HOME/conf
 

+ 1 - 1
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/datacollection/writer/SeqFileWriter.java

@@ -105,7 +105,7 @@ public class SeqFileWriter implements ChukwaWriter
 		log.info("rotateInterval is " + rotateInterval);
 		log.info("outputDir is " + outputDir);
 		log.info("fsname is " + fsname);
-		log.info("filesystem type from hadoop-default.xml is "
+		log.info("filesystem type from core-default.xml is "
 				+ conf.get("fs.hdfs.impl"));
 
 		if (fsname == null) {

+ 2 - 2
src/contrib/fairscheduler/README

@@ -70,7 +70,7 @@ from HADOOP_HOME/build/contrib/fairscheduler to HADOOP_HOME/lib. Alternatively
 you can modify HADOOP_CLASSPATH to include this jar, in conf/hadoop-env.sh.
 
 You will also need to set the following property in the Hadoop config file
-(conf/hadoop-site.xml) to have Hadoop use the fair scheduler:
+(conf/mapred-site.xml) to have Hadoop use the fair scheduler:
 
 <property>
   <name>mapred.jobtracker.taskScheduler</name>
@@ -86,7 +86,7 @@ described in the Administration section.
 
 CONFIGURING:
 
-The following properties can be set in hadoop-site.xml to configure the
+The following properties can be set in mapred-site.xml to configure the
 scheduler:
 
 mapred.fairscheduler.allocation.file:

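A minimal Java sketch of the same setting made programmatically (illustrative; it assumes the contrib class org.apache.hadoop.mapred.FairScheduler, since this hunk elides the property's value). In practice the JobTracker reads this key at startup, so it belongs in conf/mapred-site.xml as the README says:

import org.apache.hadoop.mapred.JobConf;

public class FairSchedulerConf {
  public static void main(String[] args) {
    JobConf conf = new JobConf(); // loads mapred-default.xml, then mapred-site.xml
    // Assumption: the scheduler class shipped by the fairscheduler contrib.
    conf.set("mapred.jobtracker.taskScheduler",
             "org.apache.hadoop.mapred.FairScheduler");
    System.out.println(conf.get("mapred.jobtracker.taskScheduler"));
  }
}
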
+ 1 - 1
src/contrib/hdfsproxy/README

@@ -13,7 +13,7 @@ respectively. The location of these files can be specified in
 hdfsproxy-default.xml. Environmental variable HDFSPROXY_CONF_DIR can be used to
 point to the directory where these configuration files are located. The 
 configuration files of the proxied HDFS cluster should also be available on the
-classpath (hadoop-default.xml and hadoop-site.xml).
+classpath (hdfs-default.xml and hdfs-site.xml).
 
 Mirroring those used in HDFS, a few shell scripts are provided to start and 
 stop a group of proxy servers. The hosts to run hdfsproxy on are specified in 

+ 5 - 3
src/contrib/test/hadoop-site.xml

@@ -1,9 +1,11 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
 
-<!-- Values used when running unit tests.  This is mostly empty, to -->
-<!-- use of the default values, overriding the potentially -->
-<!-- user-editted hadoop-site.xml in the conf/ directory.  -->
+<!-- DO NOT PUT ANY PROPERTY IN THIS FILE. INSTEAD USE -->
+<!-- core-site.xml, mapred-site.xml OR hdfs-site.xml -->
+<!-- This empty script is to avoid picking properties from  -->
+<!-- conf/hadoop-site.xml This would be removed once support  -->
+<!-- for hadoop-site.xml is removed.  -->
 
 <configuration>
 

+ 2 - 2
src/core/org/apache/hadoop/fs/kfs/package.html

@@ -32,7 +32,7 @@ documentation.
 <h3>Steps</h3>
 
         <ul>
-          <li>In the Hadoop conf directory edit hadoop-default.xml,
+          <li>In the Hadoop conf directory edit core-site.xml,
           add the following:
             <pre>
 &lt;property&gt;
@@ -42,7 +42,7 @@ documentation.
 &lt;/property&gt;
             </pre>
 
-          <li>In the Hadoop conf directory edit hadoop-site.xml,
+          <li>In the Hadoop conf directory edit core-site.xml,
           adding the following (with appropriate values for
           &lt;server&gt; and &lt;port&gt;):
             <pre>

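A Java sketch of what the edited core-site.xml entries accomplish (illustrative; the property name fs.kfs.impl and the KosmosFileSystem class are assumptions, since the <pre> bodies are elided in this hunk):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class KfsDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed glue property: maps the kfs:// scheme to the KFS client class.
    conf.set("fs.kfs.impl", "org.apache.hadoop.fs.kfs.KosmosFileSystem");
    // Replace host/port with your KFS metaserver values.
    FileSystem fs = FileSystem.get(URI.create("kfs://metaserver:20000"), conf);
    System.out.println(fs.getUri());
  }
}
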
+ 34 - 20
src/core/overview.html

@@ -134,13 +134,13 @@ following:
 
 <li>The NameNode (Distributed Filesystem master) host.  This is
 specified with the configuration property <tt><a
- href="../hadoop-default.html#fs.default.name">fs.default.name</a></tt>.
+ href="../core-default.html#fs.default.name">fs.default.name</a></tt>.
 </li>
 
 <li>The {@link org.apache.hadoop.mapred.JobTracker} (MapReduce master)
 host and port.  This is specified with the configuration property
 <tt><a
-href="../hadoop-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>.
+href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>.
 </li>
 
 <li>A <em>slaves</em> file that lists the names of all the hosts in
@@ -151,8 +151,10 @@ the cluster.  The default slaves file is <tt>conf/slaves</tt>.
 <h3>Pseudo-distributed configuration</h3>
 
 You can in fact run everything on a single host.  To run things this
-way, put the following in conf/hadoop-site.xml:
-
+way, put the following in:
+<br/>
+<br/>
+conf/core-site.xml:
 <xmp><configuration>
 
   <property>
@@ -160,10 +162,10 @@ way, put the following in conf/hadoop-site.xml:
     <value>hdfs://localhost/</value>
   </property>
 
-  <property>
-    <name>mapred.job.tracker</name>
-    <value>localhost:9001</value>
-  </property>
+</configuration></xmp>
+
+conf/hdfs-site.xml:
+<xmp><configuration>
 
   <property>
     <name>dfs.replication</name>
@@ -172,6 +174,16 @@ way, put the following in conf/hadoop-site.xml:
 
 </configuration></xmp>
 
+conf/mapred-site.xml:
+<xmp><configuration>
+
+  <property>
+    <name>mapred.job.tracker</name>
+    <value>localhost:9001</value>
+  </property>
+
+</configuration></xmp>
+
 <p>(We also set the HDFS replication level to 1 in order to
 reduce warnings when running on a single node.)</p>
 
 
@@ -217,36 +229,38 @@ cat output/*
 <h3>Fully-distributed operation</h3>
 
 <p>Fully distributed operation is just like the pseudo-distributed operation
-described above, except, in <tt>conf/hadoop-site.xml</tt>, specify:</p>
+described above, except, specify:</p>
 
 <ol>
 
 <li>The hostname or IP address of your master server in the value
 for <tt><a
-href="../hadoop-default.html#fs.default.name">fs.default.name</a></tt>,
-  as <tt><em>hdfs://master.example.com/</em></tt>.</li>
+href="../core-default.html#fs.default.name">fs.default.name</a></tt>,
+  as <tt><em>hdfs://master.example.com/</em></tt> in <tt>conf/core-site.xml</tt>.</li>
 
 <li>The host and port of your master server in the value
-of <tt><a href="../hadoop-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>
-as <tt><em>master.example.com</em>:<em>port</em></tt>.</li>
+of <tt><a href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>
+as <tt><em>master.example.com</em>:<em>port</em></tt> in <tt>conf/mapred-site.xml</tt>.</li>
 
 <li>Directories for <tt><a
-href="../hadoop-default.html#dfs.name.dir">dfs.name.dir</a></tt> and
-<tt><a href="../hadoop-default.html#dfs.data.dir">dfs.data.dir</a>.
+href="../hdfs-default.html#dfs.name.dir">dfs.name.dir</a></tt> and
+<tt><a href="../hdfs-default.html#dfs.data.dir">dfs.data.dir</a> 
+in <tt>conf/hdfs-site.xml</tt>.
 </tt>These are local directories used to hold distributed filesystem
 data on the master node and slave nodes respectively.  Note
 that <tt>dfs.data.dir</tt> may contain a space- or comma-separated
 list of directory names, so that data may be stored on multiple local
 devices.</li>
 
-<li><tt><a href="../hadoop-default.html#mapred.local.dir">mapred.local.dir</a></tt>,
-  the local directory where temporary MapReduce data is stored.  It
-  also may be a list of directories.</li>
+<li><tt><a href="../mapred-default.html#mapred.local.dir">mapred.local.dir</a></tt>
+  in <tt>conf/mapred-site.xml</tt>, the local directory where temporary 
+  MapReduce data is stored.  It also may be a list of directories.</li>
 
 <li><tt><a
-href="../hadoop-default.html#mapred.map.tasks">mapred.map.tasks</a></tt>
+href="../mapred-default.html#mapred.map.tasks">mapred.map.tasks</a></tt>
 and <tt><a
-href="../hadoop-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt>.
+href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt> 
+in <tt>conf/mapred-site.xml</tt>.
 As a rule of thumb, use 10x the
 number of slave processors for <tt>mapred.map.tasks</tt>, and 2x the
 number of slave processors for <tt>mapred.reduce.tasks</tt>.</li>

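How the split files are picked up at runtime, as an illustrative Java sketch (it assumes the 0.20 Configuration.addDefaultResource mechanism that the HDFS and MapReduce daemons use to register their own resource pairs):

import org.apache.hadoop.conf.Configuration;

public class SplitConfig {
  public static void main(String[] args) {
    // core-default.xml and core-site.xml are loaded by default; the other
    // pairs are registered by the respective daemons.
    Configuration.addDefaultResource("hdfs-default.xml");
    Configuration.addDefaultResource("hdfs-site.xml");
    Configuration.addDefaultResource("mapred-default.xml");
    Configuration.addDefaultResource("mapred-site.xml");

    Configuration conf = new Configuration();
    System.out.println(conf.get("fs.default.name"));    // core-site.xml
    System.out.println(conf.get("dfs.replication"));    // hdfs-site.xml
    System.out.println(conf.get("mapred.job.tracker")); // mapred-site.xml
  }
}
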
+ 56 - 15
src/docs/src/documentation/content/xdocs/cluster_setup.xml

@@ -75,15 +75,20 @@
       <section>
         <title>Configuration Files</title>
         
-        <p>Hadoop configuration is driven by two important configuration files
-        found in the <code>conf/</code> directory of the distribution:</p>
+        <p>Hadoop configuration is driven by two types of important 
+        configuration files:</p>
         <ol>
           <li>
-            <a href="ext:hadoop-default">hadoop-default.xml</a> - Read-only 
-            default configuration.
+            Read-only default configuration - 
+            <a href="ext:core-default">src/core/core-default.xml</a>, 
+            <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a> and 
+            <a href="ext:mapred-default">src/mapred/mapred-default.xml</a>.
           </li>
           <li>
-            <em>hadoop-site.xml</em> - Site-specific configuration.
+            Site-specific configuration - 
+            <em>conf/core-site.xml</em>, 
+            <em>conf/hdfs-site.xml</em> and 
+            <em>conf/mapred-site.xml</em>.
           </li>
         </ol>
       
@@ -157,7 +162,9 @@
           <title>Configuring the Hadoop Daemons</title>
           
           <p>This section deals with important parameters to be specified in the
-          <code>conf/hadoop-site.xml</code> for the Hadoop cluster.</p>
+          following:
+          <br/>
+          <code>conf/core-site.xml</code>:</p>
 
 		  <table>
   		    <tr>
@@ -165,16 +172,21 @@
 		      <th>Value</th> 
 		      <th>Notes</th>
 		    </tr>
-  		    <tr>
-		      <td>fs.default.name</td>
-  		      <td>URI of <code>NameNode</code>.</td>
-		      <td><em>hdfs://hostname/</em></td>
-		    </tr>
 		    <tr>
-		      <td>mapred.job.tracker</td>
-		      <td>Host or IP and port of <code>JobTracker</code>.</td>
-		      <td><em>host:port</em> pair.</td>
-		    </tr>
+              <td>fs.default.name</td>
+              <td>URI of <code>NameNode</code>.</td>
+              <td><em>hdfs://hostname/</em></td>
+            </tr>
+          </table>
+
+      <p><br/><code>conf/hdfs-site.xml</code>:</p>
+          
+      <table>   
+        <tr>
+          <th>Parameter</th>
+          <th>Value</th> 
+          <th>Notes</th>
+        </tr>
 		    <tr>
 		      <td>dfs.name.dir</td>
 		      <td>
@@ -197,6 +209,21 @@
 		        devices.
 		      </td>
 		    </tr>
+      </table>
+
+      <p><br/><code>conf/mapred-site.xml</code>:</p>
+
+      <table>
+          <tr>
+          <th>Parameter</th>
+          <th>Value</th> 
+          <th>Notes</th>
+        </tr>
+        <tr>
+          <td>mapred.job.tracker</td>
+          <td>Host or IP and port of <code>JobTracker</code>.</td>
+          <td><em>host:port</em> pair.</td>
+        </tr>
 		    <tr>
 		      <td>mapred.system.dir</td>
 		      <td>
@@ -322,16 +349,19 @@
                 that is 9TB of data sorted on a cluster with 900 nodes:</p>
                 <table>
   		          <tr>
+                <th>Configuration File</th>
 		            <th>Parameter</th>
 		            <th>Value</th> 
 		            <th>Notes</th>
 		          </tr>
                   <tr>
+                    <td>conf/hdfs-site.xml</td>
                     <td>dfs.block.size</td>
                     <td>134217728</td>
                     <td>HDFS blocksize of 128MB for large file-systems.</td>
                   </tr>
                   <tr>
+                    <td>conf/hdfs-site.xml</td>
                     <td>dfs.namenode.handler.count</td>
                     <td>40</td>
                     <td>
@@ -340,6 +370,7 @@
                     </td>
                   </tr>
                   <tr>
+                    <td>conf/mapred-site.xml</td>
                     <td>mapred.reduce.parallel.copies</td>
                     <td>20</td>
                     <td>
@@ -348,6 +379,7 @@
                     </td>
                   </tr>
                   <tr>
+                    <td>conf/mapred-site.xml</td>
                     <td>mapred.child.java.opts</td>
                     <td>-Xmx512M</td>
                     <td>
@@ -355,6 +387,7 @@
                     </td>
                   </tr>
                   <tr>
+                    <td>conf/core-site.xml</td>
                     <td>fs.inmemory.size.mb</td>
                     <td>200</td>
                     <td>
@@ -363,16 +396,19 @@
                     </td>
                   </tr>
                   <tr>
+                    <td>conf/core-site.xml</td>
                     <td>io.sort.factor</td>
                     <td>100</td>
                     <td>More streams merged at once while sorting files.</td>
                   </tr>
                   <tr>
+                    <td>conf/core-site.xml</td>
                     <td>io.sort.mb</td>
                     <td>200</td>
                     <td>Higher memory-limit while sorting data.</td>
                   </tr>
                   <tr>
+                    <td>conf/core-site.xml</td>
                     <td>io.file.buffer.size</td>
                     <td>131072</td>
                     <td>Size of read/write buffer used in SequenceFiles.</td>
@@ -385,11 +421,13 @@
                 data sorted on 2000 nodes:</p>
                 <table>
   		          <tr>
+                <th>Configuration File</th>
 		            <th>Parameter</th>
 		            <th>Value</th> 
 		            <th>Notes</th>
 		          </tr>
                   <tr>
+                    <td>conf/mapred-site.xml</td>
                     <td>mapred.job.tracker.handler.count</td>
                     <td>60</td>
                     <td>
@@ -398,11 +436,13 @@
                     </td>
                   </tr>
                   <tr>
+                    <td>conf/mapred-site.xml</td>
                     <td>mapred.reduce.parallel.copies</td>
                     <td>50</td>
                     <td></td>
                   </tr>
                   <tr>
+                    <td>conf/mapred-site.xml</td>
                     <td>tasktracker.http.threads</td>
                     <td>50</td>
                     <td>
@@ -412,6 +452,7 @@
                     </td>
                   </tr>
                   <tr>
+                    <td>conf/mapred-site.xml</td>
                     <td>mapred.child.java.opts</td>
                     <td>-Xmx1024M</td>
                     <td>Larger heap-size for child jvms of maps/reduces.</td>

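For per-job experiments, the tuned values from the tables above can also be set on a JobConf; a short illustrative sketch (the site files remain the right place for cluster-wide defaults):

import org.apache.hadoop.mapred.JobConf;

public class TunedJobConf {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    conf.setInt("io.sort.mb", 200);                   // core-site.xml in the table
    conf.setInt("io.sort.factor", 100);               // core-site.xml in the table
    conf.setInt("mapred.reduce.parallel.copies", 20); // mapred-site.xml in the table
    conf.set("mapred.child.java.opts", "-Xmx512M");   // mapred-site.xml in the table
  }
}
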
+ 1 - 1
src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml

@@ -474,7 +474,7 @@
         <a href="mailto:core-user@hadoop.apache.org">core-user[at]hadoop.apache.org</a>.
       </li>
       <li>
-         Explore <code>conf/hadoop-default.xml</code>. 
+         Explore <code>src/hdfs/hdfs-default.xml</code>. 
          It includes brief 
          description of most of the configuration variables available.
       </li>

+ 1 - 1
src/docs/src/documentation/content/xdocs/libhdfs.xml

@@ -82,7 +82,7 @@ gcc above_sample.c -I${HADOOP_HOME}/src/c++/libhdfs -L${HADOOP_HOME}/libhdfs -lh
 <section>
 <title>Common problems</title>
 <p>
-The most common problem is the CLASSPATH is not set properly when calling a program that uses libhdfs. Make sure you set it to all the hadoop jars needed to run Hadoop itself. Currently, there is no way to programmatically generate the classpath, but a good bet is to include all the jar files in ${HADOOP_HOME} and ${HADOOP_HOME}/lib as well as the right configuration directory containing hadoop-site.xml
+The most common problem is the CLASSPATH is not set properly when calling a program that uses libhdfs. Make sure you set it to all the hadoop jars needed to run Hadoop itself. Currently, there is no way to programmatically generate the classpath, but a good bet is to include all the jar files in ${HADOOP_HOME} and ${HADOOP_HOME}/lib as well as the right configuration directory containing hdfs-site.xml
 </p>
 </section>
 <section>

+ 21 - 5
src/docs/src/documentation/content/xdocs/quickstart.xml

@@ -161,7 +161,9 @@
 	  
       <section>
         <title>Configuration</title>
-        <p>Use the following <code>conf/hadoop-site.xml</code>:</p>
+        <p>Use the following:
+        <br/>
+        <code>conf/core-site.xml</code>:</p>
         <table>
         <tr><td>&lt;configuration&gt;</td></tr>
 
@@ -170,10 +172,12 @@
             <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;hdfs://localhost:9000&lt;/value&gt;</td></tr>
           <tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
 
-          <tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
-            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;mapred.job.tracker&lt;/name&gt;</td></tr>
-            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9001&lt;/value&gt;</td></tr>
-          <tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
+        <tr><td>&lt;/configuration&gt;</td></tr>
+        </table>
+      
+        <p><br/><code>conf/hdfs-site.xml</code>:</p>
+        <table>
+        <tr><td>&lt;configuration&gt;</td></tr>
 
           <tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
             <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;dfs.replication&lt;/name&gt;</td></tr>
@@ -182,6 +186,18 @@
 
         <tr><td>&lt;/configuration&gt;</td></tr>
         </table>
+      
+        <p><br/><code>conf/mapred-site.xml</code>:</p>
+        <table>
+        <tr><td>&lt;configuration&gt;</td></tr>
+
+          <tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;mapred.job.tracker&lt;/name&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9001&lt;/value&gt;</td></tr>
+          <tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
+
+        <tr><td>&lt;/configuration&gt;</td></tr>
+        </table>
       </section>
 
       <section>

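A quick illustrative Java check that the quickstart values are read from the new files on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class QuickstartCheck {
  public static void main(String[] args) {
    Configuration core = new Configuration(); // sees conf/core-site.xml
    JobConf mapred = new JobConf();           // additionally sees conf/mapred-site.xml
    System.out.println(core.get("fs.default.name"));      // expect hdfs://localhost:9000
    System.out.println(mapred.get("mapred.job.tracker")); // expect localhost:9001
  }
}
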
+ 3 - 0
src/docs/src/documentation/content/xdocs/site.xml

@@ -73,6 +73,9 @@ See http://forrest.apache.org/docs/linking.html for more info.
     <wiki      href="http://wiki.apache.org/hadoop/" />
     <faq       href="http://wiki.apache.org/hadoop/FAQ" />
     <hadoop-default href="http://hadoop.apache.org/core/docs/current/hadoop-default.html" />
+    <core-default href="http://hadoop.apache.org/core/docs/current/core-default.html" />
+    <hdfs-default href="http://hadoop.apache.org/core/docs/current/hdfs-default.html" />
+    <mapred-default href="http://hadoop.apache.org/core/docs/current/mapred-default.html" />
     <zlib      href="http://www.zlib.net/" />
     <gzip      href="http://www.gzip.org/" />
     <bzip      href="http://www.bzip.org/" />

+ 2 - 2
src/docs/src/documentation/content/xdocs/streaming.xml

@@ -223,7 +223,7 @@ The -D mapred.reduce.tasks=2 in the above example specifies to use two reducers
 </p>
 <p>
 For more details on the jobconf parameters see:
-<a href="ext:hadoop-default">hadoop-default.html</a></p>
+<a href="ext:mapred-default">mapred-default.html</a></p>
 </section>
 
 <section>
@@ -276,7 +276,7 @@ To specify additional local temp directories use:
 </source>
 <p>
 For more details on jobconf parameters see:
-<a href="ext:hadoop-default">hadoop-default.html</a></p>
+<a href="ext:mapred-default">mapred-default.html</a></p>
 <p>
 To set an environment variable in a streaming command use:
 </p>

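The -D jobconf parameters mentioned here are consumed by GenericOptionsParser when a job is launched through ToolRunner; an illustrative Java sketch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ShowReduces extends Configured implements Tool {
  public int run(String[] args) {
    // -D key=value arguments have already been folded into getConf().
    System.out.println(getConf().get("mapred.reduce.tasks"));
    return 0;
  }
  public static void main(String[] args) throws Exception {
    // e.g. java ShowReduces -D mapred.reduce.tasks=2
    System.exit(ToolRunner.run(new Configuration(), new ShowReduces(), args));
  }
}
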
+ 1 - 1
src/mapred/org/apache/hadoop/mapred/InputFormat.java

@@ -48,7 +48,7 @@ import org.apache.hadoop.fs.FileSystem;
  * bytes, of the input files. However, the {@link FileSystem} blocksize of  
  * the input files is treated as an upper bound for input splits. A lower bound 
  * on the split size can be set via 
- * <a href="{@docRoot}/../hadoop-default.html#mapred.min.split.size">
+ * <a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
  * mapred.min.split.size</a>.</p>
  * 
  * <p>Clearly, logical splits based on input-size is insufficient for many 

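Raising the lower bound the Javadoc refers to, as an illustrative sketch using the generic configuration key:

import org.apache.hadoop.mapred.JobConf;

public class MinSplitSize {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    conf.setLong("mapred.min.split.size", 64L * 1024 * 1024); // 64MB lower bound
    System.out.println(conf.getLong("mapred.min.split.size", 0));
  }
}
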
+ 3 - 3
src/mapred/org/apache/hadoop/mapred/JobConf.java

@@ -1047,7 +1047,7 @@ public class JobConf extends Configuration {
    * bytes, of input files. However, the {@link FileSystem} blocksize of the 
    * input files is treated as an upper bound for input splits. A lower bound 
    * on the split size can be set via 
-   * <a href="{@docRoot}/../hadoop-default.html#mapred.min.split.size">
+   * <a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
    * mapred.min.split.size</a>.</p>
    *  
    * <p>Thus, if you expect 10TB of input data and have a blocksize of 128MB, 
@@ -1077,7 +1077,7 @@ public class JobConf extends Configuration {
    * 
    * <p>The right number of reduces seems to be <code>0.95</code> or 
    * <code>1.75</code> multiplied by (&lt;<i>no. of nodes</i>&gt; * 
-   * <a href="{@docRoot}/../hadoop-default.html#mapred.tasktracker.reduce.tasks.maximum">
+   * <a href="{@docRoot}/../mapred-default.html#mapred.tasktracker.reduce.tasks.maximum">
    * mapred.tasktracker.reduce.tasks.maximum</a>).
    * </p>
    * 
@@ -1173,7 +1173,7 @@ public class JobConf extends Configuration {
    * performance metrics system via the org.apache.hadoop.metrics API.  The 
    * session identifier is intended, in particular, for use by Hadoop-On-Demand 
    * (HOD) which allocates a virtual Hadoop cluster dynamically and transiently. 
-   * HOD will set the session identifier by modifying the hadoop-site.xml file 
+   * HOD will set the session identifier by modifying the mapred-site.xml file 
    * before starting the cluster.
    *
    * When not running under HOD, this identifier is expected to remain set to 

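The 0.95 rule of thumb from the Javadoc above, as an illustrative sketch (the node count is assumed to be known out of band):

import org.apache.hadoop.mapred.JobConf;

public class ReduceCount {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    int nodes = 10; // assumption: cluster size known out of band
    int maxPerNode = conf.getInt("mapred.tasktracker.reduce.tasks.maximum", 2);
    conf.setNumReduceTasks((int) (0.95 * nodes * maxPerNode));
    System.out.println(conf.getNumReduceTasks());
  }
}
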
+ 1 - 1
src/mapred/org/apache/hadoop/mapred/Mapper.java

@@ -145,7 +145,7 @@ public interface Mapper<K1, V1, K2, V2> extends JobConfigurable, Closeable {
    * takes an insignificant amount of time to process individual key/value 
    * pairs, this is crucial since the framework might assume that the task has 
    * timed-out and kill that task. The other way of avoiding this is to set 
-   * <a href="{@docRoot}/../hadoop-default.html#mapred.task.timeout">
+   * <a href="{@docRoot}/../mapred-default.html#mapred.task.timeout">
    * mapred.task.timeout</a> to a high-enough value (or even zero for no 
    * time-outs).</p>
    * 

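Disabling the task timeout that the Mapper and Reducer Javadocs mention, as an illustrative sketch via the generic key:

import org.apache.hadoop.mapred.JobConf;

public class NoTaskTimeout {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    conf.setLong("mapred.task.timeout", 0L); // 0 disables task time-outs
  }
}
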
+ 1 - 1
src/mapred/org/apache/hadoop/mapred/Reducer.java

@@ -185,7 +185,7 @@ public interface Reducer<K2, V2, K3, V3> extends JobConfigurable, Closeable {
    * takes an insignificant amount of time to process individual key/value 
    * pairs, this is crucial since the framework might assume that the task has 
    * timed-out and kill that task. The other way of avoiding this is to set 
-   * <a href="{@docRoot}/../hadoop-default.html#mapred.task.timeout">
+   * <a href="{@docRoot}/../mapred-default.html#mapred.task.timeout">
    * mapred.task.timeout</a> to a high-enough value (or even zero for no 
    * time-outs).</p>
    * 

+ 1 - 1
src/mapred/org/apache/hadoop/mapreduce/InputFormat.java

@@ -50,7 +50,7 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  * bytes, of the input files. However, the {@link FileSystem} blocksize of  
  * the input files is treated as an upper bound for input splits. A lower bound 
  * on the split size can be set via 
- * <a href="{@docRoot}/../hadoop-default.html#mapred.min.split.size">
+ * <a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
  * mapred.min.split.size</a>.</p>
  * 
  * <p>Clearly, logical splits based on input-size is insufficient for many 

+ 5 - 3
src/test/hadoop-site.xml

@@ -1,9 +1,11 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
 
-<!-- Values used when running unit tests.  This is mostly empty, to -->
-<!-- use of the default values, overriding the potentially -->
-<!-- user-editted hadoop-site.xml in the conf/ directory.  -->
+<!-- DO NOT PUT ANY PROPERTY IN THIS FILE. INSTEAD USE -->
+<!-- core-site.xml, mapred-site.xml OR hdfs-site.xml -->
+<!-- This empty script is to avoid picking properties from  -->
+<!-- conf/hadoop-site.xml This would be removed once support  -->
+<!-- for hadoop-site.xml is removed.  -->
 
 <configuration>