|
@@ -0,0 +1,599 @@
|
|
|
+<html>
|
|
|
+<body>
|
|
|
+<table border="1">
|
|
|
+<tr>
|
|
|
+<td>name</td><td>value</td><td>description</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.tmp.dir">hadoop.tmp.dir</a></td><td>/tmp/hadoop-${user.name}</td><td>A base for other temporary directories.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.native.lib">hadoop.native.lib</a></td><td>true</td><td>Should native hadoop libraries, if present, be used.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.logfile.size">hadoop.logfile.size</a></td><td>10000000</td><td>The max size of each log file</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.logfile.count">hadoop.logfile.count</a></td><td>10</td><td>The max number of log files</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.namenode.logging.level">dfs.namenode.logging.level</a></td><td>info</td><td>The logging level for dfs namenode. Other values are "dir"(trac
|
|
|
+e namespace mutations), "block"(trace block under/over replications and block
|
|
|
+creations/deletions), or "all".</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.sort.factor">io.sort.factor</a></td><td>10</td><td>The number of streams to merge at once while sorting
|
|
|
+ files. This determines the number of open file handles.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.sort.mb">io.sort.mb</a></td><td>100</td><td>The total amount of buffer memory to use while sorting
|
|
|
+ files, in megabytes. By default, gives each merge stream 1MB, which
|
|
|
+ should minimize seeks.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.file.buffer.size">io.file.buffer.size</a></td><td>4096</td><td>The size of buffer for use in sequence files.
|
|
|
+ The size of this buffer should probably be a multiple of hardware
|
|
|
+ page size (4096 on Intel x86), and it determines how much data is
|
|
|
+ buffered during read and write operations.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.bytes.per.checksum">io.bytes.per.checksum</a></td><td>512</td><td>The number of bytes per checksum. Must not be larger than
|
|
|
+ io.file.buffer.size.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.skip.checksum.errors">io.skip.checksum.errors</a></td><td>false</td><td>If true, when a checksum error is encountered while
|
|
|
+ reading a sequence file, entries are skipped, instead of throwing an
|
|
|
+ exception.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.map.index.skip">io.map.index.skip</a></td><td>0</td><td>Number of index entries to skip between each entry.
|
|
|
+ Zero by default. Setting this to values larger than zero can
|
|
|
+ facilitate opening large map files using less memory.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.compression.codecs">io.compression.codecs</a></td><td>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</td><td>A list of the compression codec classes that can be used
|
|
|
+ for compression/decompression.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.default.name">fs.default.name</a></td><td>file:///</td><td>The name of the default file system. A URI whose
|
|
|
+ scheme and authority determine the FileSystem implementation. The
|
|
|
+ uri's scheme determines the config property (fs.SCHEME.impl) naming
|
|
|
+ the FileSystem implementation class. The uri's authority is used to
|
|
|
+ determine the host, port, etc. for a filesystem.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.trash.root">fs.trash.root</a></td><td>${hadoop.tmp.dir}/Trash</td><td>The trash directory, used by FsShell's 'rm' command.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.trash.interval">fs.trash.interval</a></td><td>0</td><td>Number of minutes between trash checkpoints.
|
|
|
+ If zero, the trash feature is disabled.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.file.impl">fs.file.impl</a></td><td>org.apache.hadoop.fs.LocalFileSystem</td><td>The FileSystem for file: uris.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.hdfs.impl">fs.hdfs.impl</a></td><td>org.apache.hadoop.dfs.DistributedFileSystem</td><td>The FileSystem for hdfs: uris.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.s3.impl">fs.s3.impl</a></td><td>org.apache.hadoop.fs.s3.S3FileSystem</td><td>The FileSystem for s3: uris.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.kfs.impl">fs.kfs.impl</a></td><td>org.apache.hadoop.fs.kfs.KosmosFileSystem</td><td>The FileSystem for kfs: uris.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.hftp.impl">fs.hftp.impl</a></td><td>org.apache.hadoop.dfs.HftpFileSystem</td><td></td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.ramfs.impl">fs.ramfs.impl</a></td><td>org.apache.hadoop.fs.InMemoryFileSystem</td><td>The FileSystem for ramfs: uris.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.inmemory.size.mb">fs.inmemory.size.mb</a></td><td>75</td><td>The size of the in-memory filsystem instance in MB</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.checkpoint.dir">fs.checkpoint.dir</a></td><td>${hadoop.tmp.dir}/dfs/namesecondary</td><td>Determines where on the local filesystem the DFS secondary
|
|
|
+ name node should store the temporary images and edits to merge.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.checkpoint.period">fs.checkpoint.period</a></td><td>3600</td><td>The number of seconds between two periodic checkpoints.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.checkpoint.size">fs.checkpoint.size</a></td><td>67108864</td><td>The size of the current edit log (in bytes) that triggers
|
|
|
+ a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.secondary.http.bindAddress">dfs.secondary.http.bindAddress</a></td><td>0.0.0.0:50090</td><td>
|
|
|
+ The secondary namenode http server bind address and port.
|
|
|
+ If the port is 0 then the server will start on a free port.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.datanode.bindAddress">dfs.datanode.bindAddress</a></td><td>0.0.0.0:50010</td><td>
|
|
|
+  The address that the datanode will listen on.
|
|
|
+ If the port is 0 then the server will start on a free port.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.datanode.http.bindAddress">dfs.datanode.http.bindAddress</a></td><td>0.0.0.0:50075</td><td>
|
|
|
+ The datanode http server bind address and port.
|
|
|
+ If the port is 0 then the server will start on a free port.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.http.bindAddress">dfs.http.bindAddress</a></td><td>0.0.0.0:50070</td><td>
|
|
|
+  The address and the base port on which the dfs namenode web ui will listen.
|
|
|
+ If the port is 0 then the server will start on a free port.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.datanode.dns.interface">dfs.datanode.dns.interface</a></td><td>default</td><td>The name of the Network Interface from which a data node should
|
|
|
+ report its IP address.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.datanode.dns.nameserver">dfs.datanode.dns.nameserver</a></td><td>default</td><td>The host name or IP address of the name server (DNS)
|
|
|
+ which a DataNode should use to determine the host name used by the
|
|
|
+ NameNode for communication and display purposes.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.replication.considerLoad">dfs.replication.considerLoad</a></td><td>true</td><td>Decide if chooseTarget considers the target's load or not
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.default.chunk.view.size">dfs.default.chunk.view.size</a></td><td>32768</td><td>The number of bytes to view for a file on the browser.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.datanode.du.reserved">dfs.datanode.du.reserved</a></td><td>0</td><td>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.datanode.du.pct">dfs.datanode.du.pct</a></td><td>0.98f</td><td>When calculating remaining space, only use this percentage of the real available space
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.name.dir">dfs.name.dir</a></td><td>${hadoop.tmp.dir}/dfs/name</td><td>Determines where on the local filesystem the DFS name node
|
|
|
+ should store the name table. If this is a comma-delimited list
|
|
|
+ of directories then the name table is replicated in all of the
|
|
|
+ directories, for redundancy. </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.client.buffer.dir">dfs.client.buffer.dir</a></td><td>${hadoop.tmp.dir}/dfs/tmp</td><td>Determines where on the local filesystem an DFS client
|
|
|
+ should store its blocks before it sends them to the datanode.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.data.dir">dfs.data.dir</a></td><td>${hadoop.tmp.dir}/dfs/data</td><td>Determines where on the local filesystem an DFS data node
|
|
|
+ should store its blocks. If this is a comma-delimited
|
|
|
+ list of directories, then data will be stored in all named
|
|
|
+ directories, typically on different devices.
|
|
|
+ Directories that do not exist are ignored.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.replication">dfs.replication</a></td><td>3</td><td>Default block replication.
|
|
|
+  The actual number of replicas can be specified when the file is created.
|
|
|
+  The default is used if replication is not specified at create time.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.replication.max">dfs.replication.max</a></td><td>512</td><td>Maximal block replication.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.replication.min">dfs.replication.min</a></td><td>1</td><td>Minimal block replication.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.block.size">dfs.block.size</a></td><td>67108864</td><td>The default block size for new files.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.df.interval">dfs.df.interval</a></td><td>60000</td><td>Disk usage statistics refresh interval in msec.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.client.block.write.retries">dfs.client.block.write.retries</a></td><td>3</td><td>The number of retries for writing blocks to the data nodes,
|
|
|
+ before we signal failure to the application.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.blockreport.intervalMsec">dfs.blockreport.intervalMsec</a></td><td>3600000</td><td>Determines block reporting interval in milliseconds.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.heartbeat.interval">dfs.heartbeat.interval</a></td><td>3</td><td>Determines datanode heartbeat interval in seconds.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.namenode.handler.count">dfs.namenode.handler.count</a></td><td>10</td><td>The number of server threads for the namenode.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.safemode.threshold.pct">dfs.safemode.threshold.pct</a></td><td>0.999f</td><td>
|
|
|
+ Specifies the percentage of blocks that should satisfy
|
|
|
+ the minimal replication requirement defined by dfs.replication.min.
|
|
|
+ Values less than or equal to 0 mean not to start in safe mode.
|
|
|
+ Values greater than 1 will make safe mode permanent.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.safemode.extension">dfs.safemode.extension</a></td><td>30000</td><td>
|
|
|
+ Determines extension of safe mode in milliseconds
|
|
|
+ after the threshold level is reached.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.network.script">dfs.network.script</a></td><td></td><td>
|
|
|
+  Specifies a script name that prints the network location path
|
|
|
+ of the current machine.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.balance.bandwidthPerSec">dfs.balance.bandwidthPerSec</a></td><td>1048576</td><td>
|
|
|
+ Specifies the maximum amount of bandwidth that each datanode
|
|
|
+  can utilize for balancing purposes, in terms of
|
|
|
+ the number of bytes per second.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.hosts">dfs.hosts</a></td><td></td><td>Names a file that contains a list of hosts that are
|
|
|
+ permitted to connect to the namenode. The full pathname of the file
|
|
|
+ must be specified. If the value is empty, all hosts are
|
|
|
+ permitted.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="dfs.hosts.exclude">dfs.hosts.exclude</a></td><td></td><td>Names a file that contains a list of hosts that are
|
|
|
+ not permitted to connect to the namenode. The full pathname of the
|
|
|
+ file must be specified. If the value is empty, no hosts are
|
|
|
+ excluded.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.s3.block.size">fs.s3.block.size</a></td><td>67108864</td><td>Block size to use when writing files to S3.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.s3.buffer.dir">fs.s3.buffer.dir</a></td><td>${hadoop.tmp.dir}/s3</td><td>Determines where on the local filesystem the S3 filesystem
|
|
|
+ should store its blocks before it sends them to S3
|
|
|
+ or after it retrieves them from S3.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.s3.maxRetries">fs.s3.maxRetries</a></td><td>4</td><td>The maximum number of retries for reading or writing blocks to S3,
|
|
|
+ before we signal failure to the application.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="fs.s3.sleepTimeSeconds">fs.s3.sleepTimeSeconds</a></td><td>10</td><td>The number of seconds to sleep between each S3 retry.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.job.tracker">mapred.job.tracker</a></td><td>local</td><td>The host and port that the MapReduce job tracker runs
|
|
|
+ at. If "local", then jobs are run in-process as a single map
|
|
|
+ and reduce task.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.job.tracker.http.bindAddress">mapred.job.tracker.http.bindAddress</a></td><td>0.0.0.0:50030</td><td>
|
|
|
+ The job tracker http server bind address and port.
|
|
|
+ If the port is 0 then the server will start on a free port.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.job.tracker.handler.count">mapred.job.tracker.handler.count</a></td><td>10</td><td>
|
|
|
+ The number of server threads for the JobTracker. This should be roughly
|
|
|
+ 4% of the number of tasktracker nodes.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.task.tracker.report.bindAddress">mapred.task.tracker.report.bindAddress</a></td><td>127.0.0.1:0</td><td>The interface that task processes use to communicate
|
|
|
+ with their parent tasktracker process.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.local.dir">mapred.local.dir</a></td><td>${hadoop.tmp.dir}/mapred/local</td><td>The local directory where MapReduce stores intermediate
|
|
|
+ data files. May be a comma-separated list of
|
|
|
+ directories on different devices in order to spread disk i/o.
|
|
|
+ Directories that do not exist are ignored.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="local.cache.size">local.cache.size</a></td><td>10737418240</td><td>The limit on the size of cache you want to keep, set by default
|
|
|
+ to 10GB. This will act as a soft limit on the cache directory for out of band data.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.system.dir">mapred.system.dir</a></td><td>${hadoop.tmp.dir}/mapred/system</td><td>The shared directory where MapReduce stores control files.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.temp.dir">mapred.temp.dir</a></td><td>${hadoop.tmp.dir}/mapred/temp</td><td>A shared directory for temporary files.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.local.dir.minspacestart">mapred.local.dir.minspacestart</a></td><td>0</td><td>If the space in mapred.local.dir drops under this,
|
|
|
+ do not ask for more tasks.
|
|
|
+ Value in bytes.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.local.dir.minspacekill">mapred.local.dir.minspacekill</a></td><td>0</td><td>If the space in mapred.local.dir drops under this,
|
|
|
+  do not ask for more tasks until all the current ones have finished and
|
|
|
+  cleaned up. Also, to save the rest of the tasks we have running,
|
|
|
+  kill one of them to free up some space. Start with the reduce tasks,
|
|
|
+  then go with the ones that have progressed the least.
|
|
|
+ Value in bytes.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.tasktracker.expiry.interval">mapred.tasktracker.expiry.interval</a></td><td>600000</td><td>Expert: The time-interval, in miliseconds, after which
|
|
|
+ a tasktracker is declared 'lost' if it doesn't send heartbeats.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.map.tasks">mapred.map.tasks</a></td><td>2</td><td>The default number of map tasks per job. Typically set
|
|
|
+  to a prime several times greater than the number of available hosts.
|
|
|
+ Ignored when mapred.job.tracker is "local".
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.reduce.tasks">mapred.reduce.tasks</a></td><td>1</td><td>The default number of reduce tasks per job. Typically set
|
|
|
+ to a prime close to the number of available hosts. Ignored when
|
|
|
+ mapred.job.tracker is "local".
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.map.max.attempts">mapred.map.max.attempts</a></td><td>4</td><td>Expert: The maximum number of attempts per map task.
|
|
|
+  In other words, the framework will try to execute a map task this many
|
|
|
+  times before giving up on it.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.reduce.max.attempts">mapred.reduce.max.attempts</a></td><td>4</td><td>Expert: The maximum number of attempts per reduce task.
|
|
|
+  In other words, the framework will try to execute a reduce task this many
|
|
|
+  times before giving up on it.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.reduce.parallel.copies">mapred.reduce.parallel.copies</a></td><td>5</td><td>The default number of parallel transfers run by reduce
|
|
|
+  during the copy (shuffle) phase.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.reduce.copy.backoff">mapred.reduce.copy.backoff</a></td><td>300</td><td>The maximum amount of time (in seconds) a reducer spends on
|
|
|
+ fetching one map output before declaring it as failed.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.task.timeout">mapred.task.timeout</a></td><td>600000</td><td>The number of milliseconds before a task will be
|
|
|
+ terminated if it neither reads an input, writes an output, nor
|
|
|
+ updates its status string.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.tasktracker.map.tasks.maximum">mapred.tasktracker.map.tasks.maximum</a></td><td>2</td><td>The maximum number of map tasks that will be run
|
|
|
+ simultaneously by a task tracker.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.tasktracker.reduce.tasks.maximum">mapred.tasktracker.reduce.tasks.maximum</a></td><td>2</td><td>The maximum number of reduce tasks that will be run
|
|
|
+ simultaneously by a task tracker.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.jobtracker.completeuserjobs.maximum">mapred.jobtracker.completeuserjobs.maximum</a></td><td>100</td><td>The maximum number of complete jobs per user to keep around before delegating them to the job history.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.child.java.opts">mapred.child.java.opts</a></td><td>-Xmx200m</td><td>Java opts for the task tracker child processes. Subsumes
|
|
|
+ 'mapred.child.heap.size' (If a mapred.child.heap.size value is found
|
|
|
+ in a configuration, its maximum heap size will be used and a warning
|
|
|
+ emitted that heap.size has been deprecated). Also, the following symbol,
|
|
|
+ if present, will be interpolated: @taskid@ is replaced by current TaskID.
|
|
|
+ Any other occurrences of '@' will go unchanged. For
|
|
|
+ example, to enable verbose gc logging to a file named for the taskid in
|
|
|
+ /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
|
|
|
+ -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.inmem.merge.threshold">mapred.inmem.merge.threshold</a></td><td>1000</td><td>The threshold, in terms of the number of files
|
|
|
+  for the in-memory merge process. When we accumulate the threshold number of files,
|
|
|
+  we initiate the in-memory merge and spill to disk. A value of 0 or less
|
|
|
+  indicates that there is no threshold and we instead depend only on
|
|
|
+  the ramfs's memory consumption to trigger the merge.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.speculative.execution">mapred.speculative.execution</a></td><td>true</td><td>If true, then multiple instances of some map and reduce tasks
|
|
|
+ may be executed in parallel.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.min.split.size">mapred.min.split.size</a></td><td>0</td><td>The minimum size chunk that map input should be split
|
|
|
+ into. Note that some file formats may have minimum split sizes that
|
|
|
+ take priority over this setting.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.submit.replication">mapred.submit.replication</a></td><td>10</td><td>The replication level for submitted job files. This
|
|
|
+ should be around the square root of the number of nodes.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.tasktracker.dns.interface">mapred.tasktracker.dns.interface</a></td><td>default</td><td>The name of the Network Interface from which a task
|
|
|
+ tracker should report its IP address.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.tasktracker.dns.nameserver">mapred.tasktracker.dns.nameserver</a></td><td>default</td><td>The host name or IP address of the name server (DNS)
|
|
|
+ which a TaskTracker should use to determine the host name used by
|
|
|
+ the JobTracker for communication and display purposes.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="tasktracker.http.threads">tasktracker.http.threads</a></td><td>40</td><td>The number of worker threads that for the http server. This is
|
|
|
+ used for map output fetching
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.task.tracker.http.bindAddress">mapred.task.tracker.http.bindAddress</a></td><td>0.0.0.0:50060</td><td>
|
|
|
+ The task tracker http server bind address and port.
|
|
|
+ If the port is 0 then the server will start on a free port.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="keep.failed.task.files">keep.failed.task.files</a></td><td>false</td><td>Should the files for failed tasks be kept. This should only be
|
|
|
+ used on jobs that are failing, because the storage is never
|
|
|
+ reclaimed. It also prevents the map outputs from being erased
|
|
|
+ from the reduce directory as they are consumed.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.output.compress">mapred.output.compress</a></td><td>false</td><td>Should the job outputs be compressed?
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.output.compression.type">mapred.output.compression.type</a></td><td>RECORD</td><td>If the job outputs are to compressed as SequenceFiles, how should
|
|
|
+ they be compressed? Should be one of NONE, RECORD or BLOCK.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.output.compression.codec">mapred.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If the job outputs are compressed, how should they be compressed?
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.compress.map.output">mapred.compress.map.output</a></td><td>false</td><td>Should the outputs of the maps be compressed before being
|
|
|
+  sent across the network? Uses SequenceFile compression.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.map.output.compression.type">mapred.map.output.compression.type</a></td><td>RECORD</td><td>If the map outputs are to compressed, how should they
|
|
|
+ be compressed? Should be one of NONE, RECORD or BLOCK.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.map.output.compression.codec">mapred.map.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If the map outputs are compressed, how should they be
|
|
|
+ compressed?
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.seqfile.compress.blocksize">io.seqfile.compress.blocksize</a></td><td>1000000</td><td>The minimum block size for compression in block compressed
|
|
|
+ SequenceFiles.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.seqfile.lazydecompress">io.seqfile.lazydecompress</a></td><td>true</td><td>Should values of block-compressed SequenceFiles be decompressed
|
|
|
+  only when necessary?
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.seqfile.sorter.recordlimit">io.seqfile.sorter.recordlimit</a></td><td>1000000</td><td>The limit on number of records to be kept in memory in a spill
|
|
|
+ in SequenceFiles.Sorter
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="io.seqfile.compression.type">io.seqfile.compression.type</a></td><td>RECORD</td><td>The default compression type for SequenceFile.Writer.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="map.sort.class">map.sort.class</a></td><td>org.apache.hadoop.mapred.MergeSorter</td><td>The default sort class for sorting keys.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.userlog.limit.kb">mapred.userlog.limit.kb</a></td><td>0</td><td>The maximum size of user-logs of each task in KB. 0 disables the cap.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.userlog.retain.hours">mapred.userlog.retain.hours</a></td><td>24</td><td>The maximum time, in hours, for which the user-logs are to be
|
|
|
+ retained.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.hosts">mapred.hosts</a></td><td></td><td>Names a file that contains the list of nodes that may
|
|
|
+ connect to the jobtracker. If the value is empty, all hosts are
|
|
|
+ permitted.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.hosts.exclude">mapred.hosts.exclude</a></td><td></td><td>Names a file that contains the list of hosts that
|
|
|
+ should be excluded by the jobtracker. If the value is empty, no
|
|
|
+ hosts are excluded.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="mapred.max.tracker.failures">mapred.max.tracker.failures</a></td><td>4</td><td>The number of task-failures on a tasktracker of a given job
|
|
|
+ after which new tasks of that job aren't assigned to it.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="jobclient.output.filter">jobclient.output.filter</a></td><td>FAILED</td><td>The filter for controlling the output of the task's userlogs sent
|
|
|
+ to the console of the JobClient.
|
|
|
+ The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and
|
|
|
+ ALL.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="ipc.client.timeout">ipc.client.timeout</a></td><td>60000</td><td>Defines the timeout for IPC calls in milliseconds.</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="ipc.client.idlethreshold">ipc.client.idlethreshold</a></td><td>4000</td><td>Defines the threshold number of connections after which
|
|
|
+ connections will be inspected for idleness.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="ipc.client.maxidletime">ipc.client.maxidletime</a></td><td>120000</td><td>Defines the maximum idle time for a connected client after
|
|
|
+ which it may be disconnected.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="ipc.client.kill.max">ipc.client.kill.max</a></td><td>10</td><td>Defines the maximum number of clients to disconnect in one go.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="ipc.client.connection.maxidletime">ipc.client.connection.maxidletime</a></td><td>1000</td><td>The maximum time after which a client will bring down the
|
|
|
+ connection to the server.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="ipc.client.connect.max.retries">ipc.client.connect.max.retries</a></td><td>10</td><td>Indicates the number of retries a client will make to establish
|
|
|
+ a server connection.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="ipc.server.listen.queue.size">ipc.server.listen.queue.size</a></td><td>128</td><td>Indicates the length of the listen queue for servers accepting
|
|
|
+ client connections.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="job.end.retry.attempts">job.end.retry.attempts</a></td><td>0</td><td>Indicates how many times hadoop should attempt to contact the
|
|
|
+ notification URL </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="job.end.retry.interval">job.end.retry.interval</a></td><td>30000</td><td>Indicates time in milliseconds between notification URL retry
|
|
|
+ calls</td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="webinterface.private.actions">webinterface.private.actions</a></td><td>false</td><td> If set to true, the web interfaces of JT and NN may contain
|
|
|
+ actions, such as kill job, delete file, etc., that should
|
|
|
+  not be exposed to the public. Enable this option if the interfaces
|
|
|
+ are only reachable by those who have the right authorization.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.rpc.socket.factory.class.default">hadoop.rpc.socket.factory.class.default</a></td><td>org.apache.hadoop.net.StandardSocketFactory</td><td> Default SocketFactory to use. This parameter is expected to be
|
|
|
+ formatted as "package.FactoryClassName".
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.rpc.socket.factory.class.ClientProtocol">hadoop.rpc.socket.factory.class.ClientProtocol</a></td><td></td><td> SocketFactory to use to connect to a DFS. If null or empty, use
|
|
|
+  hadoop.rpc.socket.factory.class.default. This socket factory is also used by
|
|
|
+ DFSClient to create sockets to DataNodes.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.rpc.socket.factory.class.JobSubmissionProtocol">hadoop.rpc.socket.factory.class.JobSubmissionProtocol</a></td><td></td><td> SocketFactory to use to connect to a Map/Reduce master
|
|
|
+  (JobTracker). If null or empty, then use hadoop.rpc.socket.factory.class.default.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+<tr>
|
|
|
+<td><a name="hadoop.socks.server">hadoop.socks.server</a></td><td></td><td> Address (host:port) of the SOCKS server to be used by the
|
|
|
+ SocksSocketFactory.
|
|
|
+ </td>
|
|
|
+</tr>
|
|
|
+</table>
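+<p>The table above lists the built-in defaults. As a minimal sketch only, assuming the usual layout for Hadoop releases of this vintage, site-specific values go in conf/hadoop-site.xml, which overrides these defaults; the namenode host, port, and replication value below are placeholders, not recommendations:</p>
+<pre>
+&lt;?xml version="1.0"?&gt;
+&lt;configuration&gt;
+  &lt;!-- Point the default filesystem at an HDFS namenode (placeholder host and port). --&gt;
+  &lt;property&gt;
+    &lt;name&gt;fs.default.name&lt;/name&gt;
+    &lt;value&gt;hdfs://namenode.example.com:9000&lt;/value&gt;
+  &lt;/property&gt;
+  &lt;!-- Keep two copies of each block instead of the default of three (example value). --&gt;
+  &lt;property&gt;
+    &lt;name&gt;dfs.replication&lt;/name&gt;
+    &lt;value&gt;2&lt;/value&gt;
+  &lt;/property&gt;
+&lt;/configuration&gt;
+</pre>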
|
|
|
+</body>
|
|
|
+</html>
|