
HADOOP-3593. Updates the mapred tutorial. Contributed by Devaraj Das.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@669446 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das 17 years ago
parent
commit
5c96dceff7

+ 2 - 0
CHANGES.txt

@@ -310,6 +310,8 @@ Release 0.18.0 - Unreleased
    HADOOP-3535. Fix documentation and name of IOUtils.close to
    reflect that it should only be used in cleanup contexts. (omalley)

+    HADOOP-3593. Updates the mapred tutorial. (ddas)
+
  OPTIMIZATIONS

    HADOOP-3274. The default constructor of BytesWritable creates empty 

+ 58 - 16
docs/changes.html

@@ -76,8 +76,10 @@
    </ol>
  </li>
  <li><a href="javascript:toggleList('trunk_(unreleased_changes)_._bug_fixes_')">  BUG FIXES
-</a>&nbsp;&nbsp;&nbsp;(none)
+</a>&nbsp;&nbsp;&nbsp;(1)
    <ol id="trunk_(unreleased_changes)_._bug_fixes_">
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3563">HADOOP-3563</a>.  Refactor the distributed upgrade code so that it is
+easier to identify datanode and namenode related code.<br />(dhruba)</li>
    </ol>
  </li>
</ul>
@@ -207,7 +209,7 @@ framework.<br />(tomwhite via omalley)</li>
    </ol>
  </li>
  <li><a href="javascript:toggleList('release_0.18.0_-_unreleased_._improvements_')">  IMPROVEMENTS
-</a>&nbsp;&nbsp;&nbsp;(39)
+</a>&nbsp;&nbsp;&nbsp;(41)
    <ol id="release_0.18.0_-_unreleased_._improvements_">
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-2928">HADOOP-2928</a>. Remove deprecated FileSystem.getContentLength().<br />(Lohit Vjayarenu via rangadi)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3130">HADOOP-3130</a>. Make the connect timeout smaller for getFile.<br />(Amar Ramesh Kamat via ddas)</li>
@@ -290,17 +292,18 @@ the Map-Reduce tutorial.<br />(Amareshwari Sriramadasu via ddas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3406">HADOOP-3406</a>. Add forrest documentation for Profiling.<br />(Amareshwari Sriramadasu via ddas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-2762">HADOOP-2762</a>. Add forrest documentation for controls of memory limits on
hadoop daemons and Map-Reduce tasks.<br />(Amareshwari Sriramadasu via ddas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3535">HADOOP-3535</a>. Fix documentation and name of IOUtils.close to
+reflect that it should only be used in cleanup contexts.<br />(omalley)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3593">HADOOP-3593</a>. Updates the mapred tutorial.<br />(ddas)</li>
    </ol>
  </li>
  <li><a href="javascript:toggleList('release_0.18.0_-_unreleased_._optimizations_')">  OPTIMIZATIONS
-</a>&nbsp;&nbsp;&nbsp;(10)
+</a>&nbsp;&nbsp;&nbsp;(9)
    <ol id="release_0.18.0_-_unreleased_._optimizations_">
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3274">HADOOP-3274</a>. The default constructor of BytesWritable creates empty
byte array. (Tsz Wo (Nicholas), SZE via shv)
</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3272">HADOOP-3272</a>. Remove redundant copy of Block object in BlocksMap.<br />(Lohit Vjayarenu via shv)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-1979">HADOOP-1979</a>. Speed up fsck by adding a buffered stream.<br />(Lohit
-Vijaya Renu via omalley)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3164">HADOOP-3164</a>. Reduce DataNode CPU usage by using FileChannel.tranferTo().
On Linux DataNode takes 5 times less CPU while serving data. Results may
vary on other platforms.<br />(rangadi)</li>
@@ -421,11 +424,7 @@ security manager non-fatal.<br />(Edward Yoon via omalley)</li>
instead of removed getFileCacheHints.<br />(lohit vijayarenu via cdouglas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3401">HADOOP-3401</a>. Update FileBench to set the new
"mapred.work.output.dir" property to work post-3041.<br />(cdouglas via omalley)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-2159">HADOOP-2159</a> Namenode stuck in safemode. The counter blockSafe should
-not be decremented for invalid blocks.<br />(hairong)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-2669">HADOOP-2669</a>. DFSClient locks pendingCreates appropriately.<br />(dhruba)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3477">HADOOP-3477</a>. Fix build to not package contrib/*/bin twice in
-distributions.<br />(Adam Heath via cutting)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3410">HADOOP-3410</a>. Fix KFS implemenation to return correct file
modification time.<br />(Sriram Rao via cutting)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3340">HADOOP-3340</a>. Fix DFS metrics for BlocksReplicated, HeartbeatsNum, and
@@ -434,8 +433,6 @@ BlockReportsAverageTime.<br />(lohit vijayarenu via cdouglas)</li>
/bin/bash and fix the test patch to require bash instead of sh.<br />(Brice Arnould via omalley)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3471">HADOOP-3471</a>. Fix spurious errors from TestIndexedSort and add additional
logging to let failures be reproducible.<br />(cdouglas)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3475">HADOOP-3475</a>. Fix MapTask to correctly size the accounting allocation of
-io.sort.mb.<br />(cdouglas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3443">HADOOP-3443</a>. Avoid copying map output across partitions when renaming a
single spill.<br />(omalley via cdouglas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3454">HADOOP-3454</a>. Fix Text::find to search only valid byte ranges.<br />(Chad Whipkey
@@ -444,8 +441,6 @@ via cdouglas)</li>
JobClient. Moves the cli parsing from JobShell to GenericOptionsParser.
Thus removes the class org.apache.hadoop.mapred.JobShell.<br />(Amareshwari Sriramadasu via ddas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-2132">HADOOP-2132</a>. Only RUNNING/PREP jobs can be killed.<br />(Jothi Padmanabhan via ddas)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3472">HADOOP-3472</a> MapFile.Reader getClosest() function returns incorrect results
-when before is true<br />(Todd Lipcon via Stack)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3476">HADOOP-3476</a>. Code cleanup in fuse-dfs.<br />(Peter Wyckoff via dhruba)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-2427">HADOOP-2427</a>. Ensure that the cwd of completed tasks is cleaned-up
correctly on task-completion.<br />(Amareshwari Sri Ramadasu via acmurthy)</li>
@@ -483,9 +478,6 @@ with a configuration.<br />(Subramaniam Krishnan via omalley)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3519">HADOOP-3519</a>.  Fix NPE in DFS FileSystem rename.<br />(hairong via tomwhite)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3528">HADOOP-3528</a>. Metrics FilesCreated and files_deleted metrics
do not match.<br />(Lohit via Mahadev)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3442">HADOOP-3442</a>. Limit recursion depth on the stack for QuickSort to prevent
-StackOverflowErrors. To avoid O(n*n) cases, when partitioning depth exceeds
-a multiple of log(n), change to HeapSort.<br />(cdouglas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3418">HADOOP-3418</a>. When a directory is deleted, any leases that point to files
in the subdirectory are removed. ((Tsz Wo (Nicholas), SZE via dhruba)
</li>
@@ -499,11 +491,61 @@ merge may be missed.<br />(Arun Murthy via ddas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3560">HADOOP-3560</a>. Fixes a problem to do with split creation in archives.<br />(Mahadev Konar via ddas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3545">HADOOP-3545</a>. Fixes a overflow problem in archives.<br />(Mahadev Konar via ddas)</li>
      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3561">HADOOP-3561</a>. Prevent the trash from deleting its parent directories.<br />(cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3575">HADOOP-3575</a>. Fix the clover ant target after package refactoring.<br />(Nigel Daley via cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3539">HADOOP-3539</a>.  Fix the tool path in the bin/hadoop script under
+cygwin. (Tsz Wo (Nicholas), Sze via omalley)
+</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3520">HADOOP-3520</a>.  TestDFSUpgradeFromImage triggers a race condition in the
+Upgrade Manager. Fixed.<br />(dhruba)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3586">HADOOP-3586</a>. Provide deprecated, backwards compatibile semantics for the
+combiner to be run once and only once on each record.<br />(cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3533">HADOOP-3533</a>. Add deprecated methods to provide API compatibility
+between 0.18 and 0.17. Remove the deprecated methods in trunk.<br />(omalley)</li>
    </ol>
  </li>
</ul>
<h2><a href="javascript:toggleList('older')">Older Releases</a></h2>
<ul id="older">
+<h3><a href="javascript:toggleList('release_0.17.1_-_unreleased_')">Release 0.17.1 - Unreleased
+</a></h3>
+<ul id="release_0.17.1_-_unreleased_">
+  <li><a href="javascript:toggleList('release_0.17.1_-_unreleased_._incompatible_changes_')">  INCOMPATIBLE CHANGES
+</a>&nbsp;&nbsp;&nbsp;(1)
+    <ol id="release_0.17.1_-_unreleased_._incompatible_changes_">
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3565">HADOOP-3565</a>. Fix the Java serialization, which is not enabled by
+default, to clear the state of the serializer between objects.<br />(tomwhite via omalley)</li>
+    </ol>
+  </li>
+  <li><a href="javascript:toggleList('release_0.17.1_-_unreleased_._improvements_')">  IMPROVEMENTS
+</a>&nbsp;&nbsp;&nbsp;(1)
+    <ol id="release_0.17.1_-_unreleased_._improvements_">
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3522">HADOOP-3522</a>. Improve documentation on reduce pointing out that
+input keys and values will be reused.<br />(omalley)</li>
+    </ol>
+  </li>
+  <li><a href="javascript:toggleList('release_0.17.1_-_unreleased_._bug_fixes_')">  BUG FIXES
+</a>&nbsp;&nbsp;&nbsp;(8)
+    <ol id="release_0.17.1_-_unreleased_._bug_fixes_">
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-2159">HADOOP-2159</a> Namenode stuck in safemode. The counter blockSafe should
+not be decremented for invalid blocks.<br />(hairong)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3472">HADOOP-3472</a> MapFile.Reader getClosest() function returns incorrect results
+when before is true<br />(Todd Lipcon via Stack)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3442">HADOOP-3442</a>. Limit recursion depth on the stack for QuickSort to prevent
+StackOverflowErrors. To avoid O(n*n) cases, when partitioning depth exceeds
+a multiple of log(n), change to HeapSort.<br />(cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3477">HADOOP-3477</a>. Fix build to not package contrib/*/bin twice in
+distributions.<br />(Adam Heath via cutting)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3475">HADOOP-3475</a>. Fix MapTask to correctly size the accounting allocation of
+io.sort.mb.<br />(cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3550">HADOOP-3550</a>. Fix the serialization data structures in MapTask where the
+value lengths are incorrectly calculated.<br />(cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3526">HADOOP-3526</a>. Fix contrib/data_join framework by cloning values retained
+in the reduce.<br />(Spyros Blanas via cdouglas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-1979">HADOOP-1979</a>. Speed up fsck by adding a buffered stream.<br />(Lohit
+Vijaya Renu via omalley)</li>
+    </ol>
+  </li>
+</ul>
<h3><a href="javascript:toggleList('release_0.17.0_-_2008-05-18_')">Release 0.17.0 - 2008-05-18
</a></h3>
<ul id="release_0.17.0_-_2008-05-18_">

+ 40 - 37
docs/mapred_tutorial.html

@@ -1296,14 +1296,14 @@ document.write("Last Published: " + document.lastModified);
          the intermediate outputs, which helps to cut down the amount of data 
          transferred from the <span class="codefrag">Mapper</span> to the <span class="codefrag">Reducer</span>.
          </p>
-<p>The intermediate, sorted outputs are always stored in files of 
-          <a href="api/org/apache/hadoop/io/SequenceFile.html">
-          SequenceFile</a> format. Applications can control if, and how, the 
+<p>The intermediate, sorted outputs are always stored in a simple 
+          (key-len, key, value-len, value) format. 
+          Applications can control if, and how, the 
          intermediate outputs are to be compressed and the 
          <a href="api/org/apache/hadoop/io/compress/CompressionCodec.html">
          CompressionCodec</a> to be used via the <span class="codefrag">JobConf</span>.
          </p>
-<a name="N1066B"></a><a name="How+Many+Maps%3F"></a>
+<a name="N10667"></a><a name="How+Many+Maps%3F"></a>
<h5>How Many Maps?</h5>
<p>The number of maps is usually driven by the total size of the 
            inputs, that is, the total number of blocks of the input files.</p>
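For illustration, a minimal sketch of the (key-len, key, value-len, value) record layout named in the revised paragraph above; the vint length prefixes and the class name are assumptions for illustration, not the framework's actual internal writer:

import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.WritableUtils;

public class IntermediateRecord {
  // Writes one record as <key-len><key bytes><value-len><value bytes>.
  // The vint encoding here is an assumption, not the exact on-disk format.
  static void write(DataOutputStream out, byte[] key, byte[] value)
      throws IOException {
    WritableUtils.writeVInt(out, key.length);    // key-len
    out.write(key);                              // key bytes
    WritableUtils.writeVInt(out, value.length);  // value-len
    out.write(value);                            // value bytes
  }
}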
@@ -1316,7 +1316,7 @@ document.write("Last Published: " + document.lastModified);
            <a href="api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)">
            setNumMapTasks(int)</a> (which only provides a hint to the framework) 
            is used to set it even higher.</p>
-<a name="N10683"></a><a name="Reducer"></a>
+<a name="N1067F"></a><a name="Reducer"></a>
<h4>Reducer</h4>
<p>
<a href="api/org/apache/hadoop/mapred/Reducer.html">
@@ -1339,18 +1339,18 @@ document.write("Last Published: " + document.lastModified);
<p>
<span class="codefrag">Reducer</span> has 3 primary phases: shuffle, sort and reduce.
          </p>
-<a name="N106B3"></a><a name="Shuffle"></a>
+<a name="N106AF"></a><a name="Shuffle"></a>
<h5>Shuffle</h5>
<p>Input to the <span class="codefrag">Reducer</span> is the sorted output of the
            mappers. In this phase the framework fetches the relevant partition 
            of the output of all the mappers, via HTTP.</p>
-<a name="N106C0"></a><a name="Sort"></a>
+<a name="N106BC"></a><a name="Sort"></a>
<h5>Sort</h5>
<p>The framework groups <span class="codefrag">Reducer</span> inputs by keys (since 
            different mappers may have output the same key) in this stage.</p>
<p>The shuffle and sort phases occur simultaneously; while 
            map-outputs are being fetched they are merged.</p>
-<a name="N106CF"></a><a name="Secondary+Sort"></a>
+<a name="N106CB"></a><a name="Secondary+Sort"></a>
<h5>Secondary Sort</h5>
<p>If equivalence rules for grouping the intermediate keys are 
              required to be different from those for grouping keys before 
@@ -1361,7 +1361,7 @@ document.write("Last Published: " + document.lastModified);
              JobConf.setOutputKeyComparatorClass(Class)</a> can be used to 
              control how intermediate keys are grouped, these can be used in 
              conjunction to simulate <em>secondary sort on values</em>.</p>
-<a name="N106E8"></a><a name="Reduce"></a>
+<a name="N106E4"></a><a name="Reduce"></a>
<h5>Reduce</h5>
<p>In this phase the 
            <a href="api/org/apache/hadoop/mapred/Reducer.html#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)">
@@ -1377,7 +1377,7 @@ document.write("Last Published: " + document.lastModified);
            progress, set application-level status messages and update 
            <span class="codefrag">Counters</span>, or just indicate that they are alive.</p>
<p>The output of the <span class="codefrag">Reducer</span> is <em>not sorted</em>.</p>
-<a name="N10716"></a><a name="How+Many+Reduces%3F"></a>
+<a name="N10712"></a><a name="How+Many+Reduces%3F"></a>
<h5>How Many Reduces?</h5>
<p>The right number of reduces seems to be <span class="codefrag">0.95</span> or 
            <span class="codefrag">1.75</span> multiplied by (&lt;<em>no. of nodes</em>&gt; * 
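For reference, the rule of thumb above amounts to one call on the JobConf; the cluster numbers and class name below are illustrative assumptions:

import org.apache.hadoop.mapred.JobConf;

public class ReduceCount {
  static void setReduces(JobConf conf) {
    // 0.95: all reduces can launch at once; 1.75: faster nodes run a
    // second wave, giving better load balancing.
    int nodes = 10;             // illustrative cluster size
    int maxReducesPerNode = 2;  // illustrative per-node reduce capacity
    conf.setNumReduceTasks((int) (0.95 * nodes * maxReducesPerNode));
  }
}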
@@ -1392,17 +1392,17 @@ document.write("Last Published: " + document.lastModified);
<p>The scaling factors above are slightly less than whole numbers to 
            reserve a few reduce slots in the framework for speculative-tasks and
            failed tasks.</p>
-<a name="N1073B"></a><a name="Reducer+NONE"></a>
+<a name="N10737"></a><a name="Reducer+NONE"></a>
<h5>Reducer NONE</h5>
<p>It is legal to set the number of reduce-tasks to <em>zero</em> if 
            no reduction is desired.</p>
<p>In this case the outputs of the map-tasks go directly to the
            <span class="codefrag">FileSystem</span>, into the output path set by 
-            <a href="api/org/apache/hadoop/mapred/FileInputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">
+            <a href="api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">
            setOutputPath(Path)</a>. The framework does not sort the 
            map-outputs before writing them out to the <span class="codefrag">FileSystem</span>.
            </p>
-<a name="N10756"></a><a name="Partitioner"></a>
+<a name="N10752"></a><a name="Partitioner"></a>
<h4>Partitioner</h4>
<p>
<a href="api/org/apache/hadoop/mapred/Partitioner.html">
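The link fixes in the hunk above move setOutputPath from FileInputFormat to FileOutputFormat; in application code the corrected usage is sketched below (class name and paths are illustrative assumptions):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class PathSetup {
  static void configurePaths(JobConf conf) {
    // Input path setters stay on FileInputFormat; the output path
    // setter lives on FileOutputFormat, as the corrected links state.
    FileInputFormat.setInputPaths(conf, new Path("in"));    // illustrative path
    FileOutputFormat.setOutputPath(conf, new Path("out"));  // illustrative path
  }
}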
@@ -1416,7 +1416,7 @@ document.write("Last Published: " + document.lastModified);
<p>
<a href="api/org/apache/hadoop/mapred/lib/HashPartitioner.html">
          HashPartitioner</a> is the default <span class="codefrag">Partitioner</span>.</p>
-<a name="N10775"></a><a name="Reporter"></a>
+<a name="N10771"></a><a name="Reporter"></a>
<h4>Reporter</h4>
<p>
<a href="api/org/apache/hadoop/mapred/Reporter.html">
@@ -1435,7 +1435,7 @@ document.write("Last Published: " + document.lastModified);
          </p>
<p>Applications can also update <span class="codefrag">Counters</span> using the 
          <span class="codefrag">Reporter</span>.</p>
-<a name="N1079F"></a><a name="OutputCollector"></a>
+<a name="N1079B"></a><a name="OutputCollector"></a>
<h4>OutputCollector</h4>
<p>
<a href="api/org/apache/hadoop/mapred/OutputCollector.html">
@@ -1446,7 +1446,7 @@ document.write("Last Published: " + document.lastModified);
<p>Hadoop Map-Reduce comes bundled with a 
        <a href="api/org/apache/hadoop/mapred/lib/package-summary.html">
        library</a> of generally useful mappers, reducers, and partitioners.</p>
-<a name="N107BA"></a><a name="Job+Configuration"></a>
+<a name="N107B6"></a><a name="Job+Configuration"></a>
<h3 class="h4">Job Configuration</h3>
<p>
<a href="api/org/apache/hadoop/mapred/JobConf.html">
@@ -1486,7 +1486,7 @@ document.write("Last Published: " + document.lastModified);
        and (<a href="api/org/apache/hadoop/mapred/FileInputFormat.html#setInputPaths(org.apache.hadoop.mapred.JobConf,%20java.lang.String)">setInputPaths(JobConf, String)</a>
        /<a href="api/org/apache/hadoop/mapred/FileInputFormat.html#addInputPath(org.apache.hadoop.mapred.JobConf,%20java.lang.String)">addInputPaths(JobConf, String)</a>)
        and where the output files should be written
-        (<a href="api/org/apache/hadoop/mapred/FileInputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">setOutputPath(Path)</a>).</p>
+        (<a href="api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">setOutputPath(Path)</a>).</p>
<p>Optionally, <span class="codefrag">JobConf</span> is used to specify other advanced 
        facets of the job such as the <span class="codefrag">Comparator</span> to be used, files 
        to be put in the <span class="codefrag">DistributedCache</span>, whether intermediate 
@@ -1504,7 +1504,7 @@ document.write("Last Published: " + document.lastModified);
        <a href="api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String, java.lang.String)">set(String, String)</a>/<a href="api/org/apache/hadoop/conf/Configuration.html#get(java.lang.String, java.lang.String)">get(String, String)</a>
        to set/get arbitrary parameters needed by applications. However, use the 
        <span class="codefrag">DistributedCache</span> for large amounts of (read-only) data.</p>
-<a name="N1084C"></a><a name="Task+Execution+%26+Environment"></a>
+<a name="N10848"></a><a name="Task+Execution+%26+Environment"></a>
<h3 class="h4">Task Execution &amp; Environment</h3>
<p>The <span class="codefrag">TaskTracker</span> executes the <span class="codefrag">Mapper</span>/ 
        <span class="codefrag">Reducer</span>  <em>task</em> as a child process in a separate jvm.
@@ -1741,7 +1741,7 @@ document.write("Last Published: " + document.lastModified);
        loaded via <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#loadLibrary(java.lang.String)">
        System.loadLibrary</a> or <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#load(java.lang.String)">
        System.load</a>.</p>
-<a name="N109F7"></a><a name="Job+Submission+and+Monitoring"></a>
+<a name="N109F3"></a><a name="Job+Submission+and+Monitoring"></a>
<h3 class="h4">Job Submission and Monitoring</h3>
<p>
<a href="api/org/apache/hadoop/mapred/JobClient.html">
@@ -1802,7 +1802,7 @@ document.write("Last Published: " + document.lastModified);
<p>Normally the user creates the application, describes various facets 
        of the job via <span class="codefrag">JobConf</span>, and then uses the 
        <span class="codefrag">JobClient</span> to submit the job and monitor its progress.</p>
-<a name="N10A57"></a><a name="Job+Control"></a>
+<a name="N10A53"></a><a name="Job+Control"></a>
<h4>Job Control</h4>
<p>Users may need to chain map-reduce jobs to accomplish complex
          tasks which cannot be done via a single map-reduce job. This is fairly
@@ -1838,7 +1838,7 @@ document.write("Last Published: " + document.lastModified);
            </li>
          
</ul>
-<a name="N10A81"></a><a name="Job+Input"></a>
+<a name="N10A7D"></a><a name="Job+Input"></a>
<h3 class="h4">Job Input</h3>
<p>
<a href="api/org/apache/hadoop/mapred/InputFormat.html">
@@ -1886,7 +1886,7 @@ document.write("Last Published: " + document.lastModified);
        appropriate <span class="codefrag">CompressionCodec</span>. However, it must be noted that
        compressed files with the above extensions cannot be <em>split</em> and 
        each compressed file is processed in its entirety by a single mapper.</p>
-<a name="N10AEB"></a><a name="InputSplit"></a>
+<a name="N10AE7"></a><a name="InputSplit"></a>
<h4>InputSplit</h4>
<p>
<a href="api/org/apache/hadoop/mapred/InputSplit.html">
@@ -1900,7 +1900,7 @@ document.write("Last Published: " + document.lastModified);
          FileSplit</a> is the default <span class="codefrag">InputSplit</span>. It sets 
          <span class="codefrag">map.input.file</span> to the path of the input file for the
          logical split.</p>
-<a name="N10B10"></a><a name="RecordReader"></a>
+<a name="N10B0C"></a><a name="RecordReader"></a>
<h4>RecordReader</h4>
<p>
<a href="api/org/apache/hadoop/mapred/RecordReader.html">
@@ -1912,7 +1912,7 @@ document.write("Last Published: " + document.lastModified);
          for processing. <span class="codefrag">RecordReader</span> thus assumes the 
          responsibility of processing record boundaries and presents the tasks 
          with keys and values.</p>
-<a name="N10B33"></a><a name="Job+Output"></a>
+<a name="N10B2F"></a><a name="Job+Output"></a>
<h3 class="h4">Job Output</h3>
<p>
<a href="api/org/apache/hadoop/mapred/OutputFormat.html">
@@ -1937,7 +1937,7 @@ document.write("Last Published: " + document.lastModified);
<p>
<span class="codefrag">TextOutputFormat</span> is the default 
        <span class="codefrag">OutputFormat</span>.</p>
-<a name="N10B5C"></a><a name="Task+Side-Effect+Files"></a>
+<a name="N10B58"></a><a name="Task+Side-Effect+Files"></a>
<h4>Task Side-Effect Files</h4>
<p>In some applications, component tasks need to create and/or write to
          side-files, which differ from the actual job-output files.</p>
@@ -1961,7 +1961,7 @@ document.write("Last Published: " + document.lastModified);
<p>The application-writer can take advantage of this feature by 
          creating any side-files required in <span class="codefrag">${mapred.work.output.dir}</span>
          during execution of a task via 
-          <a href="api/org/apache/hadoop/mapred/FileInputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)">
+          <a href="api/org/apache/hadoop/mapred/FileOutputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)">
          FileOutputFormat.getWorkOutputPath()</a>, and the framework will promote them 
          similarly for succesful task-attempts, thus eliminating the need to 
          pick unique paths per task-attempt.</p>
@@ -1970,13 +1970,13 @@ document.write("Last Published: " + document.lastModified);
          <span class="codefrag">${mapred.output.dir}/_temporary/_{$taskid}</span>, and this value is 
          set by the map-reduce framework. So, just create any side-files in the 
          path  returned by
-          <a href="api/org/apache/hadoop/mapred/FileInputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)">
+          <a href="api/org/apache/hadoop/mapred/FileOutputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)">
          FileOutputFormat.getWorkOutputPath() </a>from map/reduce 
          task to take advantage of this feature.</p>
<p>The entire discussion holds true for maps of jobs with 
           reducer=NONE (i.e. 0 reduces) since output of the map, in that case, 
           goes directly to HDFS.</p>
-<a name="N10BA4"></a><a name="RecordWriter"></a>
+<a name="N10BA0"></a><a name="RecordWriter"></a>
<h4>RecordWriter</h4>
<p>
<a href="api/org/apache/hadoop/mapred/RecordWriter.html">
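The corrected links above point side-file creation at FileOutputFormat.getWorkOutputPath(JobConf); a minimal sketch of a task using it (class and file names are illustrative assumptions):

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SideFiles {
  static FSDataOutputStream createSideFile(JobConf conf) throws IOException {
    // ${mapred.work.output.dir} for this task-attempt; the framework
    // promotes its contents on successful task completion.
    Path workDir = FileOutputFormat.getWorkOutputPath(conf);
    FileSystem fs = workDir.getFileSystem(conf);
    return fs.create(new Path(workDir, "side-file.dat"));  // illustrative name
  }
}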
@@ -1984,9 +1984,9 @@ document.write("Last Published: " + document.lastModified);
          pairs to an output file.</p>
<p>RecordWriter implementations write the job outputs to the 
          <span class="codefrag">FileSystem</span>.</p>
-<a name="N10BBB"></a><a name="Other+Useful+Features"></a>
+<a name="N10BB7"></a><a name="Other+Useful+Features"></a>
<h3 class="h4">Other Useful Features</h3>
-<a name="N10BC1"></a><a name="Counters"></a>
+<a name="N10BBD"></a><a name="Counters"></a>
<h4>Counters</h4>
<p>
<span class="codefrag">Counters</span> represent global counters, defined either by 
@@ -1997,7 +1997,10 @@ document.write("Last Published: " + document.lastModified);
<p>Applications can define arbitrary <span class="codefrag">Counters</span> (of type 
          <span class="codefrag">Enum</span>) and update them via 
          <a href="api/org/apache/hadoop/mapred/Reporter.html#incrCounter(java.lang.Enum, long)">
-          Reporter.incrCounter(Enum, long)</a> in the <span class="codefrag">map</span> and/or 
+          Reporter.incrCounter(Enum, long)</a> or 
+          <a href="api/org/apache/hadoop/mapred/Reporter.html#incrCounter(java.lang.String, java.lang.String, long amount)">
+          Reporter.incrCounter(String, String, long)</a>
+          in the <span class="codefrag">map</span> and/or 
          <span class="codefrag">reduce</span> methods. These counters are then globally 
          aggregated by the framework.</p>
<a name="N10BEC"></a><a name="DistributedCache"></a>
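Both incrCounter overloads documented above can be called from a map method; a minimal sketch against the 0.18 mapred API (class, enum, and group/counter names are illustrative assumptions):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class CountingMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  enum Records { TOTAL }  // illustrative application-defined counter

  public void map(LongWritable key, Text value,
                  OutputCollector<Text, IntWritable> output,
                  Reporter reporter) throws IOException {
    reporter.incrCounter(Records.TOTAL, 1);                     // Enum overload
    reporter.incrCounter("MyApp", "Bytes", value.getLength());  // String overload
    output.collect(value, new IntWritable(1));
  }
}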
@@ -2010,8 +2013,8 @@ document.write("Last Published: " + document.lastModified);
<span class="codefrag">DistributedCache</span> is a facility provided by the 
          Map-Reduce framework to cache files (text, archives, jars and so on) 
          needed by applications.</p>
-<p>Applications specify the files to be cached via urls (hdfs:// or 
-          http://) in the <span class="codefrag">JobConf</span>. The <span class="codefrag">DistributedCache</span> 
+<p>Applications specify the files to be cached via urls (hdfs://)
+          in the <span class="codefrag">JobConf</span>. The <span class="codefrag">DistributedCache</span> 
          assumes that the files specified via hdfs:// urls are already present 
          on the <span class="codefrag">FileSystem</span>.</p>
<p>The framework will copy the necessary files to the slave node 
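Consistent with the revised text (hdfs:// urls only), adding a file to the cache looks roughly like this; the URI and class name are illustrative assumptions:

import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.mapred.JobConf;

public class CacheSetup {
  static void addLookupFile(JobConf conf) throws URISyntaxException {
    // The file must already be present on the FileSystem, as the text notes.
    DistributedCache.addCacheFile(
        new URI("hdfs://namenode:9000/cache/lookup.dat"), conf);  // illustrative URI
  }
}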
@@ -2225,11 +2228,11 @@ document.write("Last Published: " + document.lastModified);
<a name="N10D57"></a><a name="Job+Outputs"></a>
<h5>Job Outputs</h5>
<p>Applications can control compression of job-outputs via the
-            <a href="api/org/apache/hadoop/mapred/OutputFormatBase.html#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)">
-            OutputFormatBase.setCompressOutput(JobConf, boolean)</a> api and the 
+            <a href="api/org/apache/hadoop/mapred/FileOutputFormat.html#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)">
+            FileOutputFormat.setCompressOutput(JobConf, boolean)</a> api and the 
            <span class="codefrag">CompressionCodec</span> to be used can be specified via the
-            <a href="api/org/apache/hadoop/mapred/OutputFormatBase.html#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)">
-            OutputFormatBase.setOutputCompressorClass(JobConf, Class)</a> api.</p>
+            <a href="api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)">
+            FileOutputFormat.setOutputCompressorClass(JobConf, Class)</a> api.</p>
<p>If the job outputs are to be stored in the 
            <a href="api/org/apache/hadoop/mapred/SequenceFileOutputFormat.html">
            SequenceFileOutputFormat</a>, the required
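With both links now on FileOutputFormat, enabling compressed job outputs reads as below; GzipCodec is one of the bundled codecs, chosen here for illustration:

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class OutputCompression {
  static void enableGzipOutputs(JobConf conf) {
    // Turn on output compression and pick the codec, both on FileOutputFormat.
    FileOutputFormat.setCompressOutput(conf, true);
    FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
  }
}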

File diff suppressed because it is too large
+ 1 - 1
docs/mapred_tutorial.pdf


+ 14 - 11
src/docs/src/documentation/content/xdocs/mapred_tutorial.xml

@@ -771,9 +771,9 @@
          transferred from the <code>Mapper</code> to the <code>Reducer</code>.
          </p>
 
-          <p>The intermediate, sorted outputs are always stored in files of 
-          <a href="ext:api/org/apache/hadoop/io/sequencefile">
-          SequenceFile</a> format. Applications can control if, and how, the 
+          <p>The intermediate, sorted outputs are always stored in a simple 
+          (key-len, key, value-len, value) format. 
+          Applications can control if, and how, the 
          intermediate outputs are to be compressed and the 
          <a href="ext:api/org/apache/hadoop/io/compress/compressioncodec">
          CompressionCodec</a> to be used via the <code>JobConf</code>.
@@ -1469,8 +1469,11 @@
          
          <p>Applications can define arbitrary <code>Counters</code> (of type 
          <code>Enum</code>) and update them via 
-          <a href="ext:api/org/apache/hadoop/mapred/reporter/incrcounter">
-          Reporter.incrCounter(Enum, long)</a> in the <code>map</code> and/or 
+          <a href="ext:api/org/apache/hadoop/mapred/reporter/incrcounterEnum">
+          Reporter.incrCounter(Enum, long)</a> or 
+          <a href="ext:api/org/apache/hadoop/mapred/reporter/incrcounterString">
+          Reporter.incrCounter(String, String, long)</a>
+          in the <code>map</code> and/or 
          <code>reduce</code> methods. These counters are then globally 
          aggregated by the framework.</p>
        </section>       
@@ -1486,8 +1489,8 @@
          Map-Reduce framework to cache files (text, archives, jars and so on) 
          needed by applications.</p>
 
-          <p>Applications specify the files to be cached via urls (hdfs:// or 
-          http://) in the <code>JobConf</code>. The <code>DistributedCache</code> 
+          <p>Applications specify the files to be cached via urls (hdfs://)
+          in the <code>JobConf</code>. The <code>DistributedCache</code> 
          assumes that the files specified via hdfs:// urls are already present 
          on the <code>FileSystem</code>.</p>

@@ -1719,11 +1722,11 @@
            <title>Job Outputs</title>
            
            <p>Applications can control compression of job-outputs via the
-            <a href="ext:api/org/apache/hadoop/mapred/outputformatbase/setcompressoutput">
-            OutputFormatBase.setCompressOutput(JobConf, boolean)</a> api and the 
+            <a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/setcompressoutput">
+            FileOutputFormat.setCompressOutput(JobConf, boolean)</a> api and the 
            <code>CompressionCodec</code> to be used can be specified via the
-            <a href="ext:api/org/apache/hadoop/mapred/outputformatbase/setoutputcompressorclass">
-            OutputFormatBase.setOutputCompressorClass(JobConf, Class)</a> api.</p>
+            <a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/setoutputcompressorclass">
+            FileOutputFormat.setOutputCompressorClass(JobConf, Class)</a> api.</p>
            
            <p>If the job outputs are to be stored in the 
            <a href="ext:api/org/apache/hadoop/mapred/sequencefileoutputformat">

+ 5 - 2
src/docs/src/documentation/content/xdocs/site.xml

@@ -136,10 +136,12 @@ See http://forrest.apache.org/docs/linking.html for more info.
                 <setinputpathstring href="#setInputPaths(org.apache.hadoop.mapred.JobConf,%20java.lang.String)" />
                 <addinputpathstring href="#addInputPath(org.apache.hadoop.mapred.JobConf,%20java.lang.String)" />
              </fileinputformat>
-              <fileoutputformat href="FileInputFormat.html">
+              <fileoutputformat href="FileOutputFormat.html">
                <getoutputpath href="#getOutputPath(org.apache.hadoop.mapred.JobConf)" />
                <getworkoutputpath href="#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)" />
                <setoutputpath href="#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)" />
+                <setcompressoutput href="#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)" />
+                <setoutputcompressorclass href="#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)" />
              </fileoutputformat>
              <filesplit href="FileSplit.html" />
              <inputformat href="InputFormat.html" />
@@ -200,7 +202,8 @@ See http://forrest.apache.org/docs/linking.html for more info.
                <reduce href="#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)" />
              </reducer>
              <reporter href="Reporter.html">
-                <incrcounter href="#incrCounter(java.lang.Enum, long)" />
+                <incrcounterEnum href="#incrCounter(java.lang.Enum, long)" />
+                <incrcounterString href="#incrCounter(java.lang.String, java.lang.String, long amount)" />
              </reporter>
              <runningjob href="RunningJob.html" />
              <textinputformat href="TextInputFormat.html" />

Some files were not shown because too many files changed in this diff