HADOOP-1230. Add new map/reduce API and deprecate the old one. Generally,
the old code should work without problems. The new API is in
org.apache.hadoop.mapreduce and the old classes in org.apache.hadoop.mapred
are deprecated. Differences in the new API:
1. All of the methods take Context objects that allow us to add new
methods without breaking compatibility.
2. Mapper and Reducer now have a "run" method that is called once and
contains the control loop for the task, which lets applications
replace it.
3. Mapper and Reducer by default are Identity Mapper and Reducer.
4. The FileOutputFormats use part-r-00000 for the output of reduce 0 and
part-m-00000 for the output of map 0.
5. The reduce grouping comparator now uses the raw compare instead of
object compare.
6. The number of maps in FileInputFormat is controlled by min and max
split size rather than min size and the desired number of maps.
(omalley)


git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@726850 13f79535-47bb-0310-9956-ffa450edef68
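
A minimal sketch (not part of this patch) of what item 2 in the message enables: a mapper written against the new org.apache.hadoop.mapreduce.Mapper can override run() to wrap or filter the per-task record loop, while setup(), map(), and cleanup() keep their usual roles. The class name and the blank-line filter below are illustrative only.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical example: a word-count mapper that also overrides run(), the
// per-task control loop added by this patch, to skip blank lines before
// handing records to map().
public class SkippingWordCountMapper
    extends Mapper<LongWritable, Text, Text, IntWritable> {

  private final static IntWritable one = new IntWritable(1);
  private final Text word = new Text();

  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, one);   // replaces output.collect(word, one)
    }
  }

  @Override
  public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    // Same loop as the default run(), with one extra filter.
    while (context.nextKeyValue()) {
      if (context.getCurrentValue().toString().trim().length() > 0) {
        map(context.getCurrentKey(), context.getCurrentValue(), context);
      }
    }
    cleanup(context);
  }
}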

Owen O'Malley, 16 years ago
Parent
Current commit
2a3f194535
99 files changed, with 3164 additions and 1593 deletions
  1. CHANGES.txt (+20 -2)
  2. src/core/org/apache/hadoop/conf/Configuration.java (+20 -0)
  3. src/core/org/apache/hadoop/io/BytesWritable.java (+4 -0)
  4. src/core/org/apache/hadoop/io/WritableUtils.java (+4 -31)
  5. src/core/org/apache/hadoop/util/GenericOptionsParser.java (+27 -1)
  6. src/core/org/apache/hadoop/util/ReflectionUtils.java (+72 -2)
  7. src/examples/org/apache/hadoop/examples/ExampleDriver.java (+1 -0)
  8. src/examples/org/apache/hadoop/examples/MultiFileWordCount.java (+4 -3)
  9. src/examples/org/apache/hadoop/examples/WordCount.java (+43 -133)
  10. src/mapred/org/apache/hadoop/mapred/Counters.java (+18 -75)
  11. src/mapred/org/apache/hadoop/mapred/FileInputFormat.java (+3 -2)
  12. src/mapred/org/apache/hadoop/mapred/FileSplit.java (+7 -4)
  13. src/mapred/org/apache/hadoop/mapred/ID.java (+3 -51)
  14. src/mapred/org/apache/hadoop/mapred/InputFormat.java (+2 -0)
  15. src/mapred/org/apache/hadoop/mapred/InputSplit.java (+2 -0)
  16. src/mapred/org/apache/hadoop/mapred/IsolationRunner.java (+3 -1)
  17. src/mapred/org/apache/hadoop/mapred/JVMId.java (+1 -1)
  18. src/mapred/org/apache/hadoop/mapred/JobClient.java (+175 -48)
  19. src/mapred/org/apache/hadoop/mapred/JobConf.java (+36 -0)
  20. src/mapred/org/apache/hadoop/mapred/JobConfigurable.java (+1 -0)
  21. src/mapred/org/apache/hadoop/mapred/JobContext.java (+12 -7)
  22. src/mapred/org/apache/hadoop/mapred/JobID.java (+14 -88)
  23. src/mapred/org/apache/hadoop/mapred/JobProfile.java (+6 -4)
  24. src/mapred/org/apache/hadoop/mapred/JobStatus.java (+2 -3)
  25. src/mapred/org/apache/hadoop/mapred/LineRecordReader.java (+3 -1)
  26. src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java (+2 -2)
  27. src/mapred/org/apache/hadoop/mapred/MapReduceBase.java (+1 -0)
  28. src/mapred/org/apache/hadoop/mapred/MapRunnable.java (+2 -0)
  29. src/mapred/org/apache/hadoop/mapred/MapTask.java (+231 -60)
  30. src/mapred/org/apache/hadoop/mapred/Mapper.java (+2 -0)
  31. src/mapred/org/apache/hadoop/mapred/OutputCommitter.java (+74 -2)
  32. src/mapred/org/apache/hadoop/mapred/OutputFormat.java (+2 -0)
  33. src/mapred/org/apache/hadoop/mapred/Partitioner.java (+2 -0)
  34. src/mapred/org/apache/hadoop/mapred/RawKeyValueIterator.java (+1 -1)
  35. src/mapred/org/apache/hadoop/mapred/ReduceTask.java (+136 -30)
  36. src/mapred/org/apache/hadoop/mapred/Reducer.java (+2 -0)
  37. src/mapred/org/apache/hadoop/mapred/Reporter.java (+13 -2)
  38. src/mapred/org/apache/hadoop/mapred/RunningJob.java (+8 -0)
  39. src/mapred/org/apache/hadoop/mapred/SequenceFileInputFormat.java (+6 -1)
  40. src/mapred/org/apache/hadoop/mapred/SequenceFileOutputFormat.java (+6 -1)
  41. src/mapred/org/apache/hadoop/mapred/Task.java (+201 -172)
  42. src/mapred/org/apache/hadoop/mapred/TaskAttemptContext.java (+20 -14)
  43. src/mapred/org/apache/hadoop/mapred/TaskAttemptID.java (+22 -92)
  44. src/mapred/org/apache/hadoop/mapred/TaskID.java (+25 -116)
  45. src/mapred/org/apache/hadoop/mapred/TaskReport.java (+7 -7)
  46. src/mapred/org/apache/hadoop/mapred/TextInputFormat.java (+5 -1)
  47. src/mapred/org/apache/hadoop/mapred/TextOutputFormat.java (+5 -1)
  48. src/mapred/org/apache/hadoop/mapred/lib/HashPartitioner.java (+5 -1)
  49. src/mapred/org/apache/hadoop/mapred/lib/IdentityMapper.java (+4 -1)
  50. src/mapred/org/apache/hadoop/mapred/lib/IdentityReducer.java (+4 -1)
  51. src/mapred/org/apache/hadoop/mapred/lib/InverseMapper.java (+5 -1)
  52. src/mapred/org/apache/hadoop/mapred/lib/LongSumReducer.java (+5 -1)
  53. src/mapred/org/apache/hadoop/mapred/lib/NullOutputFormat.java (+3 -0)
  54. src/mapred/org/apache/hadoop/mapred/lib/TokenCountMapper.java (+5 -1)
  55. src/mapred/org/apache/hadoop/mapreduce/Counter.java (+48 -9)
  56. src/mapred/org/apache/hadoop/mapreduce/CounterGroup.java (+159 -2)
  57. src/mapred/org/apache/hadoop/mapreduce/Counters.java (+184 -0)
  58. src/mapred/org/apache/hadoop/mapreduce/ID.java (+2 -0)
  59. src/mapred/org/apache/hadoop/mapreduce/Job.java (+153 -39)
  60. src/mapred/org/apache/hadoop/mapreduce/JobContext.java (+28 -67)
  61. src/mapred/org/apache/hadoop/mapreduce/JobID.java (+35 -63)
  62. src/mapred/org/apache/hadoop/mapreduce/MapContext.java (+32 -4)
  63. src/mapred/org/apache/hadoop/mapreduce/Mapper.java (+11 -10)
  64. src/mapred/org/apache/hadoop/mapreduce/OutputCommitter.java (+113 -0)
  65. src/mapred/org/apache/hadoop/mapreduce/OutputFormat.java (+12 -0)
  66. src/mapred/org/apache/hadoop/mapreduce/Partitioner.java (+1 -0)
  67. src/mapred/org/apache/hadoop/mapreduce/RecordReader.java (+16 -9)
  68. src/mapred/org/apache/hadoop/mapreduce/RecordWriter.java (+5 -4)
  69. src/mapred/org/apache/hadoop/mapreduce/ReduceContext.java (+148 -7)
  70. src/mapred/org/apache/hadoop/mapreduce/Reducer.java (+18 -11)
  71. src/mapred/org/apache/hadoop/mapreduce/StatusReporter.java (+25 -0)
  72. src/mapred/org/apache/hadoop/mapreduce/TaskAttemptContext.java (+7 -12)
  73. src/mapred/org/apache/hadoop/mapreduce/TaskAttemptID.java (+46 -81)
  74. src/mapred/org/apache/hadoop/mapreduce/TaskID.java (+25 -58)
  75. src/mapred/org/apache/hadoop/mapreduce/TaskInputOutputContext.java (+54 -14)
  76. src/mapred/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java (+31 -17)
  77. src/mapred/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java (+1 -0)
  78. src/mapred/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java (+25 -11)
  79. src/mapred/org/apache/hadoop/mapreduce/lib/input/SequenceFileRecordReader.java (+18 -9)
  80. src/mapred/org/apache/hadoop/mapreduce/lib/map/InverseMapper.java (+2 -1)
  81. src/mapred/org/apache/hadoop/mapreduce/lib/map/MultithreadedMapper.java (+85 -45)
  82. src/mapred/org/apache/hadoop/mapreduce/lib/map/TokenCounterMapper.java (+3 -2)
  83. src/mapred/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java (+177 -0)
  84. src/mapred/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java (+96 -115)
  85. src/mapred/org/apache/hadoop/mapreduce/lib/output/NullOutputFormat.java (+19 -1)
  86. src/mapred/org/apache/hadoop/mapreduce/lib/output/SequenceFileOutputFormat.java (+12 -9)
  87. src/mapred/org/apache/hadoop/mapreduce/lib/output/TextOutputFormat.java (+15 -16)
  88. src/mapred/org/apache/hadoop/mapreduce/lib/reduce/IntSumReducer.java (+1 -1)
  89. src/mapred/org/apache/hadoop/mapreduce/lib/reduce/LongSumReducer.java (+1 -1)
  90. src/test/org/apache/hadoop/mapred/NotificationTestCase.java (+0 -1)
  91. src/test/org/apache/hadoop/mapred/TestFileOutputCommitter.java (+2 -2)
  92. src/test/org/apache/hadoop/mapred/TestJobSysDirWithDFS.java (+0 -1)
  93. src/test/org/apache/hadoop/mapred/TestKillCompletedJob.java (+0 -1)
  94. src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java (+0 -1)
  95. src/test/org/apache/hadoop/mapred/TestSpilledRecordsCounter.java (+0 -1)
  96. src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java (+2 -2)
  97. src/test/org/apache/hadoop/mapred/WordCount.java (+159 -0)
  98. src/test/org/apache/hadoop/mapreduce/TestMapReduceLocal.java (+105 -0)
  99. src/test/testjar/ClassWordCount.java (+1 -1)

+ 20 - 2
CHANGES.txt

@@ -87,8 +87,26 @@ Trunk (unreleased changes)
     HADOOP-4826. Introduce admin command saveNamespace. (shv)
 
     HADOOP-3063  BloomMapFile - fail-fast version of MapFile for sparsely
-                 populated key space (Andrzej Bialecki via stack)
-
+    populated key space (Andrzej Bialecki via stack)
+
+    HADOOP-1230. Add new map/reduce API and deprecate the old one. Generally,
+    the old code should work without problem. The new api is in 
+    org.apache.hadoop.mapreduce and the old classes in org.apache.hadoop.mapred
+    are deprecated. Differences in the new API:
+      1. All of the methods take Context objects that allow us to add new
+         methods without breaking compatability.
+      2. Mapper and Reducer now have a "run" method that is called once and
+         contains the control loop for the task, which lets applications
+         replace it.
+      3. Mapper and Reducer by default are Identity Mapper and Reducer.
+      4. The FileOutputFormats use part-r-00000 for the output of reduce 0 and
+         part-m-00000 for the output of map 0.
+      5. The reduce grouping comparator now uses the raw compare instead of 
+         object compare.
+      6. The number of maps in FileInputFormat is controlled by min and max
+         split size rather than min size and the desired number of maps.
+      (omalley)
+    
   IMPROVEMENTS
 
     HADOOP-4749. Added a new counter REDUCE_INPUT_BYTES. (Yongqiang He via 

+ 20 - 0
src/core/org/apache/hadoop/conf/Configuration.java

@@ -352,6 +352,17 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     getProps().setProperty(name, value);
   }
   
+  /**
+   * Sets a property if it is currently unset.
+   * @param name the property name
+   * @param value the new value
+   */
+  public void setIfUnset(String name, String value) {
+    if (get(name) == null) {
+      set(name, value);
+    }
+  }
+  
   private synchronized Properties getOverlay() {
     if (overlay==null){
       overlay=new Properties();
@@ -521,6 +532,15 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     set(name, Boolean.toString(value));
   }
 
+  /**
+   * Set the given property, if it is currently unset.
+   * @param name property name
+   * @param value new value
+   */
+  public void setBooleanIfUnset(String name, boolean value) {
+    setIfUnset(name, Boolean.toString(value));
+  }
+
   /**
    * A class that represents a set of positive integer ranges. It parses 
    * strings of the form: "2-3,5,7-" where ranges are separated by comma and 
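
A small usage sketch (not from this patch) for the setIfUnset/setBooleanIfUnset helpers added above; the property names are made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class SetIfUnsetExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("example.buffer.size", "65536");

    // Each call only takes effect when the property has no value yet.
    conf.setIfUnset("example.buffer.size", "4096");      // ignored, already set
    conf.setIfUnset("example.tmp.dir", "/tmp/example");  // applied
    conf.setBooleanIfUnset("example.compress", true);    // applied

    System.out.println(conf.get("example.buffer.size"));            // 65536
    System.out.println(conf.get("example.tmp.dir"));                 // /tmp/example
    System.out.println(conf.getBoolean("example.compress", false));  // true
  }
}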

+ 4 - 0
src/core/org/apache/hadoop/io/BytesWritable.java

@@ -22,6 +22,9 @@ import java.io.IOException;
 import java.io.DataInput;
 import java.io.DataOutput;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 /** 
  * A byte sequence that is usable as a key or value.
  * It is resizable and distinguishes between the size of the seqeunce and
@@ -30,6 +33,7 @@ import java.io.DataOutput;
  */
 public class BytesWritable extends BinaryComparable
     implements WritableComparable<BinaryComparable> {
+  private static final Log LOG = LogFactory.getLog(BytesWritable.class);
   private static final int LENGTH_BYTES = 4;
   private static final byte[] EMPTY_BYTES = {};
 

+ 4 - 31
src/core/org/apache/hadoop/io/WritableUtils.java

@@ -196,30 +196,6 @@ public final class WritableUtils  {
     System.out.println();
   }
 
-  /**
-   * A pair of input/output buffers that we use to clone writables.
-   */
-  private static class CopyInCopyOutBuffer {
-    DataOutputBuffer outBuffer = new DataOutputBuffer();
-    DataInputBuffer inBuffer = new DataInputBuffer();
-    /**
-     * Move the data from the output buffer to the input buffer.
-     */
-    void moveData() {
-      inBuffer.reset(outBuffer.getData(), outBuffer.getLength());
-    }
-  }
-  
-  /**
-   * Allocate a buffer for each thread that tries to clone objects.
-   */
-  private static ThreadLocal<CopyInCopyOutBuffer> cloneBuffers
-      = new ThreadLocal<CopyInCopyOutBuffer>() {
-      protected synchronized CopyInCopyOutBuffer initialValue() {
-        return new CopyInCopyOutBuffer();
-      }
-    };
-
   /**
    * Make a copy of a writable object using serialization to a buffer.
    * @param orig The object to copy
@@ -229,7 +205,7 @@ public final class WritableUtils  {
     try {
       @SuppressWarnings("unchecked") // Unchecked cast from Class to Class<T>
       T newInst = ReflectionUtils.newInstance((Class<T>) orig.getClass(), conf);
-      cloneInto(newInst, orig);
+      ReflectionUtils.copy(conf, orig, newInst);
       return newInst;
     } catch (IOException e) {
       throw new RuntimeException("Error writing/reading clone buffer", e);
@@ -241,14 +217,11 @@ public final class WritableUtils  {
    * @param dst the object to copy from
    * @param src the object to copy into, which is destroyed
    * @throws IOException
+   * @deprecated use ReflectionUtils.cloneInto instead.
    */
+  @Deprecated
   public static void cloneInto(Writable dst, Writable src) throws IOException {
-    CopyInCopyOutBuffer buffer = cloneBuffers.get();
-    buffer.outBuffer.reset();
-    src.write(buffer.outBuffer);
-    buffer.moveData();
-    dst.readFields(buffer.inBuffer);
-    return;
+    ReflectionUtils.cloneWritableInto(dst, src);
   }
 
   /**

+ 27 - 1
src/core/org/apache/hadoop/util/GenericOptionsParser.java

@@ -104,9 +104,26 @@ import org.apache.hadoop.fs.Path;
 public class GenericOptionsParser {
 
   private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class);
-
+  private Configuration conf;
   private CommandLine commandLine;
 
+  /**
+   * Create an options parser with the given options to parse the args.
+   * @param opts the options
+   * @param args the command line arguments
+   */
+  public GenericOptionsParser(Options opts, String[] args) {
+    this(new Configuration(), new Options(), args);
+  }
+
+  /**
+   * Create an options parser to parse the args.
+   * @param args the command line arguments
+   */
+  public GenericOptionsParser(String[] args) {
+    this(new Configuration(), new Options(), args);
+  }
+  
   /** 
    * Create a <code>GenericOptionsParser<code> to parse only the generic Hadoop  
    * arguments. 
@@ -134,6 +151,7 @@ public class GenericOptionsParser {
    */
   public GenericOptionsParser(Configuration conf, Options options, String[] args) {
     parseGeneralOptions(options, conf, args);
+    this.conf = conf;
   }
 
   /**
@@ -146,6 +164,14 @@ public class GenericOptionsParser {
     return (commandLine == null) ? new String[]{} : commandLine.getArgs();
   }
 
+  /**
+   * Get the modified configuration
+   * @return the configuration that has the modified parameters.
+   */
+  public Configuration getConfiguration() {
+    return conf;
+  }
+
   /**
    * Returns the commons-cli <code>CommandLine</code> object 
    * to process the parsed arguments. 

+ 72 - 2
src/core/org/apache/hadoop/util/ReflectionUtils.java

@@ -27,6 +27,12 @@ import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.conf.*;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.SerializationFactory;
+import org.apache.hadoop.io.serializer.Serializer;
 
 /**
  * General reflection utils
@@ -34,7 +40,9 @@ import org.apache.hadoop.conf.*;
 
 public class ReflectionUtils {
     
-  private static final Class[] emptyArray = new Class[]{};
+  private static final Class<?>[] EMPTY_ARRAY = new Class[]{};
+  private static SerializationFactory serialFactory = null;
+
   /** 
    * Cache of constructors for each class. Pins the classes so they
    * can't be garbage collected until ReflectionUtils can be collected.
@@ -98,7 +106,7 @@ public class ReflectionUtils {
     try {
       Constructor<T> meth = (Constructor<T>) CONSTRUCTOR_CACHE.get(theClass);
       if (meth == null) {
-        meth = theClass.getDeclaredConstructor(emptyArray);
+        meth = theClass.getDeclaredConstructor(EMPTY_ARRAY);
         meth.setAccessible(true);
         CONSTRUCTOR_CACHE.put(theClass, meth);
       }
@@ -217,5 +225,67 @@ public class ReflectionUtils {
   static int getCacheSize() {
     return CONSTRUCTOR_CACHE.size();
   }
+  /**
+   * A pair of input/output buffers that we use to clone writables.
+   */
+  private static class CopyInCopyOutBuffer {
+    DataOutputBuffer outBuffer = new DataOutputBuffer();
+    DataInputBuffer inBuffer = new DataInputBuffer();
+    /**
+     * Move the data from the output buffer to the input buffer.
+     */
+    void moveData() {
+      inBuffer.reset(outBuffer.getData(), outBuffer.getLength());
+    }
+  }
+  
+  /**
+   * Allocate a buffer for each thread that tries to clone objects.
+   */
+  private static ThreadLocal<CopyInCopyOutBuffer> cloneBuffers
+      = new ThreadLocal<CopyInCopyOutBuffer>() {
+      protected synchronized CopyInCopyOutBuffer initialValue() {
+        return new CopyInCopyOutBuffer();
+      }
+    };
+
+  private static SerializationFactory getFactory(Configuration conf) {
+    if (serialFactory == null) {
+      serialFactory = new SerializationFactory(conf);
+    }
+    return serialFactory;
+  }
+  
+  /**
+   * Make a copy of the writable object using serialization to a buffer
+   * @param dst the object to copy from
+   * @param src the object to copy into, which is destroyed
+   * @throws IOException
+   */
+  @SuppressWarnings("unchecked")
+  public static <T> T copy(Configuration conf, 
+                                T src, T dst) throws IOException {
+    CopyInCopyOutBuffer buffer = cloneBuffers.get();
+    buffer.outBuffer.reset();
+    SerializationFactory factory = getFactory(conf);
+    Class<T> cls = (Class<T>) src.getClass();
+    Serializer<T> serializer = factory.getSerializer(cls);
+    serializer.open(buffer.outBuffer);
+    serializer.serialize(src);
+    buffer.moveData();
+    Deserializer<T> deserializer = factory.getDeserializer(cls);
+    deserializer.open(buffer.inBuffer);
+    dst = deserializer.deserialize(dst);
+    return dst;
+  }
 
+  @Deprecated
+  public static void cloneWritableInto(Writable dst, 
+                                       Writable src) throws IOException {
+    CopyInCopyOutBuffer buffer = cloneBuffers.get();
+    buffer.outBuffer.reset();
+    src.write(buffer.outBuffer);
+    buffer.moveData();
+    dst.readFields(buffer.inBuffer);
+  }
 }
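
A quick usage sketch (not from this patch) of the new ReflectionUtils.copy, which now backs WritableUtils.clone and the deprecated cloneInto; Text is used here only as a convenient Writable.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ReflectionUtils;

public class CopyExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Text src = new Text("hello");
    Text dst = new Text();
    // Serializes src into a thread-local buffer pair and deserializes the
    // bytes into dst, using whatever serialization the factory picks for Text.
    dst = ReflectionUtils.copy(conf, src, dst);
    System.out.println(dst);          // hello
    System.out.println(src != dst);   // true: distinct objects
  }
}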

+ 1 - 0
src/examples/org/apache/hadoop/examples/ExampleDriver.java

@@ -17,6 +17,7 @@
  */
 
 package org.apache.hadoop.examples;
+
 import org.apache.hadoop.examples.dancing.DistributedPentomino;
 import org.apache.hadoop.examples.dancing.Sudoku;
 import org.apache.hadoop.examples.terasort.TeraGen;

+ 4 - 3
src/examples/org/apache/hadoop/examples/MultiFileWordCount.java

@@ -45,6 +45,7 @@ import org.apache.hadoop.mapred.MultiFileSplit;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.LongSumReducer;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
@@ -234,7 +235,7 @@ public class MultiFileWordCount extends Configured implements Tool {
       return 1;
     }
 
-    JobConf job = new JobConf(getConf(), WordCount.class);
+    JobConf job = new JobConf(getConf(), MultiFileWordCount.class);
     job.setJobName("MultiFileWordCount");
 
     //set the InputFormat of the job to our InputFormat
@@ -248,8 +249,8 @@ public class MultiFileWordCount extends Configured implements Tool {
     //use the defined mapper
     job.setMapperClass(MapClass.class);
     //use the WordCount Reducer
-    job.setCombinerClass(WordCount.Reduce.class);
-    job.setReducerClass(WordCount.Reduce.class);
+    job.setCombinerClass(LongSumReducer.class);
+    job.setReducerClass(LongSumReducer.class);
 
     FileInputFormat.addInputPaths(job, args[0]);
     FileOutputFormat.setOutputPath(job, new Path(args[1]));

+ 43 - 133
src/examples/org/apache/hadoop/examples/WordCount.java

@@ -1,159 +1,69 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 package org.apache.hadoop.examples;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
 import java.util.StringTokenizer;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
 
-/**
- * This is an example Hadoop Map/Reduce application.
- * It reads the text input files, breaks each line into words
- * and counts them. The output is a locally sorted list of words and the 
- * count of how often they occurred.
- *
- * To run: bin/hadoop jar build/hadoop-examples.jar wordcount
- *            [-m <i>maps</i>] [-r <i>reduces</i>] <i>in-dir</i> <i>out-dir</i> 
- */
-public class WordCount extends Configured implements Tool {
-  
-  /**
-   * Counts the words in each line.
-   * For each line of input, break the line into words and emit them as
-   * (<b>word</b>, <b>1</b>).
-   */
-  public static class MapClass extends MapReduceBase
-    implements Mapper<LongWritable, Text, Text, IntWritable> {
+public class WordCount {
+
+  public static class TokenizerMapper 
+       extends Mapper<Object, Text, Text, IntWritable>{
     
     private final static IntWritable one = new IntWritable(1);
     private Text word = new Text();
-    
-    public void map(LongWritable key, Text value, 
-                    OutputCollector<Text, IntWritable> output, 
-                    Reporter reporter) throws IOException {
-      String line = value.toString();
-      StringTokenizer itr = new StringTokenizer(line);
+      
+    public void map(Object key, Text value, Context context
+                    ) throws IOException, InterruptedException {
+      StringTokenizer itr = new StringTokenizer(value.toString());
       while (itr.hasMoreTokens()) {
         word.set(itr.nextToken());
-        output.collect(word, one);
+        context.write(word, one);
       }
     }
   }
   
-  /**
-   * A reducer class that just emits the sum of the input values.
-   */
-  public static class Reduce extends MapReduceBase
-    implements Reducer<Text, IntWritable, Text, IntWritable> {
-    
-    public void reduce(Text key, Iterator<IntWritable> values,
-                       OutputCollector<Text, IntWritable> output, 
-                       Reporter reporter) throws IOException {
+  public static class IntSumReducer 
+       extends Reducer<Text,IntWritable,Text,IntWritable> {
+    private IntWritable result = new IntWritable();
+
+    public void reduce(Text key, Iterable<IntWritable> values, 
+                       Context context
+                       ) throws IOException, InterruptedException {
       int sum = 0;
-      while (values.hasNext()) {
-        sum += values.next().get();
-      }
-      output.collect(key, new IntWritable(sum));
-    }
-  }
-  
-  static int printUsage() {
-    System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
-    ToolRunner.printGenericCommandUsage(System.out);
-    return -1;
-  }
-  
-  /**
-   * The main driver for word count map/reduce program.
-   * Invoke this method to submit the map/reduce job.
-   * @throws IOException When there is communication problems with the 
-   *                     job tracker.
-   */
-  public int run(String[] args) throws Exception {
-    JobConf conf = new JobConf(getConf(), WordCount.class);
-    conf.setJobName("wordcount");
- 
-    // the keys are words (strings)
-    conf.setOutputKeyClass(Text.class);
-    // the values are counts (ints)
-    conf.setOutputValueClass(IntWritable.class);
-    
-    conf.setMapperClass(MapClass.class);        
-    conf.setCombinerClass(Reduce.class);
-    conf.setReducerClass(Reduce.class);
-    
-    List<String> other_args = new ArrayList<String>();
-    for(int i=0; i < args.length; ++i) {
-      try {
-        if ("-m".equals(args[i])) {
-          conf.setNumMapTasks(Integer.parseInt(args[++i]));
-        } else if ("-r".equals(args[i])) {
-          conf.setNumReduceTasks(Integer.parseInt(args[++i]));
-        } else {
-          other_args.add(args[i]);
-        }
-      } catch (NumberFormatException except) {
-        System.out.println("ERROR: Integer expected instead of " + args[i]);
-        return printUsage();
-      } catch (ArrayIndexOutOfBoundsException except) {
-        System.out.println("ERROR: Required parameter missing from " +
-                           args[i-1]);
-        return printUsage();
+      for (IntWritable val : values) {
+        sum += val.get();
       }
+      result.set(sum);
+      context.write(key, result);
     }
-    // Make sure there are exactly 2 parameters left.
-    if (other_args.size() != 2) {
-      System.out.println("ERROR: Wrong number of parameters: " +
-                         other_args.size() + " instead of 2.");
-      return printUsage();
-    }
-    FileInputFormat.setInputPaths(conf, other_args.get(0));
-    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
-        
-    JobClient.runJob(conf);
-    return 0;
   }
-  
-  
+
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(new Configuration(), new WordCount(), args);
-    System.exit(res);
+    Configuration conf = new Configuration();
+    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
+    if (otherArgs.length != 2) {
+      System.err.println("Usage: wordcount <in> <out>");
+      System.exit(2);
+    }
+    Job job = new Job(conf, "word count");
+    job.setJarByClass(WordCount.class);
+    job.setMapperClass(TokenizerMapper.class);
+    job.setCombinerClass(IntSumReducer.class);
+    job.setReducerClass(IntSumReducer.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(IntWritable.class);
+    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
+    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
+    System.exit(job.waitForCompletion() ? 0 : 1);
   }
-
 }

+ 18 - 75
src/mapred/org/apache/hadoop/mapred/Counters.java

@@ -47,7 +47,9 @@ import org.apache.hadoop.util.StringUtils;
  * 
  * <p><code>Counters</code> are bunched into {@link Group}s, each comprising of
  * counters from a particular <code>Enum</code> class. 
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.
  */
+@Deprecated
 public class Counters implements Writable, Iterable<Counters.Group> {
   private static final Log LOG = LogFactory.getLog(Counters.class);
   private static final char GROUP_OPEN = '{';
@@ -65,69 +67,18 @@ public class Counters implements Writable, Iterable<Counters.Group> {
   /**
    * A counter record, comprising its name and value. 
    */
-  public static class Counter implements Writable {
-
-    private String name;
-    private String displayName;
-    private long value;
+  public static class Counter extends org.apache.hadoop.mapreduce.Counter {
     
     Counter() { 
-      value = 0L;
     }
 
     Counter(String name, String displayName, long value) {
-      this.name = name;
-      this.displayName = displayName;
-      this.value = value;
-    }
-    
-    /**
-     * Read the binary representation of the counter
-     */
-    public synchronized void readFields(DataInput in) throws IOException {
-      name = Text.readString(in);
-      if (in.readBoolean()) {
-        displayName = Text.readString(in);
-      } else {
-        displayName = name;
-      }
-      value = WritableUtils.readVLong(in);
-    }
-    
-    /**
-     * Write the binary representation of the counter
-     */
-    public synchronized void write(DataOutput out) throws IOException {
-      Text.writeString(out, name);
-      boolean distinctDisplayName = (! name.equals(displayName));
-      out.writeBoolean(distinctDisplayName);
-      if (distinctDisplayName) {
-        Text.writeString(out, displayName);
-      }
-      WritableUtils.writeVLong(out, value);
+      super(name, displayName);
+      increment(value);
     }
     
-    /**
-     * Get the internal name of the counter.
-     * @return the internal name of the counter
-     */
-    public synchronized String getName() {
-      return name;
-    }
-    
-    /**
-     * Get the name of the counter.
-     * @return the user facing name of the counter
-     */
-    public synchronized String getDisplayName() {
-      return displayName;
-    }
-    
-    /**
-     * Set the display name of the counter.
-     */
-    public synchronized void setDisplayName(String displayName) {
-      this.displayName = displayName;
+    public void setDisplayName(String newName) {
+      super.setDisplayName(newName);
     }
     
     /**
@@ -150,7 +101,7 @@ public class Counters implements Writable, Iterable<Counters.Group> {
       
       // Add the value
       buf.append(UNIT_OPEN);
-      buf.append(this.value);
+      buf.append(this.getValue());
       buf.append(UNIT_CLOSE);
       
       buf.append(COUNTER_CLOSE);
@@ -159,10 +110,9 @@ public class Counters implements Writable, Iterable<Counters.Group> {
     }
     
     // Checks for (content) equality of two (basic) counters
+    @Deprecated
     synchronized boolean contentEquals(Counter c) {
-      return name.equals(c.getName())
-             && displayName.equals(c.getDisplayName())
-             && value == c.getCounter();
+      return this.equals(c);
     }
     
     /**
@@ -170,16 +120,9 @@ public class Counters implements Writable, Iterable<Counters.Group> {
      * @return the current value
      */
     public synchronized long getCounter() {
-      return value;
+      return getValue();
     }
     
-    /**
-     * Increment this counter by the given value
-     * @param incr the value to increase this counter by
-     */
-    public synchronized void increment(long incr) {
-      value += incr;
-    }
   }
   
   /**
@@ -297,8 +240,8 @@ public class Counters implements Writable, Iterable<Counters.Group> {
      */
     public synchronized long getCounter(String counterName) {
       for(Counter counter: subcounters.values()) {
-        if (counter != null && counter.displayName.equals(counterName)) {
-          return counter.value;
+        if (counter != null && counter.getDisplayName().equals(counterName)) {
+          return counter.getValue();
         }
       }
       return 0L;
@@ -459,7 +402,7 @@ public class Counters implements Writable, Iterable<Counters.Group> {
    * @param amount amount by which counter is to be incremented
    */
   public synchronized void incrCounter(Enum key, long amount) {
-    findCounter(key).value += amount;
+    findCounter(key).increment(amount);
   }
   
   /**
@@ -470,7 +413,7 @@ public class Counters implements Writable, Iterable<Counters.Group> {
    * @param amount amount by which counter is to be incremented
    */
   public synchronized void incrCounter(String group, String counter, long amount) {
-    getGroup(group).getCounterForName(counter).value += amount;
+    getGroup(group).getCounterForName(counter).increment(amount);
   }
   
   /**
@@ -478,7 +421,7 @@ public class Counters implements Writable, Iterable<Counters.Group> {
    * does not exist.
    */
   public synchronized long getCounter(Enum key) {
-    return findCounter(key).value;
+    return findCounter(key).getValue();
   }
   
   /**
@@ -492,8 +435,8 @@ public class Counters implements Writable, Iterable<Counters.Group> {
       group.displayName = otherGroup.displayName;
       for (Counter otherCounter : otherGroup) {
         Counter counter = group.getCounterForName(otherCounter.getName());
-        counter.displayName = otherCounter.displayName;
-        counter.value += otherCounter.value;
+        counter.setDisplayName(otherCounter.getDisplayName());
+        counter.increment(otherCounter.getValue());
       }
     }
   }

+ 3 - 2
src/mapred/org/apache/hadoop/mapred/FileInputFormat.java

@@ -22,7 +22,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.LinkedList;
@@ -35,7 +34,6 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.net.NetworkTopology;
@@ -53,7 +51,10 @@ import org.apache.hadoop.util.StringUtils;
  * Subclasses of <code>FileInputFormat</code> can also override the 
  * {@link #isSplitable(FileSystem, Path)} method to ensure input-files are
  * not split-up and are processed as a whole by {@link Mapper}s.
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}
+ *  instead.
  */
+@Deprecated
 public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
 
   public static final Log LOG =

+ 7 - 4
src/mapred/org/apache/hadoop/mapred/FileSplit.java

@@ -21,16 +21,19 @@ package org.apache.hadoop.mapred;
 import java.io.IOException;
 import java.io.DataInput;
 import java.io.DataOutput;
-import java.io.File;                              // deprecated
 
 import org.apache.hadoop.io.UTF8;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
 /** A section of an input file.  Returned by {@link
  * InputFormat#getSplits(JobConf, int)} and passed to
- * {@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}. */
-public class FileSplit implements InputSplit {
+ * {@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}. 
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit}
+ *  instead.
+ */
+@Deprecated
+public class FileSplit extends org.apache.hadoop.mapreduce.InputSplit 
+                       implements InputSplit {
   private Path file;
   private long start;
   private long length;

+ 3 - 51
src/mapred/org/apache/hadoop/mapred/ID.java

@@ -18,12 +18,6 @@
 
 package org.apache.hadoop.mapred;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.WritableComparable;
-
 /**
  * A general identifier, which internally stores the id
  * as an integer. This is the super class of {@link JobID}, 
@@ -33,57 +27,15 @@ import org.apache.hadoop.io.WritableComparable;
  * @see TaskID
  * @see TaskAttemptID
  */
-public abstract class ID implements WritableComparable<ID> {
-  protected static final char SEPARATOR = '_';
-  protected int id;
+@Deprecated
+public abstract class ID extends org.apache.hadoop.mapreduce.ID {
 
   /** constructs an ID object from the given int */
   public ID(int id) {
-    this.id = id;
+    super(id);
   }
 
   protected ID() {
   }
 
-  /** returns the int which represents the identifier */
-  public int getId() {
-    return id;
-  }
-
-  @Override
-  public String toString() {
-    return String.valueOf(id);
-  }
-
-  @Override
-  public int hashCode() {
-    return Integer.valueOf(id).hashCode();
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o)
-      return true;
-    if(o == null)
-      return false;
-    if (o.getClass() == this.getClass()) {
-      ID that = (ID) o;
-      return this.id == that.id;
-    }
-    else
-      return false;
-  }
-
-  /** Compare IDs by associated numbers */
-  public int compareTo(ID that) {
-    return this.id - that.id;
-  }
-
-  public void readFields(DataInput in) throws IOException {
-    this.id = in.readInt();
-  }
-
-  public void write(DataOutput out) throws IOException {
-    out.writeInt(id);
-  }
 }

+ 2 - 0
src/mapred/org/apache/hadoop/mapred/InputFormat.java

@@ -61,7 +61,9 @@ import org.apache.hadoop.fs.FileSystem;
  * @see RecordReader
  * @see JobClient
  * @see FileInputFormat
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.
  */
+@Deprecated
 public interface InputFormat<K, V> {
 
   /** 

+ 2 - 0
src/mapred/org/apache/hadoop/mapred/InputSplit.java

@@ -31,7 +31,9 @@ import org.apache.hadoop.io.Writable;
  * 
  * @see InputFormat
  * @see RecordReader
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.
  */
+@Deprecated
 public interface InputSplit extends Writable {
 
   /**

+ 3 - 1
src/mapred/org/apache/hadoop/mapred/IsolationRunner.java

@@ -159,7 +159,9 @@ public class IsolationRunner {
    * Run a single task
    * @param args the first argument is the task directory
    */
-  public static void main(String[] args) throws IOException {
+  public static void main(String[] args
+                          ) throws ClassNotFoundException, IOException, 
+                                   InterruptedException {
     if (args.length != 1) {
       System.out.println("Usage: IsolationRunner <path>/job.xml");
       System.exit(1);

+ 1 - 1
src/mapred/org/apache/hadoop/mapred/JVMId.java

@@ -68,7 +68,7 @@ class JVMId extends ID {
   /**Compare TaskInProgressIds by first jobIds, then by tip numbers. Reduces are 
    * defined as greater then maps.*/
   @Override
-  public int compareTo(ID o) {
+  public int compareTo(org.apache.hadoop.mapreduce.ID o) {
     JVMId that = (JVMId)o;
     int jobComp = this.jobId.compareTo(that.jobId);
     if(jobComp == 0) {

+ 175 - 48
src/mapred/org/apache/hadoop/mapred/JobClient.java

@@ -21,6 +21,7 @@ import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.DataInput;
 import java.io.DataOutput;
+import java.io.DataOutputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
@@ -38,7 +39,7 @@ import java.net.UnknownHostException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
-import java.util.Random;
+import java.util.List;
 
 import javax.security.auth.login.LoginException;
 
@@ -59,11 +60,14 @@ import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.serializer.SerializationFactory;
+import org.apache.hadoop.io.serializer.Serializer;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.mapred.Counters.Counter;
 import org.apache.hadoop.mapred.Counters.Group;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UnixUserGroupInformation;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
@@ -155,7 +159,7 @@ public class JobClient extends Configured implements MRConstants, Tool  {
   private static final Log LOG = LogFactory.getLog(JobClient.class);
   public static enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
   private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED; 
-  static long MAX_JOBPROFILE_AGE = 1000 * 2;
+  private static final long MAX_JOBPROFILE_AGE = 1000 * 2;
 
   /**
    * A NetworkedJob is an implementation of RunningJob.  It holds
@@ -371,14 +375,17 @@ public class JobClient extends Configured implements MRConstants, Tool  {
     public Counters getCounters() throws IOException {
       return jobSubmitClient.getJobCounters(getID());
     }
+    
+    @Override
+    public String[] getTaskDiagnostics(TaskAttemptID id) throws IOException {
+      return jobSubmitClient.getTaskDiagnostics(id);
+    }
   }
 
-  JobSubmissionProtocol jobSubmitClient;
-  Path sysDir = null;
+  private JobSubmissionProtocol jobSubmitClient;
+  private Path sysDir = null;
   
-  FileSystem fs = null;
-
-  static Random r = new Random();
+  private FileSystem fs = null;
 
   /**
    * Create a job client.
@@ -710,11 +717,34 @@ public class JobClient extends Configured implements MRConstants, Tool  {
    * @return a handle to the {@link RunningJob} which can be used to track the
    *         running-job.
    * @throws FileNotFoundException
-   * @throws InvalidJobConfException
    * @throws IOException
    */
-  public RunningJob submitJob(JobConf job) throws FileNotFoundException, 
-                                  InvalidJobConfException, IOException {
+  public RunningJob submitJob(JobConf job) throws FileNotFoundException,
+                                                  IOException {
+    try {
+      return submitJobInternal(job);
+    } catch (InterruptedException ie) {
+      throw new IOException("interrupted", ie);
+    } catch (ClassNotFoundException cnfe) {
+      throw new IOException("class not found", cnfe);
+    }
+  }
+
+  /**
+   * Internal method for submitting jobs to the system.
+   * @param job the configuration to submit
+   * @return a proxy object for the running job
+   * @throws FileNotFoundException
+   * @throws ClassNotFoundException
+   * @throws InterruptedException
+   * @throws IOException
+   */
+  public 
+  RunningJob submitJobInternal(JobConf job
+                               ) throws FileNotFoundException, 
+                                        ClassNotFoundException,
+                                        InterruptedException,
+                                        IOException {
     /*
      * configure the command line options correctly on the submitting dfs
      */
@@ -725,12 +755,53 @@ public class JobClient extends Configured implements MRConstants, Tool  {
     Path submitSplitFile = new Path(submitJobDir, "job.split");
     configureCommandLineOptions(job, submitJobDir, submitJarFile);
     Path submitJobFile = new Path(submitJobDir, "job.xml");
+    int reduces = job.getNumReduceTasks();
+    JobContext context = new JobContext(job, jobId);
     
     // Check the output specification
-    job.getOutputFormat().checkOutputSpecs(fs, job);
+    if (reduces == 0 ? job.getUseNewMapper() : job.getUseNewReducer()) {
+      org.apache.hadoop.mapreduce.OutputFormat<?,?> output =
+        ReflectionUtils.newInstance(context.getOutputFormatClass(), job);
+      output.checkOutputSpecs(context);
+    } else {
+      job.getOutputFormat().checkOutputSpecs(fs, job);
+    }
 
     // Create the splits for the job
     LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));
+    int maps;
+    if (job.getUseNewMapper()) {
+      maps = writeNewSplits(context, submitSplitFile);
+    } else {
+      maps = writeOldSplits(job, submitSplitFile);
+    }
+    job.set("mapred.job.split.file", submitSplitFile.toString());
+    job.setNumMapTasks(maps);
+        
+    // Write job file to JobTracker's fs        
+    FSDataOutputStream out = 
+      FileSystem.create(fs, submitJobFile,
+                        new FsPermission(JOB_FILE_PERMISSION));
+
+    try {
+      job.writeXml(out);
+    } finally {
+      out.close();
+    }
+
+    //
+    // Now, actually submit the job (using the submit name)
+    //
+    JobStatus status = jobSubmitClient.submitJob(jobId);
+    if (status != null) {
+      return new NetworkedJob(status);
+    } else {
+      throw new IOException("Could not launch job");
+    }
+  }
+
+  private int writeOldSplits(JobConf job, 
+                             Path submitSplitFile) throws IOException {
     InputSplit[] splits = 
       job.getInputFormat().getSplits(job, job.getNumMapTasks());
     // sort the splits into order based on size, so that the biggest
@@ -753,36 +824,91 @@ public class JobClient extends Configured implements MRConstants, Tool  {
         }
       }
     });
-    // write the splits to a file for the job tracker
-    FSDataOutputStream out = FileSystem.create(fs,
-        submitSplitFile, new FsPermission(JOB_FILE_PERMISSION));
+    DataOutputStream out = writeSplitsFileHeader(job, submitSplitFile, splits.length);
+    
     try {
-      writeSplitsFile(splits, out);
+      DataOutputBuffer buffer = new DataOutputBuffer();
+      RawSplit rawSplit = new RawSplit();
+      for(InputSplit split: splits) {
+        rawSplit.setClassName(split.getClass().getName());
+        buffer.reset();
+        split.write(buffer);
+        rawSplit.setDataLength(split.getLength());
+        rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
+        rawSplit.setLocations(split.getLocations());
+        rawSplit.write(out);
+      }
     } finally {
       out.close();
     }
-    job.set("mapred.job.split.file", submitSplitFile.toString());
-    job.setNumMapTasks(splits.length);
-        
-    // Write job file to JobTracker's fs        
-    out = FileSystem.create(fs, submitJobFile,
-        new FsPermission(JOB_FILE_PERMISSION));
+    return splits.length;
+  }
+
+  private static class NewSplitComparator 
+    implements Comparator<org.apache.hadoop.mapreduce.InputSplit>{
+
+    @Override
+    public int compare(org.apache.hadoop.mapreduce.InputSplit o1,
+                       org.apache.hadoop.mapreduce.InputSplit o2) {
+      try {
+        long len1 = o1.getLength();
+        long len2 = o2.getLength();
+        if (len1 < len2) {
+          return 1;
+        } else if (len1 == len2) {
+          return 0;
+        } else {
+          return -1;
+        }
+      } catch (IOException ie) {
+        throw new RuntimeException("exception in compare", ie);
+      } catch (InterruptedException ie) {
+        throw new RuntimeException("exception in compare", ie);        
+      }
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  private <T extends org.apache.hadoop.mapreduce.InputSplit> 
+  int writeNewSplits(JobContext job, Path submitSplitFile
+                     ) throws IOException, InterruptedException, 
+                              ClassNotFoundException {
+    JobConf conf = job.getJobConf();
+    org.apache.hadoop.mapreduce.InputFormat<?,?> input =
+      ReflectionUtils.newInstance(job.getInputFormatClass(), job.getJobConf());
+    
+    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
+    T[] array = (T[])
+      splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
 
+    // sort the splits into order based on size, so that the biggest
+    // go first
+    Arrays.sort(array, new NewSplitComparator());
+    DataOutputStream out = writeSplitsFileHeader(conf, submitSplitFile, 
+                                                 array.length);
     try {
-      job.writeXml(out);
+      if (array.length != 0) {
+        DataOutputBuffer buffer = new DataOutputBuffer();
+        RawSplit rawSplit = new RawSplit();
+        SerializationFactory factory = new SerializationFactory(conf);
+        Serializer<T> serializer = 
+          factory.getSerializer((Class<T>) array[0].getClass());
+        serializer.open(buffer);
+        for(T split: array) {
+          rawSplit.setClassName(split.getClass().getName());
+          buffer.reset();
+          serializer.serialize(split);
+          rawSplit.setDataLength(split.getLength());
+          rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
+          rawSplit.setLocations(split.getLocations());
+          rawSplit.write(out);
+        }
+        serializer.close();
+      }
     } finally {
       out.close();
     }
-
-    //
-    // Now, actually submit the job (using the submit name)
-    //
-    JobStatus status = jobSubmitClient.submitJob(jobId);
-    if (status != null) {
-      return new NetworkedJob(status);
-    } else {
-      throw new IOException("Could not launch job");
-    }
+    return array.length;
   }
 
   /** 
@@ -878,7 +1004,21 @@ public class JobClient extends Configured implements MRConstants, Tool  {
     
   private static final int CURRENT_SPLIT_FILE_VERSION = 0;
   private static final byte[] SPLIT_FILE_HEADER = "SPL".getBytes();
-    
+
+  private DataOutputStream writeSplitsFileHeader(Configuration conf,
+                                                 Path filename,
+                                                 int length
+                                                 ) throws IOException {
+    // write the splits to a file for the job tracker
+    FileSystem fs = filename.getFileSystem(conf);
+    FSDataOutputStream out = 
+      FileSystem.create(fs, filename, new FsPermission(JOB_FILE_PERMISSION));
+    out.write(SPLIT_FILE_HEADER);
+    WritableUtils.writeVInt(out, CURRENT_SPLIT_FILE_VERSION);
+    WritableUtils.writeVInt(out, length);
+    return out;
+  }
+
   /** Create the list of input splits and write them out in a file for
    *the JobTracker. The format is:
    * <format version>
@@ -888,21 +1028,8 @@ public class JobClient extends Configured implements MRConstants, Tool  {
    * @param splits the input splits to write out
    * @param out the stream to write to
    */
-  private void writeSplitsFile(InputSplit[] splits, FSDataOutputStream out) throws IOException {
-    out.write(SPLIT_FILE_HEADER);
-    WritableUtils.writeVInt(out, CURRENT_SPLIT_FILE_VERSION);
-    WritableUtils.writeVInt(out, splits.length);
-    DataOutputBuffer buffer = new DataOutputBuffer();
-    RawSplit rawSplit = new RawSplit();
-    for(InputSplit split: splits) {
-      rawSplit.setClassName(split.getClass().getName());
-      buffer.reset();
-      split.write(buffer);
-      rawSplit.setDataLength(split.getLength());
-      rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
-      rawSplit.setLocations(split.getLocations());
-      rawSplit.write(out);
-    }
+  private void writeOldSplitsFile(InputSplit[] splits, 
+                                  FSDataOutputStream out) throws IOException {
   }
 
   /**

+ 36 - 0
src/mapred/org/apache/hadoop/mapred/JobConf.java

@@ -99,7 +99,9 @@ import org.apache.hadoop.util.Tool;
  * @see ClusterStatus
  * @see Tool
  * @see DistributedCache
+ * @deprecated Use {@link Configuration} instead
  */
+@Deprecated
 public class JobConf extends Configuration {
   
   private static final Log LOG = LogFactory.getLog(JobConf.class);
@@ -769,6 +771,40 @@ public class JobConf extends Configuration {
              theClass, RawComparator.class);
   }
 
+  /**
+   * Should the framework use the new context-object code for running
+   * the mapper?
+   * @return true, if the new api should be used
+   */
+  public boolean getUseNewMapper() {
+    return getBoolean("mapred.mapper.new-api", false);
+  }
+  /**
+   * Set whether the framework should use the new api for the mapper.
+   * This is the default for jobs submitted with the new Job api.
+   * @param flag true, if the new api should be used
+   */
+  public void setUseNewMapper(boolean flag) {
+    setBoolean("mapred.mapper.new-api", flag);
+  }
+
+  /**
+   * Should the framework use the new context-object code for running
+   * the reducer?
+   * @return true, if the new api should be used
+   */
+  public boolean getUseNewReducer() {
+    return getBoolean("mapred.reducer.new-api", false);
+  }
+  /**
+   * Set whether the framework should use the new api for the reducer. 
+   * This is the default for jobs submitted with the new Job api.
+   * @param flag true, if the new api should be used
+   */
+  public void setUseNewReducer(boolean flag) {
+    setBoolean("mapred.reducer.new-api", flag);
+  }
+
   /**
    * Get the value class for job outputs.
    * 
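
The new-api flags above are what JobClient.submitJobInternal (earlier in this commit) consults to decide whether a task runs the old interfaces or the new context-object classes. A tiny illustrative sketch, not from the patch, of reading and flipping them on a JobConf:

import org.apache.hadoop.mapred.JobConf;

public class NewApiFlagsExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    System.out.println(conf.getUseNewMapper());  // false by default
    // Ask the framework to run the new org.apache.hadoop.mapreduce
    // Mapper/Reducer code path for this job.
    conf.setUseNewMapper(true);                  // sets mapred.mapper.new-api
    conf.setUseNewReducer(true);                 // sets mapred.reducer.new-api
    System.out.println(conf.getUseNewMapper());  // true
  }
}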

+ 1 - 0
src/mapred/org/apache/hadoop/mapred/JobConfigurable.java

@@ -19,6 +19,7 @@
 package org.apache.hadoop.mapred;
 
 /** That what may be configured. */
+@Deprecated
 public interface JobConfigurable {
   /** Initializes a new instance from a {@link JobConf}.
    *

+ 12 - 7
src/mapred/org/apache/hadoop/mapred/JobContext.java

@@ -19,18 +19,23 @@ package org.apache.hadoop.mapred;
 
 import org.apache.hadoop.util.Progressable;
 
-public class JobContext {
-
-  JobConf job;
+/**
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.JobContext} instead.
+ */
+@Deprecated
+public class JobContext extends org.apache.hadoop.mapreduce.JobContext {
+  private JobConf job;
   private Progressable progress;
 
-  JobContext(JobConf conf, Progressable progress) {
-    job = conf;
+  JobContext(JobConf conf, org.apache.hadoop.mapreduce.JobID jobId, 
+             Progressable progress) {
+    super(conf, jobId);
+    this.job = conf;
     this.progress = progress;
   }
 
-  JobContext(JobConf conf) {
-    this(conf, Reporter.NULL);
+  JobContext(JobConf conf, org.apache.hadoop.mapreduce.JobID jobId) {
+    this(conf, jobId, Reporter.NULL);
   }
   
   /**

+ 14 - 88
src/mapred/org/apache/hadoop/mapred/JobID.java

@@ -19,11 +19,7 @@
 package org.apache.hadoop.mapred;
 
 import java.io.DataInput;
-import java.io.DataOutput;
 import java.io.IOException;
-import java.text.NumberFormat;
-
-import org.apache.hadoop.io.Text;
 
 /**
  * JobID represents the immutable and unique identifier for 
@@ -41,91 +37,33 @@ import org.apache.hadoop.io.Text;
  * 
  * @see TaskID
  * @see TaskAttemptID
- * @see JobTracker#getNewJobId()
- * @see JobTracker#getStartTime()
  */
-public class JobID extends ID {
-  protected static final String JOB = "job";
-  private Text jtIdentifier = new Text();
-  
-  private static NumberFormat idFormat = NumberFormat.getInstance();
-  static {
-    idFormat.setGroupingUsed(false);
-    idFormat.setMinimumIntegerDigits(4);
-  }
-  
+@Deprecated
+public class JobID extends org.apache.hadoop.mapreduce.JobID {
   /**
    * Constructs a JobID object 
    * @param jtIdentifier jobTracker identifier
    * @param id job number
    */
   public JobID(String jtIdentifier, int id) {
-    super(id);
-    this.jtIdentifier.set(jtIdentifier);
+    super(jtIdentifier, id);
   }
   
   public JobID() { }
-  
-  public String getJtIdentifier() {
-    return jtIdentifier.toString();
-  }
-  
-  @Override
-  public boolean equals(Object o) {
-    if (!super.equals(o))
-      return false;
-
-    JobID that = (JobID)o;
-    return this.jtIdentifier.equals(that.jtIdentifier);
-  }
-  
-  /**Compare JobIds by first jtIdentifiers, then by job numbers*/
-  @Override
-  public int compareTo(ID o) {
-    JobID that = (JobID)o;
-    int jtComp = this.jtIdentifier.compareTo(that.jtIdentifier);
-    if(jtComp == 0) {
-      return this.id - that.id;
-    }
-    else return jtComp;
-  }
-  
-  @Override
-  public String toString() {
-    return appendTo(new StringBuilder(JOB)).toString();
-  }
 
   /**
-   * Add the stuff after the "job" prefix to the given builder. This is useful,
-   * because the sub-ids use this substring at the start of their string.
-   * @param builder the builder to append to
-   * @return the builder that was passed in
+   * Downgrade a new JobID to an old one
+   * @param old a new or old JobID
+   * @return either old or a new JobID built to match old
    */
-  protected StringBuilder appendTo(StringBuilder builder) {
-    builder.append(SEPARATOR);
-    builder.append(jtIdentifier);
-    builder.append(SEPARATOR);
-    builder.append(idFormat.format(id));
-    return builder;
-  }
-
-  @Override
-  public int hashCode() {
-    return jtIdentifier.hashCode() + id;
-  }
-  
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    super.readFields(in);
-    jtIdentifier.readFields(in);
+  public static JobID downgrade(org.apache.hadoop.mapreduce.JobID old) {
+    if (old instanceof JobID) {
+      return (JobID) old;
+    } else {
+      return new JobID(old.getJtIdentifier(), old.getId());
+    }
   }
 
-  @Override
-  public void write(DataOutput out) throws IOException {
-    super.write(out);
-    jtIdentifier.write(out);
-  }
-  
   @Deprecated
   public static JobID read(DataInput in) throws IOException {
     JobID jobId = new JobID();
@@ -138,19 +76,7 @@ public class JobID extends ID {
    * @throws IllegalArgumentException if the given string is malformed
    */
   public static JobID forName(String str) throws IllegalArgumentException {
-    if(str == null)
-      return null;
-    try {
-      String[] parts = str.split(Character.toString(SEPARATOR));
-      if(parts.length == 3) {
-        if(parts[0].equals(JOB)) {
-          return new JobID(parts[1], Integer.parseInt(parts[2]));
-        }
-      }
-    }catch (Exception ex) {//fall below
-    }
-    throw new IllegalArgumentException("JobId string : " + str 
-        + " is not properly formed");
+    return (JobID) org.apache.hadoop.mapreduce.JobID.forName(str);
   }
   
   /** 
@@ -187,5 +113,5 @@ public class JobID extends ID {
       .append(jobId != null ? idFormat.format(jobId) : "[0-9]*");
     return builder;
   }
-  
+
 }

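For orientation (not from the patch itself), the downgrade() bridge in use; the job id string is made up, any well-formed job_<jtIdentifier>_<number> value would do.

    import org.apache.hadoop.mapred.JobID;

    public class JobIdDowngrade {
      public static void main(String[] args) {
        // Parse with the new-API class, then convert back to the old type.
        org.apache.hadoop.mapreduce.JobID newId =
            org.apache.hadoop.mapreduce.JobID.forName("job_200812150000_0001");
        JobID oldId = JobID.downgrade(newId);  // reuses jtIdentifier and id
        System.out.println(oldId);             // prints job_200812150000_0001
      }
    }
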
+ 6 - 4
src/mapred/org/apache/hadoop/mapred/JobProfile.java

@@ -66,7 +66,8 @@ public class JobProfile implements Writable {
    * @param url link to the web-ui for details of the job.
    * @param name user-specified job name.
    */
-  public JobProfile(String user, JobID jobid, String jobFile, String url,
+  public JobProfile(String user, org.apache.hadoop.mapreduce.JobID jobid, 
+                    String jobFile, String url,
                     String name) {
     this(user, jobid, jobFile, url, name, JobConf.DEFAULT_QUEUE_NAME);
   }
@@ -82,10 +83,11 @@ public class JobProfile implements Writable {
    * @param name user-specified job name.
    * @param queueName name of the queue to which the job is submitted
    */
-  public JobProfile(String user, JobID jobid, String jobFile, String url,
-                      String name, String queueName) {
+  public JobProfile(String user, org.apache.hadoop.mapreduce.JobID jobid, 
+                    String jobFile, String url,
+                    String name, String queueName) {
     this.user = user;
-    this.jobid = jobid;
+    this.jobid = JobID.downgrade(jobid);
     this.jobFile = jobFile;
     this.url = url;
     this.name = name;

+ 2 - 3
src/mapred/org/apache/hadoop/mapred/JobStatus.java

@@ -48,7 +48,7 @@ public class JobStatus implements Writable, Cloneable {
   public static final int PREP = 4;
   public static final int KILLED = 5;
 
-  private final JobID jobid;
+  private JobID jobid;
   private float mapProgress;
   private float reduceProgress;
   private float cleanupProgress;
@@ -62,7 +62,6 @@ public class JobStatus implements Writable, Cloneable {
   /**
    */
   public JobStatus() {
-    jobid = new JobID();
   }
 
   /**
@@ -288,7 +287,7 @@ public class JobStatus implements Writable, Cloneable {
   }
 
   public synchronized void readFields(DataInput in) throws IOException {
-    jobid.readFields(in);
+    this.jobid = JobID.read(in);
     this.setupProgress = in.readFloat();
     this.mapProgress = in.readFloat();
     this.reduceProgress = in.readFloat();

+ 3 - 1
src/mapred/org/apache/hadoop/mapred/LineRecordReader.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.mapred;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -35,7 +34,10 @@ import org.apache.commons.logging.Log;
 
 /**
  * Treats keys as offset in file and value as line. 
+ * @deprecated Use 
+ *   {@link org.apache.hadoop.mapreduce.lib.input.LineRecordReader} instead.
  */
+@Deprecated
 public class LineRecordReader implements RecordReader<LongWritable, Text> {
   private static final Log LOG
     = LogFactory.getLog(LineRecordReader.class.getName());

+ 2 - 2
src/mapred/org/apache/hadoop/mapred/LocalJobRunner.java

@@ -101,13 +101,13 @@ class LocalJobRunner implements JobSubmissionProtocol {
     
     @Override
     public void run() {
-      JobContext jContext = new JobContext(conf);
+      JobID jobId = profile.getJobID();
+      JobContext jContext = new JobContext(conf, jobId);
       OutputCommitter outputCommitter = job.getOutputCommitter();
       try {
         // split input into minimum number of splits
         InputSplit[] splits;
         splits = job.getInputFormat().getSplits(job, 1);
-        JobID jobId = profile.getJobID();
         
         int numReduceTasks = job.getNumReduceTasks();
         if (numReduceTasks > 1 || numReduceTasks < 0) {

+ 1 - 0
src/mapred/org/apache/hadoop/mapred/MapReduceBase.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.mapred.JobConfigurable;
  * <p>Provides default no-op implementations for a few methods, most non-trivial
  * applications need to override some of them.</p>
  */
+@Deprecated
 public class MapReduceBase implements Closeable, JobConfigurable {
 
   /** Default implementation that does nothing. */

+ 2 - 0
src/mapred/org/apache/hadoop/mapred/MapRunnable.java

@@ -27,7 +27,9 @@ import java.io.IOException;
  * control on map processing e.g. multi-threaded, asynchronous mappers etc.</p>
  * 
  * @see Mapper
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.
  */
+@Deprecated
 public interface MapRunnable<K1, V1, K2, V2>
     extends JobConfigurable {
   

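A sketch, not part of the patch, of what replaces MapRunnable: in the new API the per-task control loop is Mapper.run(), so an application overrides that method instead of supplying a runner class. The class below simply mirrors the default identity loop and is only an example.

    import java.io.IOException;

    public class PassThroughMapper<K, V>
        extends org.apache.hadoop.mapreduce.Mapper<K, V, K, V> {
      @Override
      public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        // Custom control loop: the old MapRunnable role, now inside run().
        while (context.nextKeyValue()) {
          map(context.getCurrentKey(), context.getCurrentValue(), context);
        }
        cleanup(context);
      }
    }
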
+ 231 - 60
src/mapred/org/apache/hadoop/mapred/MapTask.java

@@ -30,6 +30,8 @@ import java.io.DataOutput;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.locks.Condition;
@@ -37,7 +39,7 @@ import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
@@ -50,12 +52,13 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.hadoop.io.serializer.Deserializer;
 import org.apache.hadoop.io.serializer.SerializationFactory;
 import org.apache.hadoop.io.serializer.Serializer;
 import org.apache.hadoop.mapred.IFile.Writer;
-import org.apache.hadoop.mapred.IFile.Reader;
 import org.apache.hadoop.mapred.Merger.Segment;
 import org.apache.hadoop.mapred.SortedRanges.SkipRangeIterator;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.util.IndexedSortable;
 import org.apache.hadoop.util.IndexedSorter;
 import org.apache.hadoop.util.Progress;
@@ -72,7 +75,6 @@ class MapTask extends Task {
 
   private BytesWritable split = new BytesWritable();
   private String splitClass;
-  private InputSplit instantiatedSplit = null;
   private final static int APPROX_HEADER_LENGTH = 150;
 
   private static final Log LOG = LogFactory.getLog(MapTask.class.getName());
@@ -131,11 +133,6 @@ class MapTask extends Task {
     split.readFields(in);
   }
 
-  @Override
-  InputSplit getInputSplit() throws UnsupportedOperationException {
-    return instantiatedSplit;
-  }
-
   /**
    * This class wraps the user's record reader to update the counters and progress
    * as records are read.
@@ -147,14 +144,16 @@ class MapTask extends Task {
     private RecordReader<K,V> rawIn;
     private Counters.Counter inputByteCounter;
     private Counters.Counter inputRecordCounter;
+    private TaskReporter reporter;
     private long beforePos = -1;
     private long afterPos = -1;
     
-    TrackedRecordReader(RecordReader<K,V> raw, Counters counters) 
+    TrackedRecordReader(RecordReader<K,V> raw, TaskReporter reporter) 
       throws IOException{
       rawIn = raw;
-      inputRecordCounter = counters.findCounter(MAP_INPUT_RECORDS);
-      inputByteCounter = counters.findCounter(MAP_INPUT_BYTES);
+      inputRecordCounter = reporter.getCounter(MAP_INPUT_RECORDS);
+      inputByteCounter = reporter.getCounter(MAP_INPUT_BYTES);
+      this.reporter = reporter;
     }
 
     public K createKey() {
@@ -181,7 +180,7 @@ class MapTask extends Task {
      
     protected synchronized boolean moveToNext(K key, V value)
       throws IOException {
-      setProgress(getProgress());
+      reporter.setProgress(getProgress());
       beforePos = getPos();
       boolean ret = rawIn.next(key, value);
       afterPos = getPos();
@@ -193,6 +192,9 @@ class MapTask extends Task {
     public float getProgress() throws IOException {
       return rawIn.getProgress();
     }
+    TaskReporter getTaskReporter() {
+      return reporter;
+    }
   }
 
   /**
@@ -207,11 +209,11 @@ class MapTask extends Task {
     private Counters.Counter skipRecCounter;
     private long recIndex = -1;
     
-    SkippingRecordReader(RecordReader<K,V> raw, Counters counters, 
-        TaskUmbilicalProtocol umbilical) throws IOException{
-      super(raw,counters);
+    SkippingRecordReader(RecordReader<K,V> raw, TaskUmbilicalProtocol umbilical,
+                         TaskReporter reporter) throws IOException{
+      super(raw, reporter);
       this.umbilical = umbilical;
-      this.skipRecCounter = counters.findCounter(Counter.MAP_SKIPPED_RECORDS);
+      this.skipRecCounter = reporter.getCounter(Counter.MAP_SKIPPED_RECORDS);
       this.toWriteSkipRecs = toWriteSkipRecs() &&  
         SkipBadRecords.getSkipOutputPath(conf)!=null;
       skipIt = getSkipRanges().skipRangeIterator();
@@ -261,44 +263,50 @@ class MapTask extends Task {
               skipFile.getFileSystem(conf), conf, skipFile,
               (Class<K>) createKey().getClass(),
               (Class<V>) createValue().getClass(), 
-              CompressionType.BLOCK, getReporter(umbilical));
+              CompressionType.BLOCK, getTaskReporter());
       }
       skipWriter.append(key, value);
     }
   }
 
   @Override
-  @SuppressWarnings("unchecked")
   public void run(final JobConf job, final TaskUmbilicalProtocol umbilical)
-    throws IOException {
-
-    final Reporter reporter = getReporter(umbilical);
+    throws IOException, ClassNotFoundException, InterruptedException {
 
     // start thread that will handle communication with parent
-    startCommunicationThread(umbilical);
+    TaskReporter reporter = new TaskReporter(getProgress(), umbilical);
+    reporter.startCommunicationThread();
+    boolean useNewApi = job.getUseNewMapper();
+    initialize(job, getJobID(), reporter, useNewApi);
 
-    initialize(job, reporter);
     // check if it is a cleanupJobTask
     if (cleanupJob) {
-      runCleanup(umbilical);
+      runCleanup(umbilical, reporter);
       return;
     }
     if (setupJob) {
-      runSetupJob(umbilical);
+      runSetupJob(umbilical, reporter);
       return;
     }
 
-    int numReduceTasks = conf.getNumReduceTasks();
-    LOG.info("numReduceTasks: " + numReduceTasks);
-    MapOutputCollector collector = null;
-    if (numReduceTasks > 0) {
-      collector = new MapOutputBuffer(umbilical, job, reporter);
-    } else { 
-      collector = new DirectMapOutputCollector(umbilical, job, reporter);
+    if (useNewApi) {
+      runNewMapper(job, split, umbilical, reporter);
+    } else {
+      runOldMapper(job, split, umbilical, reporter);
     }
+    done(umbilical, reporter);
+  }
+
+  @SuppressWarnings("unchecked")
+  private <INKEY,INVALUE,OUTKEY,OUTVALUE>
+  void runOldMapper(final JobConf job,
+                    final BytesWritable rawSplit,
+                    final TaskUmbilicalProtocol umbilical,
+                    TaskReporter reporter) throws IOException {
+    InputSplit inputSplit = null;
     // reinstantiate the split
     try {
-      instantiatedSplit = (InputSplit) 
+      inputSplit = (InputSplit) 
         ReflectionUtils.newInstance(job.getClassByName(splitClass), job);
     } catch (ClassNotFoundException exp) {
       IOException wrap = new IOException("Split class " + splitClass + 
@@ -308,24 +316,28 @@ class MapTask extends Task {
     }
     DataInputBuffer splitBuffer = new DataInputBuffer();
     splitBuffer.reset(split.getBytes(), 0, split.getLength());
-    instantiatedSplit.readFields(splitBuffer);
+    inputSplit.readFields(splitBuffer);
     
-    // if it is a file split, we can give more details
-    if (instantiatedSplit instanceof FileSplit) {
-      FileSplit fileSplit = (FileSplit) instantiatedSplit;
-      job.set("map.input.file", fileSplit.getPath().toString());
-      job.setLong("map.input.start", fileSplit.getStart());
-      job.setLong("map.input.length", fileSplit.getLength());
-    }
-      
-    RecordReader rawIn =                  // open input
-      job.getInputFormat().getRecordReader(instantiatedSplit, job, reporter);
-    RecordReader in = isSkipping() ? 
-        new SkippingRecordReader(rawIn, getCounters(), umbilical) :
-        new TrackedRecordReader(rawIn, getCounters());
+    updateJobWithSplit(job, inputSplit);
+    reporter.setInputSplit(inputSplit);
+
+    RecordReader<INKEY,INVALUE> rawIn =                  // open input
+      job.getInputFormat().getRecordReader(inputSplit, job, reporter);
+    RecordReader<INKEY,INVALUE> in = isSkipping() ? 
+        new SkippingRecordReader<INKEY,INVALUE>(rawIn, umbilical, reporter) :
+        new TrackedRecordReader<INKEY,INVALUE>(rawIn, reporter);
     job.setBoolean("mapred.skip.on", isSkipping());
 
-    MapRunnable runner =
+
+    int numReduceTasks = conf.getNumReduceTasks();
+    LOG.info("numReduceTasks: " + numReduceTasks);
+    MapOutputCollector collector = null;
+    if (numReduceTasks > 0) {
+      collector = new MapOutputBuffer(umbilical, job, reporter);
+    } else { 
+      collector = new DirectMapOutputCollector(umbilical, job, reporter);
+    }
+    MapRunnable<INKEY,INVALUE,OUTKEY,OUTVALUE> runner =
       ReflectionUtils.newInstance(job.getMapRunnerClass(), job);
 
     try {
@@ -336,7 +348,168 @@ class MapTask extends Task {
       in.close();                               // close input
       collector.close();
     }
-    done(umbilical);
+  }
+
+  /**
+   * Update the job with details about the file split
+   * @param job the job configuration to update
+   * @param inputSplit the file split
+   */
+  private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
+    if (inputSplit instanceof FileSplit) {
+      FileSplit fileSplit = (FileSplit) inputSplit;
+      job.set("map.input.file", fileSplit.getPath().toString());
+      job.setLong("map.input.start", fileSplit.getStart());
+      job.setLong("map.input.length", fileSplit.getLength());
+    }
+  }
+
+  static class NewTrackingRecordReader<K,V> 
+    extends org.apache.hadoop.mapreduce.RecordReader<K,V> {
+    private final org.apache.hadoop.mapreduce.RecordReader<K,V> real;
+    private final org.apache.hadoop.mapreduce.Counter inputRecordCounter;
+    
+    NewTrackingRecordReader(org.apache.hadoop.mapreduce.RecordReader<K,V> real,
+                            TaskReporter reporter) {
+      this.real = real;
+      this.inputRecordCounter = reporter.getCounter(MAP_INPUT_RECORDS);
+    }
+
+    @Override
+    public void close() throws IOException {
+      real.close();
+    }
+
+    @Override
+    public K getCurrentKey() throws IOException, InterruptedException {
+      return real.getCurrentKey();
+    }
+
+    @Override
+    public V getCurrentValue() throws IOException, InterruptedException {
+      return real.getCurrentValue();
+    }
+
+    @Override
+    public float getProgress() throws IOException, InterruptedException {
+      return real.getProgress();
+    }
+
+    @Override
+    public void initialize(org.apache.hadoop.mapreduce.InputSplit split,
+                           org.apache.hadoop.mapreduce.TaskAttemptContext context
+                           ) throws IOException, InterruptedException {
+      real.initialize(split, context);
+    }
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      boolean result = real.nextKeyValue();
+      if (result) {
+        inputRecordCounter.increment(1);
+      }
+      return result;
+    }
+  }
+
+  private class NewOutputCollector<K,V>
+    extends org.apache.hadoop.mapreduce.RecordWriter<K,V> {
+    private MapOutputCollector<K,V> collector;
+
+    NewOutputCollector(JobConf job, 
+                       TaskUmbilicalProtocol umbilical,
+                       TaskReporter reporter
+                       ) throws IOException {
+      collector = new MapOutputBuffer<K,V>(umbilical, job, reporter);
+    }
+
+    @Override
+    public void write(K key, V value) throws IOException {
+      collector.collect(key, value);
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException {
+      collector.flush();
+      collector.close();
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  private <INKEY,INVALUE,OUTKEY,OUTVALUE>
+  void runNewMapper(final JobConf job,
+                    final BytesWritable rawSplit,
+                    final TaskUmbilicalProtocol umbilical,
+                    TaskReporter reporter
+                    ) throws IOException, ClassNotFoundException,
+                             InterruptedException {
+    // make a task context so we can get the classes
+    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
+      new org.apache.hadoop.mapreduce.TaskAttemptContext(job, getTaskID());
+    // make a mapper
+    org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE> mapper =
+      (org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE>)
+        ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
+    // make the input format
+    org.apache.hadoop.mapreduce.InputFormat<INKEY,INVALUE> inputFormat =
+      (org.apache.hadoop.mapreduce.InputFormat<INKEY,INVALUE>)
+        ReflectionUtils.newInstance(taskContext.getInputFormatClass(), job);
+    // rebuild the input split
+    org.apache.hadoop.mapreduce.InputSplit split = null;
+    DataInputBuffer splitBuffer = new DataInputBuffer();
+    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
+    SerializationFactory factory = new SerializationFactory(job);
+    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>
+      deserializer = 
+        (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) 
+        factory.getDeserializer(job.getClassByName(splitClass));
+    deserializer.open(splitBuffer);
+    split = deserializer.deserialize(null);
+
+    org.apache.hadoop.mapreduce.RecordReader<INKEY,INVALUE> input =
+      new NewTrackingRecordReader<INKEY,INVALUE>
+          (inputFormat.createRecordReader(split, taskContext), reporter);
+    
+    job.setBoolean("mapred.skip.on", isSkipping());
+    org.apache.hadoop.mapreduce.RecordWriter output = null;
+    org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context 
+         mapperContext = null;
+    try {
+      Constructor<org.apache.hadoop.mapreduce.Mapper.Context> contextConstructor =
+        org.apache.hadoop.mapreduce.Mapper.Context.class.getConstructor
+        (new Class[]{org.apache.hadoop.mapreduce.Mapper.class,
+                     Configuration.class,
+                     org.apache.hadoop.mapreduce.TaskAttemptID.class,
+                     org.apache.hadoop.mapreduce.RecordReader.class,
+                     org.apache.hadoop.mapreduce.RecordWriter.class,
+                     org.apache.hadoop.mapreduce.OutputCommitter.class,
+                     org.apache.hadoop.mapreduce.StatusReporter.class,
+                     org.apache.hadoop.mapreduce.InputSplit.class});
+
+      // get an output object
+      if (job.getNumReduceTasks() == 0) {
+        output = outputFormat.getRecordWriter(taskContext);
+      } else {
+        output = new NewOutputCollector(job, umbilical, reporter);
+      }
+
+      mapperContext = contextConstructor.newInstance(mapper, job, getTaskID(),
+                                                     input, output, committer,
+                                                     reporter, split);
+
+      input.initialize(split, mapperContext);
+      mapper.run(mapperContext);
+      input.close();
+      output.close(mapperContext);
+    } catch (NoSuchMethodException e) {
+      throw new IOException("Can't find Context constructor", e);
+    } catch (InstantiationException e) {
+      throw new IOException("Can't create Context", e);
+    } catch (InvocationTargetException e) {
+      throw new IOException("Can't invoke Context constructor", e);
+    } catch (IllegalAccessException e) {
+      throw new IOException("Can't invoke Context constructor", e);
+    }
   }
 
   interface MapOutputCollector<K, V>
@@ -353,21 +526,20 @@ class MapTask extends Task {
  
     private RecordWriter<K, V> out = null;
 
-    private Reporter reporter = null;
+    private TaskReporter reporter = null;
 
     private final Counters.Counter mapOutputRecordCounter;
 
     @SuppressWarnings("unchecked")
     public DirectMapOutputCollector(TaskUmbilicalProtocol umbilical,
-        JobConf job, Reporter reporter) throws IOException {
+        JobConf job, TaskReporter reporter) throws IOException {
       this.reporter = reporter;
       String finalName = getOutputName(getPartition());
       FileSystem fs = FileSystem.get(job);
 
       out = job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter);
 
-      Counters counters = getCounters();
-      mapOutputRecordCounter = counters.findCounter(MAP_OUTPUT_RECORDS);
+      mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
     }
 
     public void close() throws IOException {
@@ -393,7 +565,7 @@ class MapTask extends Task {
     private final int partitions;
     private final Partitioner<K, V> partitioner;
     private final JobConf job;
-    private final Reporter reporter;
+    private final TaskReporter reporter;
     private final Class<K> keyClass;
     private final Class<V> valClass;
     private final RawComparator<K> comparator;
@@ -454,7 +626,7 @@ class MapTask extends Task {
 
     @SuppressWarnings("unchecked")
     public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
-                           Reporter reporter) throws IOException {
+                           TaskReporter reporter) throws IOException {
       this.job = job;
       this.reporter = reporter;
       localFs = FileSystem.getLocal(job);
@@ -504,11 +676,10 @@ class MapTask extends Task {
       valSerializer = serializationFactory.getSerializer(valClass);
       valSerializer.open(bb);
       // counters
-      Counters counters = getCounters();
-      mapOutputByteCounter = counters.findCounter(MAP_OUTPUT_BYTES);
-      mapOutputRecordCounter = counters.findCounter(MAP_OUTPUT_RECORDS);
-      combineInputCounter = counters.findCounter(COMBINE_INPUT_RECORDS);
-      combineOutputCounter = counters.findCounter(COMBINE_OUTPUT_RECORDS);
+      mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
+      mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
+      combineInputCounter = reporter.getCounter(COMBINE_INPUT_RECORDS);
+      combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
       // compression
       if (job.getCompressMapOutput()) {
         Class<? extends CompressionCodec> codecClass =

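For orientation, the kind of new-API mapper that runNewMapper() above ends up driving: it builds the Mapper.Context reflectively, calls mapper.run(), and routes context.write() into NewOutputCollector (and from there into MapOutputBuffer) when the job has reducers. The class below is essentially the new WordCount mapper and is only an example, not part of this file.

    import java.io.IOException;
    import java.util.StringTokenizer;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class TokenCountMapper
        extends Mapper<LongWritable, Text, Text, IntWritable> {
      private final static IntWritable ONE = new IntWritable(1);
      private final Text word = new Text();

      @Override
      public void map(LongWritable key, Text value, Context context)
          throws IOException, InterruptedException {
        // Each context.write() lands in NewOutputCollector.write(), i.e. the
        // in-memory MapOutputBuffer when the job has reducers.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
          word.set(itr.nextToken());
          context.write(word, ONE);
        }
      }
    }
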
+ 2 - 0
src/mapred/org/apache/hadoop/mapred/Mapper.java

@@ -127,7 +127,9 @@ import org.apache.hadoop.io.compress.CompressionCodec;
  * @see MapReduceBase
  * @see MapRunnable
  * @see SequenceFile
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.
  */
+@Deprecated
 public interface Mapper<K1, V1, K2, V2> extends JobConfigurable, Closeable {
   
   /** 

+ 74 - 2
src/mapred/org/apache/hadoop/mapred/OutputCommitter.java

@@ -53,9 +53,11 @@ import java.io.IOException;
  * @see FileOutputCommitter 
  * @see JobContext
  * @see TaskAttemptContext 
- *
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.
  */
-public abstract class OutputCommitter {
+@Deprecated
+public abstract class OutputCommitter 
+                extends org.apache.hadoop.mapreduce.OutputCommitter {
   /**
    * For the framework to setup the job output during initialization
    * 
@@ -110,4 +112,74 @@ public abstract class OutputCommitter {
    */
   public abstract void abortTask(TaskAttemptContext taskContext)
   throws IOException;
+
+  /**
+   * This method implements the new interface by calling the old method. Note
+   * that the input types are different between the new and old apis and this
+   * is a bridge between the two.
+   */
+  @Override
+  public final void setupJob(org.apache.hadoop.mapreduce.JobContext jobContext
+                             ) throws IOException {
+    setupJob((JobContext) jobContext);
+  }
+
+  /**
+   * This method implements the new interface by calling the old method. Note
+   * that the input types are different between the new and old apis and this
+   * is a bridge between the two.
+   */
+  @Override
+  public final void cleanupJob(org.apache.hadoop.mapreduce.JobContext context
+                               ) throws IOException {
+    cleanupJob((JobContext) context);
+  }
+
+  /**
+   * This method implements the new interface by calling the old method. Note
+   * that the input types are different between the new and old apis and this
+   * is a bridge between the two.
+   */
+  @Override
+  public final 
+  void setupTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
+                 ) throws IOException {
+    setupTask((TaskAttemptContext) taskContext);
+  }
+  
+  /**
+   * This method implements the new interface by calling the old method. Note
+   * that the input types are different between the new and old apis and this
+   * is a bridge between the two.
+   */
+  @Override
+  public final boolean 
+    needsTaskCommit(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
+                    ) throws IOException {
+    return needsTaskCommit((TaskAttemptContext) taskContext);
+  }
+
+  /**
+   * This method implements the new interface by calling the old method. Note
+   * that the input types are different between the new and old apis and this
+   * is a bridge between the two.
+   */
+  @Override
+  public final 
+  void commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
+                  ) throws IOException {
+    commitTask((TaskAttemptContext) taskContext);
+  }
+  
+  /**
+   * This method implements the new interface by calling the old method. Note
+   * that the input types are different between the new and old apis and this
+   * is a bridge between the two.
+   */
+  @Override
+  public final 
+  void abortTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext
+                 ) throws IOException {
+    abortTask((TaskAttemptContext) taskContext);
+  }
 }

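To illustrate the bridge (not part of the patch): a committer written against the deprecated abstract methods keeps compiling and working, because the final overrides above cast the new-API contexts back to the old ones before delegating. The no-op class is invented for illustration.

    import java.io.IOException;
    import org.apache.hadoop.mapred.JobContext;
    import org.apache.hadoop.mapred.OutputCommitter;
    import org.apache.hadoop.mapred.TaskAttemptContext;

    public class NoOpOutputCommitter extends OutputCommitter {
      @Override
      public void setupJob(JobContext jobContext) throws IOException { }
      @Override
      public void cleanupJob(JobContext jobContext) throws IOException { }
      @Override
      public void setupTask(TaskAttemptContext taskContext) throws IOException { }
      @Override
      public boolean needsTaskCommit(TaskAttemptContext taskContext)
          throws IOException {
        return false;  // nothing to promote, so no commit phase needed
      }
      @Override
      public void commitTask(TaskAttemptContext taskContext) throws IOException { }
      @Override
      public void abortTask(TaskAttemptContext taskContext) throws IOException { }
    }
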
+ 2 - 0
src/mapred/org/apache/hadoop/mapred/OutputFormat.java

@@ -42,7 +42,9 @@ import org.apache.hadoop.util.Progressable;
  * 
  * @see RecordWriter
  * @see JobConf
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.
  */
+@Deprecated
 public interface OutputFormat<K, V> {
 
   /** 

+ 2 - 0
src/mapred/org/apache/hadoop/mapred/Partitioner.java

@@ -29,7 +29,9 @@ package org.apache.hadoop.mapred;
  * record) is sent for reduction.</p>
  * 
  * @see Reducer
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.
  */
+@Deprecated
 public interface Partitioner<K2, V2> extends JobConfigurable {
   
   /** 

+ 1 - 1
src/mapred/org/apache/hadoop/mapred/RawKeyValueIterator.java

@@ -26,7 +26,7 @@ import org.apache.hadoop.util.Progress;
  * <code>RawKeyValueIterator</code> is an iterator used to iterate over
  * the raw keys and values during sort/merge of intermediate data. 
  */
-interface RawKeyValueIterator {
+public interface RawKeyValueIterator {
   /** 
    * Gets the current raw key.
    * 

+ 136 - 30
src/mapred/org/apache/hadoop/mapred/ReduceTask.java

@@ -25,6 +25,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.lang.Math;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
 import java.net.URL;
 import java.net.URLClassLoader;
@@ -73,6 +75,7 @@ import org.apache.hadoop.mapred.IFile.*;
 import org.apache.hadoop.mapred.Merger.Segment;
 import org.apache.hadoop.mapred.SortedRanges.SkipRangeIterator;
 import org.apache.hadoop.mapred.TaskTracker.TaskInProgress;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.metrics.MetricsContext;
 import org.apache.hadoop.metrics.MetricsRecord;
 import org.apache.hadoop.metrics.MetricsUtil;
@@ -258,21 +261,23 @@ class ReduceTask extends Task {
      private SequenceFile.Writer skipWriter;
      private boolean toWriteSkipRecs;
      private boolean hasNext;
+     private TaskReporter reporter;
      
      public SkippingReduceValuesIterator(RawKeyValueIterator in,
          RawComparator<KEY> comparator, Class<KEY> keyClass,
-         Class<VALUE> valClass, Configuration conf, Progressable reporter,
+         Class<VALUE> valClass, Configuration conf, TaskReporter reporter,
          TaskUmbilicalProtocol umbilical) throws IOException {
        super(in, comparator, keyClass, valClass, conf, reporter);
        this.umbilical = umbilical;
        this.skipGroupCounter = 
-         getCounters().findCounter(Counter.REDUCE_SKIPPED_GROUPS);
+         reporter.getCounter(Counter.REDUCE_SKIPPED_GROUPS);
        this.skipRecCounter = 
-         getCounters().findCounter(Counter.REDUCE_SKIPPED_RECORDS);
+         reporter.getCounter(Counter.REDUCE_SKIPPED_RECORDS);
        this.toWriteSkipRecs = toWriteSkipRecs() &&  
          SkipBadRecords.getSkipOutputPath(conf)!=null;
        this.keyClass = keyClass;
        this.valClass = valClass;
+       this.reporter = reporter;
        skipIt = getSkipRanges().skipRangeIterator();
        mayBeSkip();
      }
@@ -326,7 +331,7 @@ class ReduceTask extends Task {
          skipWriter = SequenceFile.createWriter(
                skipFile.getFileSystem(conf), conf, skipFile,
                keyClass, valClass, 
-               CompressionType.BLOCK, getReporter(umbilical));
+               CompressionType.BLOCK, reporter);
        }
        skipWriter.append(key, value);
      }
@@ -335,9 +340,8 @@ class ReduceTask extends Task {
   @Override
   @SuppressWarnings("unchecked")
   public void run(JobConf job, final TaskUmbilicalProtocol umbilical)
-    throws IOException {
+    throws IOException, InterruptedException, ClassNotFoundException {
     job.setBoolean("mapred.skip.on", isSkipping());
-    Reducer reducer = ReflectionUtils.newInstance(job.getReducerClass(), job);
 
     if (!cleanupJob && !setupJob) {
       copyPhase = getProgress().addPhase("copy");
@@ -345,17 +349,18 @@ class ReduceTask extends Task {
       reducePhase = getProgress().addPhase("reduce");
     }
     // start thread that will handle communication with parent
-    startCommunicationThread(umbilical);
-    final Reporter reporter = getReporter(umbilical);
-    initialize(job, reporter);
+    TaskReporter reporter = new TaskReporter(getProgress(), umbilical);
+    reporter.startCommunicationThread();
+    boolean useNewApi = job.getUseNewReducer();
+    initialize(job, getJobID(), reporter, useNewApi);
 
     // check if it is a cleanupJobTask
     if (cleanupJob) {
-      runCleanup(umbilical);
+      runCleanup(umbilical, reporter);
       return;
     }
     if (setupJob) {
-      runSetupJob(umbilical);
+      runSetupJob(umbilical, reporter);
       return;
     }
     
@@ -364,7 +369,7 @@ class ReduceTask extends Task {
 
     boolean isLocal = "local".equals(job.get("mapred.job.tracker", "local"));
     if (!isLocal) {
-      reduceCopier = new ReduceCopier(umbilical, job);
+      reduceCopier = new ReduceCopier(umbilical, job, reporter);
       if (!reduceCopier.fetchOutputs()) {
         if(reduceCopier.mergeThrowable instanceof FSError) {
           LOG.error("Task: " + getTaskID() + " - FSError: " + 
@@ -394,17 +399,42 @@ class ReduceTask extends Task {
     
     sortPhase.complete();                         // sort is complete
     setPhase(TaskStatus.Phase.REDUCE); 
+    Class keyClass = job.getMapOutputKeyClass();
+    Class valueClass = job.getMapOutputValueClass();
+    RawComparator comparator = job.getOutputValueGroupingComparator();
 
+    if (useNewApi) {
+      runNewReducer(job, umbilical, reporter, rIter, comparator, 
+                    keyClass, valueClass);
+    } else {
+      runOldReducer(job, umbilical, reporter, rIter, comparator, 
+                    keyClass, valueClass);
+    }
+    done(umbilical, reporter);
+  }
+
+  @SuppressWarnings("unchecked")
+  private <INKEY,INVALUE,OUTKEY,OUTVALUE>
+  void runOldReducer(JobConf job,
+                     TaskUmbilicalProtocol umbilical,
+                     final TaskReporter reporter,
+                     RawKeyValueIterator rIter,
+                     RawComparator<INKEY> comparator,
+                     Class<INKEY> keyClass,
+                     Class<INVALUE> valueClass) throws IOException {
+    Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer = 
+      ReflectionUtils.newInstance(job.getReducerClass(), job);
     // make output collector
     String finalName = getOutputName(getPartition());
 
     FileSystem fs = FileSystem.get(job);
 
-    final RecordWriter out = 
+    final RecordWriter<OUTKEY,OUTVALUE> out = 
       job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter);  
     
-    OutputCollector collector = new OutputCollector() {
-        public void collect(Object key, Object value)
+    OutputCollector<OUTKEY,OUTVALUE> collector = 
+      new OutputCollector<OUTKEY,OUTVALUE>() {
+        public void collect(OUTKEY key, OUTVALUE value)
           throws IOException {
           out.write(key, value);
           reduceOutputCounter.increment(1);
@@ -415,18 +445,16 @@ class ReduceTask extends Task {
     
     // apply reduce function
     try {
-      Class keyClass = job.getMapOutputKeyClass();
-      Class valClass = job.getMapOutputValueClass();
       //increment processed counter only if skipping feature is enabled
       boolean incrProcCount = SkipBadRecords.getReducerMaxSkipGroups(job)>0 &&
         SkipBadRecords.getAutoIncrReducerProcCount(job);
       
-      ReduceValuesIterator values = isSkipping() ? 
-          new SkippingReduceValuesIterator(rIter, 
-              job.getOutputValueGroupingComparator(), keyClass, valClass, 
+      ReduceValuesIterator<INKEY,INVALUE> values = isSkipping() ? 
+          new SkippingReduceValuesIterator<INKEY,INVALUE>(rIter, 
+              comparator, keyClass, valueClass, 
               job, reporter, umbilical) :
-          new ReduceValuesIterator(rIter, 
-          job.getOutputValueGroupingComparator(), keyClass, valClass, 
+          new ReduceValuesIterator<INKEY,INVALUE>(rIter, 
+          job.getOutputValueGroupingComparator(), keyClass, valueClass, 
           job, reporter);
       values.informReduceProgress();
       while (values.more()) {
@@ -455,13 +483,94 @@ class ReduceTask extends Task {
       
       throw ioe;
     }
-    done(umbilical);
+  }
+
+  static class NewTrackingRecordWriter<K,V> 
+      extends org.apache.hadoop.mapreduce.RecordWriter<K,V> {
+    private final org.apache.hadoop.mapreduce.RecordWriter<K,V> real;
+    private final org.apache.hadoop.mapreduce.Counter outputRecordCounter;
+  
+    NewTrackingRecordWriter(org.apache.hadoop.mapreduce.RecordWriter<K,V> real,
+                            org.apache.hadoop.mapreduce.Counter recordCounter) {
+      this.real = real;
+      this.outputRecordCounter = recordCounter;
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException,
+    InterruptedException {
+      real.close(context);
+    }
+
+    @Override
+    public void write(K key, V value) throws IOException, InterruptedException {
+      real.write(key,value);
+      outputRecordCounter.increment(1);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  private <INKEY,INVALUE,OUTKEY,OUTVALUE>
+  void runNewReducer(JobConf job,
+                     final TaskUmbilicalProtocol umbilical,
+                     final Reporter reporter,
+                     RawKeyValueIterator rIter,
+                     RawComparator<INKEY> comparator,
+                     Class<INKEY> keyClass,
+                     Class<INVALUE> valueClass
+                     ) throws IOException,InterruptedException, 
+                              ClassNotFoundException {
+    // make a task context so we can get the classes
+    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
+      new org.apache.hadoop.mapreduce.TaskAttemptContext(job, getTaskID());
+    // make a reducer
+    org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer =
+      (org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>)
+        ReflectionUtils.newInstance(taskContext.getReducerClass(), job);
+    org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output =
+      (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE>)
+        outputFormat.getRecordWriter(taskContext);
+    job.setBoolean("mapred.skip.on", isSkipping());
+    org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context 
+         reducerContext = null;
+    try {
+      Constructor<org.apache.hadoop.mapreduce.Reducer.Context> contextConstructor =
+        org.apache.hadoop.mapreduce.Reducer.Context.class.getConstructor
+        (new Class[]{org.apache.hadoop.mapreduce.Reducer.class,
+            Configuration.class,
+            org.apache.hadoop.mapreduce.TaskAttemptID.class,
+            RawKeyValueIterator.class,
+            org.apache.hadoop.mapreduce.RecordWriter.class,
+            org.apache.hadoop.mapreduce.OutputCommitter.class,
+            org.apache.hadoop.mapreduce.StatusReporter.class,
+            RawComparator.class,
+            Class.class,
+            Class.class});
+
+      reducerContext = contextConstructor.newInstance(reducer, job, 
+                                                      getTaskID(),
+                                                      rIter, output, committer,
+                                                      reporter, comparator, 
+                                                      keyClass, valueClass);
+
+      reducer.run(reducerContext);
+      output.close(reducerContext);
+    } catch (NoSuchMethodException e) {
+      throw new IOException("Can't find Context constructor", e);
+    } catch (InstantiationException e) {
+      throw new IOException("Can't create Context", e);
+    } catch (InvocationTargetException e) {
+      throw new IOException("Can't invoke Context constructor", e);
+    } catch (IllegalAccessException e) {
+      throw new IOException("Can't invoke Context constructor", e);
+    }
   }
 
   class ReduceCopier<K, V> implements MRConstants {
 
     /** Reference to the umbilical object */
     private TaskUmbilicalProtocol umbilical;
+    private final TaskReporter reporter;
     
     /** Reference to the task object */
     
@@ -1560,10 +1669,11 @@ class ReduceTask extends Task {
       conf.setClassLoader(loader);
     }
     
-    public ReduceCopier(TaskUmbilicalProtocol umbilical, JobConf conf)
-      throws IOException {
+    public ReduceCopier(TaskUmbilicalProtocol umbilical, JobConf conf,
+                        TaskReporter reporter)throws IOException {
       
       configureClasspath(conf);
+      this.reporter = reporter;
       this.shuffleClientMetrics = new ShuffleClientMetrics(conf);
       this.umbilical = umbilical;      
       this.reduceTask = ReduceTask.this;
@@ -1650,8 +1760,6 @@ class ReduceTask extends Task {
       
       copiers = new ArrayList<MapOutputCopier>(numCopiers);
       
-      Reporter reporter = getReporter(umbilical);
-
       // start all the copying threads
       for (int i=0; i < numCopiers; i++) {
         MapOutputCopier copier = new MapOutputCopier(conf, reporter);
@@ -2272,7 +2380,6 @@ class ReduceTask extends Task {
                          codec, null);
             RawKeyValueIterator iter  = null;
             Path tmpDir = new Path(reduceTask.getTaskID().toString());
-            final Reporter reporter = getReporter(umbilical);
             try {
               iter = Merger.merge(conf, rfs,
                                   conf.getMapOutputKeyClass(),
@@ -2312,7 +2419,7 @@ class ReduceTask extends Task {
     }
 
     private class InMemFSMergeThread extends Thread {
-     
+      
       public InMemFSMergeThread() {
         setName("Thread for merging in memory files");
         setDaemon(true);
@@ -2367,7 +2474,6 @@ class ReduceTask extends Task {
                      codec, null);
 
         RawKeyValueIterator rIter = null;
-        final Reporter reporter = getReporter(umbilical);
         try {
           LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
                    " segments...");

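Correspondingly, the kind of new-API reducer that runNewReducer() above instantiates reflectively and feeds through Reducer.Context (raw iterator, grouping comparator, key/value classes). Essentially the new WordCount sum reducer; it is an example, not part of this file.

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class IntSumReducer
        extends Reducer<Text, IntWritable, Text, IntWritable> {
      private final IntWritable result = new IntWritable();

      @Override
      public void reduce(Text key, Iterable<IntWritable> values, Context context)
          throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
          sum += value.get();
        }
        result.set(sum);
        // Written through the RecordWriter that runNewReducer() obtains from
        // the new-API OutputFormat.
        context.write(key, result);
      }
    }
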
+ 2 - 0
src/mapred/org/apache/hadoop/mapred/Reducer.java

@@ -160,7 +160,9 @@ import org.apache.hadoop.io.Closeable;
  * @see Partitioner
  * @see Reporter
  * @see MapReduceBase
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.
  */
+@Deprecated
 public interface Reducer<K2, V2, K3, V3> extends JobConfigurable, Closeable {
   
   /** 

+ 13 - 2
src/mapred/org/apache/hadoop/mapred/Reporter.java

@@ -47,10 +47,13 @@ public interface Reporter extends Progressable {
       }
       public void progress() {
       }
+      public Counter getCounter(Enum<?> name) {
+        return null;
+      }
       public Counter getCounter(String group, String name) {
         return null;
       }
-      public void incrCounter(Enum key, long amount) {
+      public void incrCounter(Enum<?> key, long amount) {
       }
       public void incrCounter(String group, String counter, long amount) {
       }
@@ -66,6 +69,14 @@ public interface Reporter extends Progressable {
    */
   public abstract void setStatus(String status);
   
+  /**
+   * Get the {@link Counter} for the given counter name.
+   * 
+   * @param name counter name
+   * @return the <code>Counter</code> for the given <code>name</code>.
+   */
+  public abstract Counter getCounter(Enum<?> name);
+
   /**
    * Get the {@link Counter} of the given group with the given name.
    * 
@@ -84,7 +95,7 @@ public interface Reporter extends Progressable {
    * @param amount A non-negative amount by which the counter is to 
    *               be incremented.
    */
-  public abstract void incrCounter(Enum key, long amount);
+  public abstract void incrCounter(Enum<?> key, long amount);
   
   /**
    * Increments the counter identified by the group and counter name

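A short sketch, not part of the patch, of the new enum overload in use from an old-API map method; the class and enum names are invented. The increment() call comes from the counter object the reporter returns.

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class CountingMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, LongWritable, Text> {
      enum MyCounters { RECORDS_SEEN }

      public void map(LongWritable key, Text value,
                      OutputCollector<LongWritable, Text> output,
                      Reporter reporter) throws IOException {
        // getCounter(Enum) is the overload added above.
        reporter.getCounter(MyCounters.RECORDS_SEEN).increment(1);
        output.collect(key, value);
      }
    }
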
+ 8 - 0
src/mapred/org/apache/hadoop/mapred/RunningJob.java

@@ -181,4 +181,12 @@ public interface RunningJob {
    * @throws IOException
    */
   public Counters getCounters() throws IOException;
+  
+  /**
+   * Gets the diagnostic messages for a given task attempt.
+   * @param taskid the id of the task attempt to get the diagnostics for
+   * @return the list of diagnostic messages for the task
+   * @throws IOException
+   */
+  public String[] getTaskDiagnostics(TaskAttemptID taskid) throws IOException;
 }

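A sketch, not part of the patch, of fetching the new per-attempt diagnostics after a job finishes. The attempt id literal and the empty JobConf are placeholders; a real job would be fully configured before submitJob().

    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;
    import org.apache.hadoop.mapred.TaskAttemptID;

    public class DiagnosticsDump {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();                 // job setup elided
        RunningJob job = new JobClient(conf).submitJob(conf);
        job.waitForCompletion();
        if (!job.isSuccessful()) {
          TaskAttemptID attempt =
              TaskAttemptID.forName("attempt_200812150000_0001_m_000000_0");
          // getTaskDiagnostics is the method added above.
          for (String message : job.getTaskDiagnostics(attempt)) {
            System.err.println(message);
          }
        }
      }
    }
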
+ 6 - 1
src/mapred/org/apache/hadoop/mapred/SequenceFileInputFormat.java

@@ -27,7 +27,12 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.MapFile;
 
-/** An {@link InputFormat} for {@link SequenceFile}s. */
+/** An {@link InputFormat} for {@link SequenceFile}s. 
+ * @deprecated Use 
+ *  {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat} 
+ *  instead.
+ */
+@Deprecated
 public class SequenceFileInputFormat<K, V> extends FileInputFormat<K, V> {
 
   public SequenceFileInputFormat() {

+ 6 - 1
src/mapred/org/apache/hadoop/mapred/SequenceFileOutputFormat.java

@@ -32,7 +32,12 @@ import org.apache.hadoop.io.compress.DefaultCodec;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.*;
 
-/** An {@link OutputFormat} that writes {@link SequenceFile}s. */
+/** An {@link OutputFormat} that writes {@link SequenceFile}s. 
+ * @deprecated Use 
+ *   {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat} 
+ *   instead.
+ */
+@Deprecated
 public class SequenceFileOutputFormat <K,V> extends FileOutputFormat<K, V> {
 
   public RecordWriter<K, V> getRecordWriter(

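A sketch, not part of the patch, of wiring the replacement classes that the two @deprecated notes point to into a new-API Job. The Job constructor and setters used here are what one would expect on org.apache.hadoop.mapreduce.Job and are assumptions; mapper/reducer setup is elided, so the identity defaults from the new API apply.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

    public class SequenceFilePassThrough {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "seqfile pass-through");
        job.setJarByClass(SequenceFilePassThrough.class);
        // The new lib.input / lib.output formats named in the javadoc above.
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
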
+ 201 - 172
src/mapred/org/apache/hadoop/mapred/Task.java

@@ -22,10 +22,8 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.text.NumberFormat;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
-import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -109,12 +107,11 @@ abstract class Task implements Writable, Configurable {
   ////////////////////////////////////////////
 
   private String jobFile;                         // job configuration file
-  private final TaskAttemptID taskId;             // unique, includes job id
+  private TaskAttemptID taskId;                   // unique, includes job id
   private int partition;                          // id within job
   TaskStatus taskStatus;                          // current status of the task
   protected boolean cleanupJob = false;
   protected boolean setupJob = false;
-  private Thread pingProgressThread;
   
   //skip ranges based on failed ranges from previous attempts
   private SortedRanges skipRanges = new SortedRanges();
@@ -132,6 +129,8 @@ abstract class Task implements Writable, Configurable {
   private final static int MAX_RETRIES = 10;
   protected JobContext jobContext;
   protected TaskAttemptContext taskContext;
+  protected org.apache.hadoop.mapreduce.OutputFormat<?,?> outputFormat;
+  protected org.apache.hadoop.mapreduce.OutputCommitter committer;
   private volatile boolean commitPending = false;
   protected final Counters.Counter spilledRecordsCounter;
 
@@ -168,7 +167,7 @@ abstract class Task implements Writable, Configurable {
   public void setJobFile(String jobFile) { this.jobFile = jobFile; }
   public String getJobFile() { return jobFile; }
   public TaskAttemptID getTaskID() { return taskId; }
-  public Counters getCounters() { return counters; }
+  Counters getCounters() { return counters; }
   
   /**
    * Get the job name for this task.
@@ -271,7 +270,7 @@ abstract class Task implements Writable, Configurable {
   }
   public void readFields(DataInput in) throws IOException {
     jobFile = Text.readString(in);
-    taskId.readFields(in);
+    taskId = TaskAttemptID.read(in);
     partition = in.readInt();
     taskStatus.readFields(in);
     this.mapOutputFile.setJobId(taskId.getJobID()); 
@@ -315,7 +314,7 @@ abstract class Task implements Writable, Configurable {
    * @param umbilical for progress reports
    */
   public abstract void run(JobConf job, TaskUmbilicalProtocol umbilical)
-    throws IOException;
+    throws IOException, ClassNotFoundException, InterruptedException;
 
 
   /** Return an approprate thread runner for this task. 
@@ -330,160 +329,194 @@ abstract class Task implements Writable, Configurable {
 
   // Current counters
   private transient Counters counters = new Counters();
-  
-  /**
-   * flag that indicates whether progress update needs to be sent to parent.
-   * If true, it has been set. If false, it has been reset. 
-   * Using AtomicBoolean since we need an atomic read & reset method. 
-   */  
-  private AtomicBoolean progressFlag = new AtomicBoolean(false);
+
   /* flag to track whether task is done */
   private AtomicBoolean taskDone = new AtomicBoolean(false);
-  // getters and setters for flag
-  private void setProgressFlag() {
-    progressFlag.set(true);
-  }
-  private boolean resetProgressFlag() {
-    return progressFlag.getAndSet(false);
-  }
   
   public abstract boolean isMapTask();
 
   public Progress getProgress() { return taskProgress; }
 
-  InputSplit getInputSplit() throws UnsupportedOperationException {
-    throw new UnsupportedOperationException("Input only available on map");
-  }
-
-  /** 
-   * The communication thread handles communication with the parent (Task Tracker). 
-   * It sends progress updates if progress has been made or if the task needs to 
-   * let the parent know that it's alive. It also pings the parent to see if it's alive. 
-   */
-  protected void startCommunicationThread(final TaskUmbilicalProtocol umbilical) {
-    pingProgressThread = new Thread(new Runnable() {
-        public void run() {
-          final int MAX_RETRIES = 3;
-          int remainingRetries = MAX_RETRIES;
-          // get current flag value and reset it as well
-          boolean sendProgress = resetProgressFlag();
-          while (!taskDone.get()) {
-            try {
-              boolean taskFound = true; // whether TT knows about this task
-              // sleep for a bit
-              try {
-                Thread.sleep(PROGRESS_INTERVAL);
-              } 
-              catch (InterruptedException e) {
-                LOG.debug(getTaskID() + " Progress/ping thread exiting " +
-                                        "since it got interrupted");
-                break;
-              }
-              
-              if (sendProgress) {
-                // we need to send progress update
-                updateCounters();
-                if (commitPending) {
-                  taskStatus.statusUpdate(TaskStatus.State.COMMIT_PENDING,
-                                          taskProgress.get(),
-                                          taskProgress.toString(), 
-                                          counters);
-                } else {
-                  taskStatus.statusUpdate(TaskStatus.State.RUNNING,
-                                          taskProgress.get(),
-                                          taskProgress.toString(), 
-                                          counters);
-                }
-                taskFound = umbilical.statusUpdate(taskId, taskStatus);
-                taskStatus.clearStatus();
-              }
-              else {
-                // send ping 
-                taskFound = umbilical.ping(taskId);
-              }
-              
-              // if Task Tracker is not aware of our task ID (probably because it died and 
-              // came back up), kill ourselves
-              if (!taskFound) {
-                LOG.warn("Parent died.  Exiting "+taskId);
-                System.exit(66);
-              }
-              
-              sendProgress = resetProgressFlag(); 
-              remainingRetries = MAX_RETRIES;
-            } 
-            catch (Throwable t) {
-              LOG.info("Communication exception: " + StringUtils.stringifyException(t));
-              remainingRetries -=1;
-              if (remainingRetries == 0) {
-                ReflectionUtils.logThreadInfo(LOG, "Communication exception", 0);
-                LOG.warn("Last retry, killing "+taskId);
-                System.exit(65);
-              }
-            }
-          }
-        }
-      }, "Comm thread for "+taskId);
-    pingProgressThread.setDaemon(true);
-    pingProgressThread.start();
-    LOG.debug(getTaskID() + " Progress/ping thread started");
-  }
-
-  public void initialize(JobConf job, Reporter reporter) 
-  throws IOException {
-    jobContext = new JobContext(job, reporter);
+  public void initialize(JobConf job, JobID id, 
+                         Reporter reporter,
+                         boolean useNewApi) throws IOException, 
+                                                   ClassNotFoundException,
+                                                   InterruptedException {
+    jobContext = new JobContext(job, id, reporter);
     taskContext = new TaskAttemptContext(job, taskId, reporter);
-    OutputCommitter committer = conf.getOutputCommitter();
+    if (useNewApi) {
+      LOG.debug("using new api for output committer");
+      outputFormat =
+        ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), job);
+      committer = outputFormat.getOutputCommitter(taskContext);
+    } else {
+      committer = conf.getOutputCommitter();
+    }
     committer.setupTask(taskContext);
   }
   
-  protected Reporter getReporter(final TaskUmbilicalProtocol umbilical) 
-    throws IOException 
-  {
-    return new Reporter() {
-        public void setStatus(String status) {
-          taskProgress.setStatus(status);
-          // indicate that progress update needs to be sent
-          setProgressFlag();
-        }
-        public void progress() {
-          // indicate that progress update needs to be sent
-          setProgressFlag();
+  protected class TaskReporter 
+      extends org.apache.hadoop.mapreduce.StatusReporter
+      implements Runnable, Reporter {
+    private TaskUmbilicalProtocol umbilical;
+    private InputSplit split = null;
+    private Progress taskProgress;
+    private Thread pingThread = null;
+    /**
+     * flag that indicates whether progress update needs to be sent to parent.
+     * If true, it has been set. If false, it has been reset. 
+     * Using AtomicBoolean since we need an atomic read & reset method. 
+     */  
+    private AtomicBoolean progressFlag = new AtomicBoolean(false);
+    
+    TaskReporter(Progress taskProgress,
+                 TaskUmbilicalProtocol umbilical) {
+      this.umbilical = umbilical;
+      this.taskProgress = taskProgress;
+    }
+    // getters and setters for flag
+    void setProgressFlag() {
+      progressFlag.set(true);
+    }
+    boolean resetProgressFlag() {
+      return progressFlag.getAndSet(false);
+    }
+    public void setStatus(String status) {
+      taskProgress.setStatus(status);
+      // indicate that progress update needs to be sent
+      setProgressFlag();
+    }
+    public void setProgress(float progress) {
+      taskProgress.set(progress);
+      // indicate that progress update needs to be sent
+      setProgressFlag();
+    }
+    public void progress() {
+      // indicate that progress update needs to be sent
+      setProgressFlag();
+    }
+    public Counters.Counter getCounter(String group, String name) {
+      Counters.Counter counter = null;
+      if (counters != null) {
+        counter = counters.findCounter(group, name);
+      }
+      return counter;
+    }
+    public Counters.Counter getCounter(Enum<?> name) {
+      return counters == null ? null : counters.findCounter(name);
+    }
+    public void incrCounter(Enum key, long amount) {
+      if (counters != null) {
+        counters.incrCounter(key, amount);
+      }
+      setProgressFlag();
+    }
+    public void incrCounter(String group, String counter, long amount) {
+      if (counters != null) {
+        counters.incrCounter(group, counter, amount);
+      }
+      if(skipping && SkipBadRecords.COUNTER_GROUP.equals(group) && (
+          SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS.equals(counter) ||
+          SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS.equals(counter))) {
+        //if application reports the processed records, move the 
+        //currentRecStartIndex to the next.
+        //currentRecStartIndex is the start index which has not yet been 
+        //finished and is still in task's stomach.
+        for(int i=0;i<amount;i++) {
+          currentRecStartIndex = currentRecIndexIterator.next();
         }
-        public Counters.Counter getCounter(String group, String name) {
-          Counters.Counter counter = null;
-          if (counters != null) {
-            counter = counters.findCounter(group, name);
+      }
+      setProgressFlag();
+    }
+    public void setInputSplit(InputSplit split) {
+      this.split = split;
+    }
+    public InputSplit getInputSplit() throws UnsupportedOperationException {
+      if (split == null) {
+        throw new UnsupportedOperationException("Input only available on map");
+      } else {
+        return split;
+      }
+    }    
+    /** 
+     * The communication thread handles communication with the parent (Task Tracker). 
+     * It sends progress updates if progress has been made or if the task needs to 
+     * let the parent know that it's alive. It also pings the parent to see if it's alive. 
+     */
+    public void run() {
+      final int MAX_RETRIES = 3;
+      int remainingRetries = MAX_RETRIES;
+      // get current flag value and reset it as well
+      boolean sendProgress = resetProgressFlag();
+      while (!taskDone.get()) {
+        try {
+          boolean taskFound = true; // whether TT knows about this task
+          // sleep for a bit
+          try {
+            Thread.sleep(PROGRESS_INTERVAL);
+          } 
+          catch (InterruptedException e) {
+            LOG.debug(getTaskID() + " Progress/ping thread exiting " +
+            "since it got interrupted");
+            break;
           }
-          return counter;
-        }
-        public void incrCounter(Enum key, long amount) {
-          if (counters != null) {
-            counters.incrCounter(key, amount);
+
+          if (sendProgress) {
+            // we need to send progress update
+            updateCounters();
+            if (commitPending) {
+              taskStatus.statusUpdate(TaskStatus.State.COMMIT_PENDING,
+                                      taskProgress.get(),
+                                      taskProgress.toString(), 
+                                      counters);
+            } else {
+              taskStatus.statusUpdate(TaskStatus.State.RUNNING,
+                                      taskProgress.get(),
+                                      taskProgress.toString(), 
+                                      counters);
+            }
+            taskFound = umbilical.statusUpdate(taskId, taskStatus);
+            taskStatus.clearStatus();
           }
-          setProgressFlag();
-        }
-        public void incrCounter(String group, String counter, long amount) {
-          if (counters != null) {
-            counters.incrCounter(group, counter, amount);
+          else {
+            // send ping 
+            taskFound = umbilical.ping(taskId);
           }
-          if(skipping && SkipBadRecords.COUNTER_GROUP.equals(group) && (
-              SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS.equals(counter) ||
-              SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS.equals(counter))) {
-            //if application reports the processed records, move the 
-            //currentRecStartIndex to the next.
-            //currentRecStartIndex is the start index which has not yet been 
-            //finished and is still in task's stomach.
-            for(int i=0;i<amount;i++) {
-              currentRecStartIndex = currentRecIndexIterator.next();
-            }
+
+          // if Task Tracker is not aware of our task ID (probably because it died and 
+          // came back up), kill ourselves
+          if (!taskFound) {
+            LOG.warn("Parent died.  Exiting "+taskId);
+            System.exit(66);
+          }
+
+          sendProgress = resetProgressFlag(); 
+          remainingRetries = MAX_RETRIES;
+        } 
+        catch (Throwable t) {
+          LOG.info("Communication exception: " + StringUtils.stringifyException(t));
+          remainingRetries -=1;
+          if (remainingRetries == 0) {
+            ReflectionUtils.logThreadInfo(LOG, "Communication exception", 0);
+            LOG.warn("Last retry, killing "+taskId);
+            System.exit(65);
           }
-          setProgressFlag();
-        }
-        public InputSplit getInputSplit() throws UnsupportedOperationException {
-          return Task.this.getInputSplit();
         }
-      };
+      }
+    }
+    public void startCommunicationThread() {
+      if (pingThread == null) {
+        pingThread = new Thread(this, "communication thread");
+        pingThread.setDaemon(true);
+        pingThread.start();
+      }
+    }
+    public void stopCommunicationThread() throws InterruptedException {
+      if (pingThread != null) {
+        pingThread.interrupt();
+        pingThread.join();
+      }
+    }
   }
   
   /**
@@ -505,12 +538,6 @@ abstract class Task implements Writable, Configurable {
     umbilical.reportNextRecordRange(taskId, range);
   }
 
-  public void setProgress(float progress) {
-    taskProgress.set(progress);
-    // indicate that progress update needs to be sent
-    setProgressFlag();
-  }
-
   /**
    * An updater that tracks the last number reported for a given file
    * system and only creates the counters when they are needed.
@@ -569,14 +596,15 @@ abstract class Task implements Writable, Configurable {
     }
   }
 
-  public void done(TaskUmbilicalProtocol umbilical) throws IOException {
+  public void done(TaskUmbilicalProtocol umbilical,
+                   TaskReporter reporter
+                   ) throws IOException, InterruptedException {
     LOG.info("Task:" + taskId + " is done."
              + " And is in the process of commiting");
     updateCounters();
 
-    OutputCommitter outputCommitter = conf.getOutputCommitter();
     // check whether the commit is required.
-    boolean commitRequired = outputCommitter.needsTaskCommit(taskContext);
+    boolean commitRequired = committer.needsTaskCommit(taskContext);
     if (commitRequired) {
       int retries = MAX_RETRIES;
       taskStatus.setRunState(TaskStatus.State.COMMIT_PENDING);
@@ -597,13 +625,10 @@ abstract class Task implements Writable, Configurable {
         }
       }
       //wait for commit approval and commit
-      commit(umbilical, outputCommitter);
+      commit(umbilical, reporter, committer);
     }
     taskDone.set(true);
-    pingProgressThread.interrupt();
-    try {
-      pingProgressThread.join();
-    } catch (InterruptedException ie) {}
+    reporter.stopCommunicationThread();
     sendLastUpdate(umbilical);
     //signal the tasktracker that we are done
     sendDone(umbilical);
@@ -666,7 +691,9 @@ abstract class Task implements Writable, Configurable {
   }
 
   private void commit(TaskUmbilicalProtocol umbilical,
-                      OutputCommitter committer) throws IOException {
+                      TaskReporter reporter,
+                      org.apache.hadoop.mapreduce.OutputCommitter committer
+                      ) throws IOException {
     int retries = MAX_RETRIES;
     while (true) {
       try {
@@ -676,7 +703,7 @@ abstract class Task implements Writable, Configurable {
           } catch(InterruptedException ie) {
             //ignore
           }
-          setProgressFlag();
+          reporter.setProgressFlag();
         }
         // task can Commit now  
         try {
@@ -686,7 +713,7 @@ abstract class Task implements Writable, Configurable {
         } catch (IOException iee) {
           LOG.warn("Failure committing: " + 
                     StringUtils.stringifyException(iee));
-          discardOutput(taskContext, committer);
+          discardOutput(taskContext);
           throw iee;
         }
       } catch (IOException ie) {
@@ -694,15 +721,15 @@ abstract class Task implements Writable, Configurable {
             StringUtils.stringifyException(ie));
         if (--retries == 0) {
           //if it couldn't commit a successfully then delete the output
-          discardOutput(taskContext, committer);
+          discardOutput(taskContext);
           System.exit(68);
         }
       }
     }
   }
 
-  private void discardOutput(TaskAttemptContext taskContext,
-                             OutputCommitter committer) {
+  private 
+  void discardOutput(TaskAttemptContext taskContext) {
     try {
       committer.abortTask(taskContext);
     } catch (IOException ioe)  {
@@ -711,22 +738,24 @@ abstract class Task implements Writable, Configurable {
     }
   }
 
-  protected void runCleanup(TaskUmbilicalProtocol umbilical) 
-  throws IOException {
+  protected void runCleanup(TaskUmbilicalProtocol umbilical,
+                            TaskReporter reporter
+                            ) throws IOException, InterruptedException {
     // set phase for this task
     setPhase(TaskStatus.Phase.CLEANUP);
     getProgress().setStatus("cleanup");
     // do the cleanup
-    conf.getOutputCommitter().cleanupJob(jobContext);
-    done(umbilical);
+    committer.cleanupJob(jobContext);
+    done(umbilical, reporter);
   }
 
-  protected void runSetupJob(TaskUmbilicalProtocol umbilical) 
-  throws IOException {
+  protected void runSetupJob(TaskUmbilicalProtocol umbilical,
+                             TaskReporter reporter
+                             ) throws IOException, InterruptedException {
     // do the setup
     getProgress().setStatus("setup");
-    conf.getOutputCommitter().setupJob(jobContext);
-    done(umbilical);
+    committer.setupJob(jobContext);
+    done(umbilical, reporter);
   }
   
   public void setConf(Configuration conf) {

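The TaskReporter above is the task-side implementation of the old Reporter interface: every setStatus, progress or counter call made by user code raises the progress flag, and the communication thread forwards the update to the TaskTracker on its next PROGRESS_INTERVAL pass. A minimal sketch of the application side using the old API; the mapper and its counter enum are illustrative and not part of this commit:

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    // Each Reporter call below ends up in TaskReporter and sets the progress
    // flag that the communication thread checks before sending a status update.
    public class AuditMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

      enum Records { SEEN }   // resolved through Counters.findCounter(Enum)

      public void map(LongWritable key, Text value,
                      OutputCollector<Text, LongWritable> output,
                      Reporter reporter) throws IOException {
        reporter.setStatus("at offset " + key.get());  // status text + flag
        reporter.incrCounter(Records.SEEN, 1);         // counter + flag
        output.collect(value, new LongWritable(1));
        reporter.progress();                           // flag only
      }
    }
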
+ 20 - 14
src/mapred/org/apache/hadoop/mapred/TaskAttemptContext.java

@@ -19,20 +19,23 @@ package org.apache.hadoop.mapred;
 
 import org.apache.hadoop.util.Progressable;
 
-public class TaskAttemptContext extends JobContext {
+/**
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext}
+ *   instead.
+ */
+@Deprecated
+public class TaskAttemptContext 
+       extends org.apache.hadoop.mapreduce.TaskAttemptContext {
+  private Progressable progress;
 
-  private JobConf conf;
-  private TaskAttemptID taskid;
-  
   TaskAttemptContext(JobConf conf, TaskAttemptID taskid) {
     this(conf, taskid, Reporter.NULL);
   }
   
   TaskAttemptContext(JobConf conf, TaskAttemptID taskid,
                      Progressable progress) {
-    super(conf, progress);
-    this.conf = conf;
-    this.taskid = taskid;
+    super(conf, taskid);
+    this.progress = progress;
   }
   
   /**
@@ -41,16 +44,19 @@ public class TaskAttemptContext extends JobContext {
    * @return TaskAttemptID
    */
   public TaskAttemptID getTaskAttemptID() {
-    return taskid;
+    return (TaskAttemptID) super.getTaskAttemptID();
+  }
+  
+  public Progressable getProgressible() {
+    return progress;
   }
   
-  /**
-   * Get the job Configuration.
-   * 
-   * @return JobConf
-   */
   public JobConf getJobConf() {
-    return conf;
+    return (JobConf) getConfiguration();
   }
 
+  @Override
+  public void progress() {
+    progress.progress();
+  }
 }

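The committer calls threaded through setupTask, done() and commit() above are made against an org.apache.hadoop.mapreduce.OutputCommitter, and the old TaskAttemptContext shown here now simply wraps the new context type those methods expect. A hedged sketch of a committer with no task output to promote, so needsTaskCommit returns false and done() never enters the COMMIT_PENDING path; the class is illustrative, not part of this commit:

    import java.io.IOException;

    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.OutputCommitter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    // A committer that writes nothing of its own, so there is never anything
    // to commit or abort for an individual task attempt.
    public class NoOpCommitter extends OutputCommitter {
      @Override public void setupJob(JobContext context) throws IOException { }
      @Override public void cleanupJob(JobContext context) throws IOException { }
      @Override public void setupTask(TaskAttemptContext context) throws IOException { }
      @Override public boolean needsTaskCommit(TaskAttemptContext context)
          throws IOException {
        return false;   // done() skips commit() entirely
      }
      @Override public void commitTask(TaskAttemptContext context) throws IOException { }
      @Override public void abortTask(TaskAttemptContext context) throws IOException { }
    }
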
+ 22 - 92
src/mapred/org/apache/hadoop/mapred/TaskAttemptID.java

@@ -19,7 +19,6 @@
 package org.apache.hadoop.mapred;
 
 import java.io.DataInput;
-import java.io.DataOutput;
 import java.io.IOException;
 
 /**
@@ -42,9 +41,8 @@ import java.io.IOException;
  * @see JobID
  * @see TaskID
  */
-public class TaskAttemptID extends ID {
-  private static final String ATTEMPT = "attempt";
-  private final TaskID taskId;
+@Deprecated
+public class TaskAttemptID extends org.apache.hadoop.mapreduce.TaskAttemptID {
   
   /**
    * Constructs a TaskAttemptID object from given {@link TaskID}.  
@@ -52,11 +50,7 @@ public class TaskAttemptID extends ID {
    * @param id the task attempt number
    */
   public TaskAttemptID(TaskID taskId, int id) {
-    super(id);
-    if(taskId == null) {
-      throw new IllegalArgumentException("taskId cannot be null");
-    }
-    this.taskId = taskId;
+    super(taskId, id);
   }
   
   /**
@@ -73,77 +67,31 @@ public class TaskAttemptID extends ID {
   }
   
   public TaskAttemptID() { 
-    taskId = new TaskID();
-  }
-  
-  /** Returns the {@link JobID} object that this task attempt belongs to */
-  public JobID getJobID() {
-    return taskId.getJobID();
-  }
-  
-  /** Returns the {@link TaskID} object that this task attempt belongs to */
-  public TaskID getTaskID() {
-    return taskId;
-  }
-  
-  /**Returns whether this TaskAttemptID is a map ID */
-  public boolean isMap() {
-    return taskId.isMap();
-  }
-  
-  @Override
-  public boolean equals(Object o) {
-    if (!super.equals(o))
-      return false;
-    if(o.getClass().equals(TaskAttemptID.class)) {
-      TaskAttemptID that = (TaskAttemptID)o;
-      return this.id==that.id
-             && this.taskId.equals(that.taskId);
-    }
-    else return false;
-  }
-  
-  /**Compare TaskIds by first tipIds, then by task numbers. */
-  @Override
-  public int compareTo(ID o) {
-    TaskAttemptID that = (TaskAttemptID)o;
-    int tipComp = this.taskId.compareTo(that.taskId);
-    if(tipComp == 0) {
-      return this.id - that.id;
-    }
-    else return tipComp;
-  }
-  @Override
-  public String toString() { 
-    return appendTo(new StringBuilder(ATTEMPT)).toString();
+    super(new TaskID(), 0);
   }
 
   /**
-   * Add the unique string to the StringBuilder
-   * @param builder the builder to append ot
-   * @return the builder that was passed in.
+   * Downgrade a new TaskAttemptID to an old one
+   * @param old the new id
+   * @return either old or a new TaskAttemptID constructed to match old
    */
-  protected StringBuilder appendTo(StringBuilder builder) {
-    return taskId.appendTo(builder).append(SEPARATOR).append(id);
-  }
-  
-  @Override
-  public int hashCode() {
-    return taskId.hashCode() * 5 + id;
+  public static 
+  TaskAttemptID downgrade(org.apache.hadoop.mapreduce.TaskAttemptID old) {
+    if (old instanceof TaskAttemptID) {
+      return (TaskAttemptID) old;
+    } else {
+      return new TaskAttemptID(TaskID.downgrade(old.getTaskID()), old.getId());
+    }
   }
-  
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    super.readFields(in);
-    taskId.readFields(in);
+
+  public TaskID getTaskID() {
+    return (TaskID) super.getTaskID();
   }
 
-  @Override
-  public void write(DataOutput out) throws IOException {
-    super.write(out);
-    taskId.write(out);
+  public JobID getJobID() {
+    return (JobID) super.getJobID();
   }
-  
+
   @Deprecated
   public static TaskAttemptID read(DataInput in) throws IOException {
     TaskAttemptID taskId = new TaskAttemptID();
@@ -157,25 +105,8 @@ public class TaskAttemptID extends ID {
    */
   public static TaskAttemptID forName(String str
                                       ) throws IllegalArgumentException {
-    if(str == null)
-      return null;
-    try {
-      String[] parts = str.split(Character.toString(SEPARATOR));
-      if(parts.length == 6) {
-        if(parts[0].equals(ATTEMPT)) {
-          boolean isMap = false;
-          if(parts[3].equals("m")) isMap = true;
-          else if(parts[3].equals("r")) isMap = false;
-          else throw new Exception();
-          return new TaskAttemptID(parts[1], Integer.parseInt(parts[2]),
-              isMap, Integer.parseInt(parts[4]), Integer.parseInt(parts[5]));
-        }
-      }
-    } catch (Exception ex) {
-      //fall below
-    }
-    throw new IllegalArgumentException("TaskAttemptId string : " + str 
-        + " is not properly formed");
+    return (TaskAttemptID) 
+             org.apache.hadoop.mapreduce.TaskAttemptID.forName(str);
   }
   
   /** 
@@ -215,5 +146,4 @@ public class TaskAttemptID extends ID {
         .append(attemptId != null ? attemptId : "[0-9]*");
     return builder;
   }
-  
 }

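With the old TaskAttemptID reduced to a shim over org.apache.hadoop.mapreduce.TaskAttemptID, the static downgrade method is the bridge back to the old type for code that still requires it. A short sketch of parsing an attempt string with the new class and downgrading the result; the id string itself is invented for illustration:

    import org.apache.hadoop.mapred.TaskAttemptID;

    public class IdDowngradeExample {
      public static void main(String[] args) {
        // Parse with the new class, then convert for old-API consumers.
        org.apache.hadoop.mapreduce.TaskAttemptID newId =
            org.apache.hadoop.mapreduce.TaskAttemptID.forName(
                "attempt_200812150000_0001_m_000000_0");
        TaskAttemptID oldId = TaskAttemptID.downgrade(newId);
        System.out.println(oldId.getTaskID() + " of " + oldId.getJobID());
      }
    }
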
+ 25 - 116
src/mapred/org/apache/hadoop/mapred/TaskID.java

@@ -19,9 +19,7 @@
 package org.apache.hadoop.mapred;
 
 import java.io.DataInput;
-import java.io.DataOutput;
 import java.io.IOException;
-import java.text.NumberFormat;
 
 /**
  * TaskID represents the immutable and unique identifier for 
@@ -45,16 +43,8 @@ import java.text.NumberFormat;
  * @see JobID
  * @see TaskAttemptID
  */
-public class TaskID extends ID {
-  private static final String TASK = "task";
-  private static final NumberFormat idFormat = NumberFormat.getInstance();
-  static {
-    idFormat.setGroupingUsed(false);
-    idFormat.setMinimumIntegerDigits(6);
-  }
-  
-  private JobID jobId;
-  private boolean isMap;
+@Deprecated
+public class TaskID extends org.apache.hadoop.mapreduce.TaskID {
 
   /**
    * Constructs a TaskID object from given {@link JobID}.  
@@ -62,13 +52,8 @@ public class TaskID extends ID {
    * @param isMap whether the tip is a map 
    * @param id the tip number
    */
-  public TaskID(JobID jobId, boolean isMap, int id) {
-    super(id);
-    if(jobId == null) {
-      throw new IllegalArgumentException("jobId cannot be null");
-    }
-    this.jobId = jobId;
-    this.isMap = isMap;
+  public TaskID(org.apache.hadoop.mapreduce.JobID jobId, boolean isMap,int id) {
+    super(jobId, isMap, id);
   }
   
   /**
@@ -82,81 +67,22 @@ public class TaskID extends ID {
     this(new JobID(jtIdentifier, jobId), isMap, id);
   }
   
-  public TaskID() { 
-    jobId = new JobID();
+  public TaskID() {
+    super(new JobID(), false, 0);
   }
   
-  /** Returns the {@link JobID} object that this tip belongs to */
-  public JobID getJobID() {
-    return jobId;
-  }
-  
-  /**Returns whether this TaskID is a map ID */
-  public boolean isMap() {
-    return isMap;
-  }
-  
-  @Override
-  public boolean equals(Object o) {
-    if (!super.equals(o))
-      return false;
-
-    TaskID that = (TaskID)o;
-    return this.isMap == that.isMap && this.jobId.equals(that.jobId);
-  }
-
-  /**Compare TaskInProgressIds by first jobIds, then by tip numbers. Reduces are 
-   * defined as greater then maps.*/
-  @Override
-  public int compareTo(ID o) {
-    TaskID that = (TaskID)o;
-    int jobComp = this.jobId.compareTo(that.jobId);
-    if (jobComp == 0) {
-      if (this.isMap == that.isMap) {
-        return this.id - that.id;
-      } else {
-        return this.isMap ? -1 : 1;
-      }
-    } else {
-      return jobComp;
-    }
-  }
-  
-  @Override
-  public String toString() { 
-    return appendTo(new StringBuilder(TASK)).toString();
-  }
-
   /**
-   * Add the unique string to the given builder.
-   * @param builder the builder to append to
-   * @return the builder that was passed in
+   * Downgrade a new TaskID to an old one
+   * @param old a new or old TaskID
+   * @return either old or a new TaskID build to match old
    */
-  protected StringBuilder appendTo(StringBuilder builder) {
-    return jobId.appendTo(builder).
-                 append(SEPARATOR).
-                 append(isMap ? 'm' : 'r').
-                 append(SEPARATOR).
-                 append(idFormat.format(id));
-  }
-  
-  @Override
-  public int hashCode() {
-    return jobId.hashCode() * 524287 + id;
-  }
-  
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    super.readFields(in);
-    jobId.readFields(in);
-    isMap = in.readBoolean();
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    super.write(out);
-    jobId.write(out);
-    out.writeBoolean(isMap);
+  public static TaskID downgrade(org.apache.hadoop.mapreduce.TaskID old) {
+    if (old instanceof TaskID) {
+      return (TaskID) old;
+    } else {
+      return new TaskID(JobID.downgrade(old.getJobID()), old.isMap(), 
+                        old.getId());
+    }
   }
 
   @Deprecated
@@ -166,32 +92,10 @@ public class TaskID extends ID {
     return tipId;
   }
   
-  /** Construct a TaskID object from given string 
-   * @return constructed TaskID object or null if the given String is null
-   * @throws IllegalArgumentException if the given string is malformed
-   */
-  public static TaskID forName(String str) 
-    throws IllegalArgumentException {
-    if(str == null)
-      return null;
-    try {
-      String[] parts = str.split(Character.toString(SEPARATOR));
-      if(parts.length == 5) {
-        if(parts[0].equals(TASK)) {
-          boolean isMap = false;
-          if(parts[3].equals("m")) isMap = true;
-          else if(parts[3].equals("r")) isMap = false;
-          else throw new Exception();
-          return new TaskID(parts[1], Integer.parseInt(parts[2]),
-              isMap, Integer.parseInt(parts[4]));
-        }
-      }
-    }catch (Exception ex) {//fall below
-    }
-    throw new IllegalArgumentException("TaskId string : " + str 
-        + " is not properly formed");
+  public JobID getJobID() {
+    return (JobID) super.getJobID();
   }
-  
+
   /** 
    * Returns a regex pattern which matches task IDs. Arguments can 
    * be given null, in which case that part of the regex will be generic.  
@@ -226,5 +130,10 @@ public class TaskID extends ID {
       .append(taskId != null ? idFormat.format(taskId) : "[0-9]*");
     return builder;
   }
-  
+
+  public static TaskID forName(String str
+                               ) throws IllegalArgumentException {
+    return (TaskID) org.apache.hadoop.mapreduce.TaskID.forName(str);
+  }
+
 }

+ 7 - 7
src/mapred/org/apache/hadoop/mapred/TaskReport.java

@@ -30,7 +30,7 @@ import org.apache.hadoop.io.WritableUtils;
 
 /** A report on the state of a task. */
 public class TaskReport implements Writable {
-  private final TaskID taskid;
+  private TaskID taskid;
   private float progress;
   private String state;
   private String[] diagnostics;
@@ -172,7 +172,7 @@ public class TaskReport implements Writable {
       return false;
     if(o.getClass().equals(TaskReport.class)) {
       TaskReport report = (TaskReport) o;
-      return counters.contentEquals(report.getCounters())
+      return counters.equals(report.getCounters())
              && Arrays.toString(this.diagnostics)
                       .equals(Arrays.toString(report.getDiagnostics()))
              && this.finishTime == report.getFinishTime()
@@ -215,11 +215,11 @@ public class TaskReport implements Writable {
   }
 
   public void readFields(DataInput in) throws IOException {
-    taskid.readFields(in);
-    progress = in.readFloat();
-    state = Text.readString(in);
-    startTime = in.readLong(); 
-    finishTime = in.readLong();
+    this.taskid.readFields(in);
+    this.progress = in.readFloat();
+    this.state = Text.readString(in);
+    this.startTime = in.readLong(); 
+    this.finishTime = in.readLong();
     
     diagnostics = WritableUtils.readStringArray(in);
     counters = new Counters();

+ 5 - 1
src/mapred/org/apache/hadoop/mapred/TextInputFormat.java

@@ -27,7 +27,11 @@ import org.apache.hadoop.io.compress.*;
 
 /** An {@link InputFormat} for plain text files.  Files are broken into lines.
  * Either linefeed or carriage-return are used to signal end of line.  Keys are
- * the position in the file, and values are the line of text.. */
+ * the position in the file, and values are the line of text.
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat}
+ *  instead.
+ */
+@Deprecated
 public class TextInputFormat extends FileInputFormat<LongWritable, Text>
   implements JobConfigurable {
 

+ 5 - 1
src/mapred/org/apache/hadoop/mapred/TextOutputFormat.java

@@ -32,7 +32,11 @@ import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.util.*;
 
-/** An {@link OutputFormat} that writes plain text files. */
+/** An {@link OutputFormat} that writes plain text files. 
+ * @deprecated Use 
+ *   {@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead.
+ */
+@Deprecated
 public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
 
   protected static class LineRecordWriter<K, V>

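Both text formats are deprecated in favour of their counterparts under org.apache.hadoop.mapreduce.lib, the same classes the new JobContext imports as defaults. A minimal sketch of selecting the new input format on a Job; the job name is a placeholder, and the output side is configured analogously through org.apache.hadoop.mapreduce.lib.output.TextOutputFormat:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class FormatMigration {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "format-migration");
        // The replacement for the deprecated mapred TextInputFormat.
        job.setInputFormatClass(TextInputFormat.class);
      }
    }
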
+ 5 - 1
src/mapred/org/apache/hadoop/mapred/lib/HashPartitioner.java

@@ -21,7 +21,11 @@ package org.apache.hadoop.mapred.lib;
 import org.apache.hadoop.mapred.Partitioner;
 import org.apache.hadoop.mapred.JobConf;
 
-/** Partition keys by their {@link Object#hashCode()}. */
+/** Partition keys by their {@link Object#hashCode()}. 
+ * @deprecated Use 
+ *   {@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead.
+ */
+@Deprecated
 public class HashPartitioner<K2, V2> implements Partitioner<K2, V2> {
 
   public void configure(JobConf job) {}

+ 4 - 1
src/mapred/org/apache/hadoop/mapred/lib/IdentityMapper.java

@@ -25,7 +25,10 @@ import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.MapReduceBase;
 
-/** Implements the identity function, mapping inputs directly to outputs. */
+/** Implements the identity function, mapping inputs directly to outputs. 
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.
+ */
+@Deprecated
 public class IdentityMapper<K, V>
     extends MapReduceBase implements Mapper<K, V, K, V> {
 

+ 4 - 1
src/mapred/org/apache/hadoop/mapred/lib/IdentityReducer.java

@@ -27,7 +27,10 @@ import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.MapReduceBase;
 
-/** Performs no reduction, writing all input values directly to the output. */
+/** Performs no reduction, writing all input values directly to the output. 
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.
+ */
+@Deprecated
 public class IdentityReducer<K, V>
     extends MapReduceBase implements Reducer<K, V, K, V> {
 

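IdentityMapper and IdentityReducer are deprecated because the new base Mapper and Reducer already pass records through unchanged when map and reduce are not overridden. A sketch of leaning on that behaviour with empty subclasses; the class and job names are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;

    public class PassThroughJob {
      // Empty subclasses inherit the identity map() and reduce() of the new API.
      public static class PassMapper extends Mapper<Text, Text, Text, Text> { }
      public static class PassReducer extends Reducer<Text, Text, Text, Text> { }

      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "pass-through");
        job.setMapperClass(PassMapper.class);
        job.setReducerClass(PassReducer.class);
      }
    }
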
+ 5 - 1
src/mapred/org/apache/hadoop/mapred/lib/InverseMapper.java

@@ -25,7 +25,11 @@ import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 
-/** A {@link Mapper} that swaps keys and values. */
+/** A {@link Mapper} that swaps keys and values. 
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper} 
+ *   instead.
+ */
+@Deprecated
 public class InverseMapper<K, V>
     extends MapReduceBase implements Mapper<K, V, V, K> {
 

+ 5 - 1
src/mapred/org/apache/hadoop/mapred/lib/LongSumReducer.java

@@ -28,7 +28,11 @@ import org.apache.hadoop.mapred.MapReduceBase;
 
 import org.apache.hadoop.io.LongWritable;
 
-/** A {@link Reducer} that sums long values. */
+/** A {@link Reducer} that sums long values. 
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer}
+ *    instead.
+ */
+@Deprecated
 public class LongSumReducer<K> extends MapReduceBase
     implements Reducer<K, LongWritable, K, LongWritable> {
 

+ 3 - 0
src/mapred/org/apache/hadoop/mapred/lib/NullOutputFormat.java

@@ -27,7 +27,10 @@ import org.apache.hadoop.util.Progressable;
 
 /**
  * Consume all outputs and put them in /dev/null. 
+ * @deprecated Use 
+ *   {@link org.apache.hadoop.mapreduce.lib.output.NullOutputFormat} instead.
  */
+@Deprecated
 public class NullOutputFormat<K, V> implements OutputFormat<K, V> {
   
   public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, 

+ 5 - 1
src/mapred/org/apache/hadoop/mapred/lib/TokenCountMapper.java

@@ -30,7 +30,11 @@ import org.apache.hadoop.mapred.Reporter;
 
 
 /** A {@link Mapper} that maps text values into <token,freq> pairs.  Uses
- * {@link StringTokenizer} to break text into tokens. */
+ * {@link StringTokenizer} to break text into tokens. 
+ * @deprecated Use 
+ *    {@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.
+ */
+@Deprecated
 public class TokenCountMapper<K> extends MapReduceBase
     implements Mapper<K, Text, Text, LongWritable> {
 

+ 48 - 9
src/mapred/org/apache/hadoop/mapreduce/Counter.java

@@ -38,39 +38,60 @@ import org.apache.hadoop.io.WritableUtils;
  */
 public class Counter implements Writable {
 
+  private String name;
   private String displayName;
-  private long value;
+  private long value = 0;
     
-  Counter() { 
-    value = 0L;
+  protected Counter() { 
   }
 
-  Counter(String displayName, long value) {
+  protected Counter(String name, String displayName) {
+    this.name = name;
+    this.displayName = displayName;
+  }
+  
+  @Deprecated
+  protected synchronized void setDisplayName(String displayName) {
     this.displayName = displayName;
-    this.value = value;
   }
     
   /**
    * Read the binary representation of the counter
    */
+  @Override
   public synchronized void readFields(DataInput in) throws IOException {
-    displayName = Text.readString(in);
+    name = Text.readString(in);
+    if (in.readBoolean()) {
+      displayName = Text.readString(in);
+    } else {
+      displayName = name;
+    }
     value = WritableUtils.readVLong(in);
   }
     
   /**
    * Write the binary representation of the counter
    */
+  @Override
   public synchronized void write(DataOutput out) throws IOException {
-    Text.writeString(out, displayName);
+    Text.writeString(out, name);
+    boolean distinctDisplayName = ! name.equals(displayName);
+    out.writeBoolean(distinctDisplayName);
+    if (distinctDisplayName) {
+      Text.writeString(out, displayName);
+    }
     WritableUtils.writeVLong(out, value);
   }
-    
+
+  public synchronized String getName() {
+    return name;
+  }
+
   /**
    * Get the name of the counter.
    * @return the user facing name of the counter
    */
-  public String getDisplayName() {
+  public synchronized String getDisplayName() {
     return displayName;
   }
     
@@ -89,4 +110,22 @@ public class Counter implements Writable {
   public synchronized void increment(long incr) {
     value += incr;
   }
+
+  @Override
+  public synchronized boolean equals(Object genericRight) {
+    if (genericRight instanceof Counter) {
+      synchronized (genericRight) {
+        Counter right = (Counter) genericRight;
+        return name.equals(right.name) && 
+               displayName.equals(right.displayName) &&
+               value == right.value;
+      }
+    }
+    return false;
+  }
+  
+  @Override
+  public synchronized int hashCode() {
+    return name.hashCode() + displayName.hashCode();
+  }
 }

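A counter now carries an internal name alongside its display name, and write() emits the name, a flag for whether a distinct display name follows, and the value as a vlong. A small round-trip sketch of that wire format; the group and counter names are made up:

    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;
    import org.apache.hadoop.mapreduce.Counter;
    import org.apache.hadoop.mapreduce.Counters;

    public class CounterWireFormat {
      public static void main(String[] args) throws Exception {
        Counters counters = new Counters();
        Counter c = counters.findCounter("my.group", "RECORDS");
        c.increment(42);

        DataOutputBuffer out = new DataOutputBuffer();
        c.write(out);                       // name, boolean, (display name), value

        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        Counter back = new Counters().findCounter("my.group", "RECORDS");
        back.readFields(in);
        System.out.println(back.getName() + " = " + back.getValue());  // RECORDS = 42
      }
    }
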
+ 159 - 2
src/mapred/org/apache/hadoop/mapreduce/CounterGroup.java

@@ -18,10 +18,167 @@
 
 package org.apache.hadoop.mapreduce;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+
 /**
  * A group of {@link Counter}s that logically belong together. Typically,
  * it is an {@link Enum} subclass and the counters are the values.
  */
-public abstract class CounterGroup implements Iterable<Counter> {
-  abstract public String getName();
+public class CounterGroup implements Writable, Iterable<Counter> {
+  private String name;
+  private String displayName;
+  private TreeMap<String, Counter> counters = new TreeMap<String, Counter>();
+  // Optional ResourceBundle for localization of group and counter names.
+  private ResourceBundle bundle = null;    
+  
+  /**
+   * Returns the specified resource bundle, or throws an exception.
+   * @throws MissingResourceException if the bundle isn't found
+   */
+  private static ResourceBundle getResourceBundle(String enumClassName) {
+    String bundleName = enumClassName.replace('$','_');
+    return ResourceBundle.getBundle(bundleName);
+  }
+
+  protected CounterGroup(String name) {
+    this.name = name;
+    try {
+      bundle = getResourceBundle(name);
+    }
+    catch (MissingResourceException neverMind) {
+    }
+    displayName = localize("CounterGroupName", name);
+  }
+  
+  protected CounterGroup(String name, String displayName) {
+    this.name = name;
+    this.displayName = displayName;
+  }
+ 
+  /**
+   * Get the internal name of the group
+   * @return the internal name
+   */
+  public synchronized String getName() {
+    return name;
+  }
+  
+  /**
+   * Get the display name of the group.
+   * @return the human readable name
+   */
+  public synchronized String getDisplayName() {
+    return displayName;
+  }
+
+  synchronized void addCounter(Counter counter) {
+    counters.put(counter.getName(), counter);
+  }
+
+  /**
+   * Internal to find a counter in a group.
+   * @param counterName the name of the counter
+   * @param displayName the display name of the counter
+   * @return the counter that was found or added
+   */
+  protected Counter findCounter(String counterName, String displayName) {
+    Counter result = counters.get(counterName);
+    if (result == null) {
+      result = new Counter(counterName, displayName);
+      counters.put(counterName, result);
+    }
+    return result;
+  }
+
+  public synchronized Counter findCounter(String counterName) {
+    Counter result = counters.get(counterName);
+    if (result == null) {
+      String displayName = localize(counterName, counterName);
+      result = new Counter(counterName, displayName);
+      counters.put(counterName, result);
+    }
+    return result;
+  }
+  
+  public synchronized Iterator<Counter> iterator() {
+    return counters.values().iterator();
+  }
+
+  public synchronized void write(DataOutput out) throws IOException {
+    Text.writeString(out, displayName);
+    WritableUtils.writeVInt(out, counters.size());
+    for(Counter counter: counters.values()) {
+      counter.write(out);
+    }
+  }
+  
+  public synchronized void readFields(DataInput in) throws IOException {
+    displayName = Text.readString(in);
+    counters.clear();
+    int size = WritableUtils.readVInt(in);
+    for(int i=0; i < size; i++) {
+      Counter counter = new Counter();
+      counter.readFields(in);
+      counters.put(counter.getName(), counter);
+    }
+  }
+
+  /**
+   * Looks up key in the ResourceBundle and returns the corresponding value.
+   * If the bundle or the key doesn't exist, returns the default value.
+   */
+  private String localize(String key, String defaultValue) {
+    String result = defaultValue;
+    if (bundle != null) {
+      try {
+        result = bundle.getString(key);
+      }
+      catch (MissingResourceException mre) {
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Returns the number of counters in this group.
+   */
+  public synchronized int size() {
+    return counters.size();
+  }
+
+  public synchronized boolean equals(Object genericRight) {
+    if (genericRight instanceof CounterGroup) {
+      Iterator<Counter> right = ((CounterGroup) genericRight).counters.
+                                       values().iterator();
+      Iterator<Counter> left = counters.values().iterator();
+      while (left.hasNext()) {
+        if (!right.hasNext() || !left.next().equals(right.next())) {
+          return false;
+        }
+      }
+      return !right.hasNext();
+    }
+    return false;
+  }
+
+  public synchronized int hashCode() {
+    return counters.hashCode();
+  }
+  
+  public synchronized void incrAllCounters(CounterGroup rightGroup) {
+    for(Counter right: rightGroup.counters.values()) {
+      Counter left = findCounter(right.getName(), right.getDisplayName());
+      left.increment(right.getValue());
+    }
+  }
 }

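CounterGroup creates counters lazily through findCounter, and its constructor tries to load a ResourceBundle named after the group (with '$' mapped to '_') to localize display names, falling back to the internal names when no bundle exists. A sketch of the enum-driven path; the enum and class names are invented:

    import org.apache.hadoop.mapreduce.Counter;
    import org.apache.hadoop.mapreduce.CounterGroup;
    import org.apache.hadoop.mapreduce.Counters;

    public class GroupExample {
      enum Stats { GOOD, BAD }

      public static void main(String[] args) {
        Counters counters = new Counters();
        // Creates the group named after the enum class and the GOOD counter.
        counters.findCounter(Stats.GOOD).increment(1);

        CounterGroup group = counters.getGroup(Stats.class.getName());
        System.out.println(group.getDisplayName() + ": " + group.size() + " counter(s)");
        for (Counter c : group) {
          System.out.println("  " + c.getDisplayName() + " = " + c.getValue());
        }
      }
    }
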
+ 184 - 0
src/mapred/org/apache/hadoop/mapreduce/Counters.java

@@ -0,0 +1,184 @@
+package org.apache.hadoop.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.IdentityHashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+public class Counters implements Writable,Iterable<CounterGroup> {
+  /**
+   * A cache from enum values to the associated counter. Dramatically speeds up
+   * typical usage.
+   */
+  private Map<Enum<?>, Counter> cache = new IdentityHashMap<Enum<?>, Counter>();
+
+  private TreeMap<String, CounterGroup> groups = 
+      new TreeMap<String, CounterGroup>();
+  
+  public Counters() {
+  }
+  
+  Counters(org.apache.hadoop.mapred.Counters counters) {
+    for(org.apache.hadoop.mapred.Counters.Group group: counters) {
+      String name = group.getName();
+      CounterGroup newGroup = new CounterGroup(name, group.getDisplayName());
+      groups.put(name, newGroup);
+      for(Counter counter: group) {
+        newGroup.addCounter(counter);
+      }
+    }
+  }
+
+  public Counter findCounter(String groupName, String counterName) {
+    CounterGroup grp = groups.get(groupName);
+    if (grp == null) {
+      grp = new CounterGroup(groupName);
+      groups.put(groupName, grp);
+    }
+    return grp.findCounter(counterName);
+  }
+
+  /**
+   * Find the counter for the given enum. The same enum will always return the
+   * same counter.
+   * @param key the counter key
+   * @return the matching counter object
+   */
+  public synchronized Counter findCounter(Enum<?> key) {
+    Counter counter = cache.get(key);
+    if (counter == null) {
+      counter = findCounter(key.getDeclaringClass().getName(), key.toString());
+      cache.put(key, counter);
+    }
+    return counter;    
+  }
+
+  /**
+   * Returns the names of all counter classes.
+   * @return Set of counter names.
+   */
+  public synchronized Collection<String> getGroupNames() {
+    return groups.keySet();
+  }
+
+  @Override
+  public Iterator<CounterGroup> iterator() {
+    return groups.values().iterator();
+  }
+
+  /**
+   * Returns the named counter group, or an empty group if there is none
+   * with the specified name.
+   */
+  public synchronized CounterGroup getGroup(String groupName) {
+    return groups.get(groupName);
+  }
+
+  /**
+   * Returns the total number of counters, by summing the number of counters
+   * in each group.
+   */
+  public synchronized  int countCounters() {
+    int result = 0;
+    for (CounterGroup group : this) {
+      result += group.size();
+    }
+    return result;
+  }
+
+  /**
+   * Write the set of groups.
+   * The external format is:
+   *     #groups (groupName group)*
+   *
+   * i.e. the number of groups followed by 0 or more groups, where each 
+   * group is of the form:
+   *
+   *     groupDisplayName #counters (false | true counter)*
+   *
+   * where each counter is of the form:
+   *
+   *     name (false | true displayName) value
+   */
+  @Override
+  public synchronized void write(DataOutput out) throws IOException {
+    out.writeInt(groups.size());
+    for (org.apache.hadoop.mapreduce.CounterGroup group: groups.values()) {
+      Text.writeString(out, group.getName());
+      group.write(out);
+    }
+  }
+  
+  /**
+   * Read a set of groups.
+   */
+  @Override
+  public synchronized void readFields(DataInput in) throws IOException {
+    int numClasses = in.readInt();
+    groups.clear();
+    while (numClasses-- > 0) {
+      String groupName = Text.readString(in);
+      CounterGroup group = new CounterGroup(groupName);
+      group.readFields(in);
+      groups.put(groupName, group);
+    }
+  }
+
+  /**
+   * Return textual representation of the counter values.
+   */
+  public synchronized String toString() {
+    StringBuilder sb = new StringBuilder("Counters: " + countCounters());
+    for (CounterGroup group: this) {
+      sb.append("\n\t" + group.getDisplayName());
+      for (Counter counter: group) {
+        sb.append("\n\t\t" + counter.getDisplayName() + "=" + 
+                  counter.getValue());
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Increments multiple counters by their amounts in another Counters 
+   * instance.
+   * @param other the other Counters instance
+   */
+  public synchronized void incrAllCounters(Counters other) {
+    for(Map.Entry<String, CounterGroup> rightEntry: other.groups.entrySet()) {
+      CounterGroup left = groups.get(rightEntry.getKey());
+      CounterGroup right = rightEntry.getValue();
+      if (left == null) {
+        left = new CounterGroup(right.getName(), right.getDisplayName());
+        groups.put(rightEntry.getKey(), left);
+      }
+      left.incrAllCounters(right);
+    }
+  }
+
+  public boolean equals(Object genericRight) {
+    if (genericRight instanceof Counters) {
+      Iterator<CounterGroup> right = ((Counters) genericRight).groups.
+                                       values().iterator();
+      Iterator<CounterGroup> left = groups.values().iterator();
+      while (left.hasNext()) {
+        if (!right.hasNext() || !left.next().equals(right.next())) {
+          return false;
+        }
+      }
+      return !right.hasNext();
+    }
+    return false;
+  }
+  
+  public int hashCode() {
+    return groups.hashCode();
+  }
+}

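incrAllCounters merges another Counters instance group by group, creating any missing groups and counters on the way, which is how per-task counters can be rolled up into a job total. A short sketch; the group and counter names are made up:

    import org.apache.hadoop.mapreduce.Counters;

    public class MergeCounters {
      public static void main(String[] args) {
        Counters total = new Counters();
        Counters task1 = new Counters();
        Counters task2 = new Counters();
        task1.findCounter("io", "BYTES_READ").increment(100);
        task2.findCounter("io", "BYTES_READ").increment(250);

        total.incrAllCounters(task1);
        total.incrAllCounters(task2);
        System.out.println(total);   // merged total: BYTES_READ=350
      }
    }
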
+ 2 - 0
src/mapred/org/apache/hadoop/mapreduce/ID.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.io.WritableComparable;
  * @see TaskAttemptID
  */
 public abstract class ID implements WritableComparable<ID> {
+  protected static final char SEPARATOR = '_';
   protected int id;
 
   /** constructs an ID object from the given int */
@@ -85,4 +86,5 @@ public abstract class ID implements WritableComparable<ID> {
   public void write(DataOutput out) throws IOException {
     out.writeInt(id);
   }
+  
 }

+ 153 - 39
src/mapred/org/apache/hadoop/mapreduce/Job.java

@@ -21,10 +21,12 @@ package org.apache.hadoop.mapreduce;
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RunningJob;
 import org.apache.hadoop.mapred.TaskCompletionEvent;
 
 /**
@@ -34,27 +36,40 @@ import org.apache.hadoop.mapred.TaskCompletionEvent;
  * IllegalStateException.
  */
 public class Job extends JobContext {  
-  
-  public Job() {
+  public static enum JobState {DEFINE, RUNNING};
+  private JobState state = JobState.DEFINE;
+  private JobClient jobTracker;
+  private RunningJob info;
+
+  public Job() throws IOException {
     this(new Configuration());
   }
 
-  public Job(Configuration conf) {
+  public Job(Configuration conf) throws IOException {
     super(conf, null);
+    jobTracker = new JobClient((JobConf) getConfiguration());
   }
 
-  public Job(Configuration conf, String jobName) {
+  public Job(Configuration conf, String jobName) throws IOException {
     this(conf);
     setJobName(jobName);
   }
 
+  private void ensureState(JobState state) throws IllegalStateException {
+    if (state != this.state) {
+      throw new IllegalStateException("Job in state "+ this.state + 
+                                      " instead of " + state);
+    }
+  }
+
   /**
    * Set the number of reduce tasks for the job.
    * @param tasks the number of reduce tasks
    * @throws IllegalStateException if the job is submitted
    */
   public void setNumReduceTasks(int tasks) throws IllegalStateException {
-    conf.setInt(NUM_REDUCES_ATTR, tasks);
+    ensureState(JobState.DEFINE);
+    conf.setNumReduceTasks(tasks);
   }
 
   /**
@@ -64,8 +79,8 @@ public class Job extends JobContext {
    * @throws IllegalStateException if the job is submitted
    */
   public void setWorkingDirectory(Path dir) throws IOException {
-    dir = dir.makeQualified(FileSystem.get(conf));
-    conf.set(WORKING_DIR_ATTR, dir.toString());
+    ensureState(JobState.DEFINE);
+    conf.setWorkingDirectory(dir);
   }
 
   /**
@@ -75,6 +90,7 @@ public class Job extends JobContext {
    */
   public void setInputFormatClass(Class<? extends InputFormat<?,?>> cls
                                   ) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
     conf.setClass(INPUT_FORMAT_CLASS_ATTR, cls, InputFormat.class);
   }
 
@@ -85,6 +101,7 @@ public class Job extends JobContext {
    */
   public void setOutputFormatClass(Class<? extends OutputFormat<?,?>> cls
                                    ) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
     conf.setClass(OUTPUT_FORMAT_CLASS_ATTR, cls, OutputFormat.class);
   }
 
@@ -95,9 +112,26 @@ public class Job extends JobContext {
    */
   public void setMapperClass(Class<? extends Mapper<?,?,?,?>> cls
                              ) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
     conf.setClass(MAP_CLASS_ATTR, cls, Mapper.class);
   }
 
+  /**
+   * Set the Jar by finding where a given class came from.
+   * @param cls the example class
+   */
+  public void setJarByClass(Class<?> cls) {
+    conf.setJarByClass(cls);
+  }
+  
+  /**
+   * Get the pathname of the job's jar.
+   * @return the pathname
+   */
+  public String getJar() {
+    return conf.getJar();
+  }
+
   /**
    * Set the combiner class for the job.
    * @param cls the combiner to use
@@ -105,6 +139,7 @@ public class Job extends JobContext {
    */
   public void setCombinerClass(Class<? extends Reducer<?,?,?,?>> cls
                                ) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
     conf.setClass(COMBINE_CLASS_ATTR, cls, Reducer.class);
   }
 
@@ -115,6 +150,7 @@ public class Job extends JobContext {
    */
   public void setReducerClass(Class<? extends Reducer<?,?,?,?>> cls
                               ) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
     conf.setClass(REDUCE_CLASS_ATTR, cls, Reducer.class);
   }
 
@@ -125,6 +161,7 @@ public class Job extends JobContext {
    */
   public void setPartitionerClass(Class<? extends Partitioner<?,?>> cls
                                   ) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
     conf.setClass(PARTITIONER_CLASS_ATTR, cls, Partitioner.class);
   }
 
@@ -138,7 +175,8 @@ public class Job extends JobContext {
    */
   public void setMapOutputKeyClass(Class<?> theClass
                                    ) throws IllegalStateException {
-    conf.setClass(MAP_OUTPUT_KEY_CLASS_ATTR, theClass, Object.class);
+    ensureState(JobState.DEFINE);
+    conf.setMapOutputKeyClass(theClass);
   }
 
   /**
@@ -151,7 +189,8 @@ public class Job extends JobContext {
    */
   public void setMapOutputValueClass(Class<?> theClass
                                      ) throws IllegalStateException {
-    conf.setClass(MAP_OUTPUT_VALUE_CLASS_ATTR, theClass, Object.class);
+    ensureState(JobState.DEFINE);
+    conf.setMapOutputValueClass(theClass);
   }
 
   /**
@@ -162,7 +201,8 @@ public class Job extends JobContext {
    */
   public void setOutputKeyClass(Class<?> theClass
                                 ) throws IllegalStateException {
-    conf.setClass(OUTPUT_KEY_CLASS_ATTR, theClass, Object.class);
+    ensureState(JobState.DEFINE);
+    conf.setOutputKeyClass(theClass);
   }
 
   /**
@@ -173,7 +213,8 @@ public class Job extends JobContext {
    */
   public void setOutputValueClass(Class<?> theClass
                                   ) throws IllegalStateException {
-    conf.setClass(OUTPUT_VALUE_CLASS_ATTR, theClass, Object.class);
+    ensureState(JobState.DEFINE);
+    conf.setOutputValueClass(theClass);
   }
 
   /**
@@ -184,19 +225,22 @@ public class Job extends JobContext {
    */
   public void setSortComparatorClass(Class<? extends RawComparator<?>> cls
                                      ) throws IllegalStateException {
-    conf.setClass(SORT_COMPARATOR_ATTR, cls, RawComparator.class);
+    ensureState(JobState.DEFINE);
+    conf.setOutputKeyComparatorClass(cls);
   }
 
   /**
    * Define the comparator that controls which keys are grouped together
    * for a single call to 
-   * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}
+   * {@link Reducer#reduce(Object, Iterable, 
+   *                       org.apache.hadoop.mapreduce.Reducer.Context)}
    * @param cls the raw comparator to use
    * @throws IllegalStateException if the job is submitted
    */
   public void setGroupingComparatorClass(Class<? extends RawComparator<?>> cls
                                          ) throws IllegalStateException {
-    conf.setClass(GROUPING_COMPARATOR_ATTR, cls, RawComparator.class);
+    ensureState(JobState.DEFINE);
+    conf.setOutputValueGroupingComparator(cls);
   }
 
   /**
@@ -206,7 +250,8 @@ public class Job extends JobContext {
    * @throws IllegalStateException if the job is submitted
    */
   public void setJobName(String name) throws IllegalStateException {
-    conf.set(JOB_NAME_ATTR, name);
+    ensureState(JobState.DEFINE);
+    conf.setJobName(name);
   }
 
   /**
@@ -215,8 +260,8 @@ public class Job extends JobContext {
    * @return the URL where some job progress information will be displayed.
    */
   public String getTrackingURL() {
-    // TODO
-    return null;
+    ensureState(JobState.RUNNING);
+    return info.getTrackingURL();
   }
 
   /**
@@ -227,8 +272,8 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public float mapProgress() throws IOException {
-    // TODO
-    return 0.0f;
+    ensureState(JobState.RUNNING);
+    return info.mapProgress();
   }
 
   /**
@@ -239,8 +284,8 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public float reduceProgress() throws IOException {
-    // TODO
-    return 0.0f;
+    ensureState(JobState.RUNNING);
+    return info.reduceProgress();
   }
 
   /**
@@ -251,8 +296,8 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public boolean isComplete() throws IOException {
-    // TODO
-    return false;
+    ensureState(JobState.RUNNING);
+    return info.isComplete();
   }
 
   /**
@@ -262,8 +307,8 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public boolean isSuccessful() throws IOException {
-    // TODO
-    return false;
+    ensureState(JobState.RUNNING);
+    return info.isSuccessful();
   }
 
   /**
@@ -273,7 +318,8 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public void killJob() throws IOException {
-    // TODO
+    ensureState(JobState.RUNNING);
+    info.killJob();
   }
     
   /**
@@ -285,8 +331,8 @@ public class Job extends JobContext {
    */
   public TaskCompletionEvent[] getTaskCompletionEvents(int startFrom
                                                        ) throws IOException {
-    // TODO
-    return null;
+    ensureState(JobState.RUNNING);
+    return info.getTaskCompletionEvents(startFrom);
   }
   
   /**
@@ -296,7 +342,9 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public void killTask(TaskAttemptID taskId) throws IOException {
-    // TODO
+    ensureState(JobState.RUNNING);
+    info.killTask(org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId), 
+                  false);
   }
 
   /**
@@ -306,7 +354,9 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public void failTask(TaskAttemptID taskId) throws IOException {
-    // TODO
+    ensureState(JobState.RUNNING);
+    info.killTask(org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId), 
+                  true);
   }
 
   /**
@@ -316,17 +366,77 @@ public class Job extends JobContext {
    * @throws IOException
    */
   public Iterable<CounterGroup> getCounters() throws IOException {
-    // TODO
-    return null;
+    ensureState(JobState.RUNNING);
+    return new Counters(info.getCounters());
+  }
+
+  private void ensureNotSet(String attr, String msg) throws IOException {
+    if (conf.get(attr) != null) {
+      throw new IOException(attr + " is incompatible with " + msg + " mode.");
+    }    
+  }
+
+  /**
+   * Default to the new APIs unless they are explicitly set or the old mapper or
+   * reduce attributes are used.
+   * @throws IOException if the configuration is inconsistent
+   */
+  private void setUseNewAPI() throws IOException {
+    int numReduces = conf.getNumReduceTasks();
+    String oldMapperClass = "mapred.mapper.class";
+    String oldReduceClass = "mapred.reducer.class";
+    String oldCombineClass = "mapred.combiner.class";
+    conf.setBooleanIfUnset("mapred.mapper.new-api",
+                           conf.get(oldMapperClass) == null);
+    if (conf.getUseNewMapper()) {
+      String mode = "new map API";
+      ensureNotSet("mapred.input.format.class", mode);
+      ensureNotSet(oldMapperClass, mode);
+      if (numReduces != 0) {
+        ensureNotSet(oldCombineClass, mode);
+        ensureNotSet("mapred.partitioner.class", mode);
+       } else {
+        ensureNotSet("mapred.output.format.class", mode);
+      }      
+    } else {
+      String mode = "map compatability";
+      ensureNotSet(JobContext.INPUT_FORMAT_CLASS_ATTR, mode);
+      ensureNotSet(JobContext.MAP_CLASS_ATTR, mode);
+      if (numReduces != 0) {
+        ensureNotSet(JobContext.COMBINE_CLASS_ATTR, mode);
+        ensureNotSet(JobContext.PARTITIONER_CLASS_ATTR, mode);
+       } else {
+        ensureNotSet(JobContext.OUTPUT_FORMAT_CLASS_ATTR, mode);
+      }
+    }
+    if (numReduces != 0) {
+      conf.setBooleanIfUnset("mapred.reducer.new-api",
+                             conf.get(oldReduceClass) == null);
+      if (conf.getUseNewReducer()) {
+        String mode = "new reduce API";
+        ensureNotSet("mapred.output.format.class", mode);
+        ensureNotSet(oldReduceClass, mode);   
+        ensureNotSet(oldCombineClass, mode);
+      } else {
+        String mode = "reduce compatability";
+        ensureNotSet(JobContext.OUTPUT_FORMAT_CLASS_ATTR, mode);
+        ensureNotSet(JobContext.REDUCE_CLASS_ATTR, mode);   
+        ensureNotSet(JobContext.COMBINE_CLASS_ATTR, mode);        
+      }
+    }   
   }
 
   /**
    * Submit the job to the cluster and return immediately.
    * @throws IOException
    */
-  public void submit() throws IOException {
-    // TODO
-  }
+  public void submit() throws IOException, InterruptedException, 
+                              ClassNotFoundException {
+    ensureState(JobState.DEFINE);
+    setUseNewAPI();
+    info = jobTracker.submitJobInternal(conf);
+    state = JobState.RUNNING;
+   }
   
   /**
    * Submit the job to the cluster and wait for it to finish.
@@ -334,8 +444,12 @@ public class Job extends JobContext {
    * @throws IOException thrown if the communication with the 
    *         <code>JobTracker</code> is lost
    */
-  public boolean waitForCompletion() throws IOException {
-    // TODO
-    return false;
+  public boolean waitForCompletion() throws IOException, InterruptedException,
+                                            ClassNotFoundException {
+    if (state == JobState.DEFINE) {
+      submit();
+    }
+    info.waitForCompletion();
+    return isSuccessful();
   }
 }

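Job now tracks a DEFINE/RUNNING state: every setter checks ensureState(JobState.DEFINE), submit() flips the state and hands the configuration to JobClient.submitJobInternal, and waitForCompletion() submits if necessary and then blocks on the underlying RunningJob. A minimal sketch of that flow; input, output and map/reduce configuration are omitted and the names are placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;

    public class SubmitSketch {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "submit-sketch");
        job.setJarByClass(SubmitSketch.class);      // legal: still in DEFINE state
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        boolean ok = job.waitForCompletion();       // calls submit(), then blocks
        // job.setJobName("too-late");              // would throw IllegalStateException
        System.exit(ok ? 0 : 1);
      }
    }
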
+ 28 - 67
src/mapred/org/apache/hadoop/mapreduce/JobContext.java

@@ -21,15 +21,12 @@ package org.apache.hadoop.mapreduce;
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
 
 /**
  * A read-only view of the job that is provided to the tasks while they
@@ -38,35 +35,21 @@ import org.apache.hadoop.util.ReflectionUtils;
 public class JobContext {
   // Put all of the attribute names in here so that Job and JobContext are
   // consistent.
-  protected static final String INPUT_FORMAT_CLASS_ATTR = "mapreduce.map.class";
+  protected static final String INPUT_FORMAT_CLASS_ATTR = 
+    "mapreduce.inputformat.class";
   protected static final String MAP_CLASS_ATTR = "mapreduce.map.class";
   protected static final String COMBINE_CLASS_ATTR = "mapreduce.combine.class";
   protected static final String REDUCE_CLASS_ATTR = "mapreduce.reduce.class";
   protected static final String OUTPUT_FORMAT_CLASS_ATTR = 
     "mapreduce.outputformat.class";
-  protected static final String OUTPUT_KEY_CLASS_ATTR = 
-    "mapreduce.out.key.class";
-  protected static final String OUTPUT_VALUE_CLASS_ATTR = 
-    "mapreduce.out.value.class";
-  protected static final String MAP_OUTPUT_KEY_CLASS_ATTR = 
-    "mapreduce.map.out.key.class";
-  protected static final String MAP_OUTPUT_VALUE_CLASS_ATTR = 
-    "mapreduce.map.out.value.class";
-  protected static final String NUM_REDUCES_ATTR = "mapreduce.reduce.tasks";
-  protected static final String WORKING_DIR_ATTR = "mapreduce.work.dir";
-  protected static final String JOB_NAME_ATTR = "mapreduce.job.name";
-  protected static final String SORT_COMPARATOR_ATTR = 
-    "mapreduce.sort.comparator";
-  protected static final String GROUPING_COMPARATOR_ATTR = 
-    "mapreduce.grouping.comparator";
   protected static final String PARTITIONER_CLASS_ATTR = 
     "mapreduce.partitioner.class";
 
-  protected final Configuration conf;
+  protected final org.apache.hadoop.mapred.JobConf conf;
   private final JobID jobId;
   
   public JobContext(Configuration conf, JobID jobId) {
-    this.conf = conf;
+    this.conf = new org.apache.hadoop.mapred.JobConf(conf);
     this.jobId = jobId;
   }
 
@@ -92,7 +75,7 @@ public class JobContext {
    * @return the number of reduce tasks for this job.
    */
   public int getNumReduceTasks() {
-    return conf.getInt(NUM_REDUCES_ATTR, 1);
+    return conf.getNumReduceTasks();
   }
   
   /**
@@ -101,14 +84,7 @@ public class JobContext {
    * @return the directory name.
    */
   public Path getWorkingDirectory() throws IOException {
-    String name = conf.get(WORKING_DIR_ATTR);
-    if (name != null) {
-      return new Path(name);
-    } else {
-      Path dir = FileSystem.get(conf).getWorkingDirectory();
-      conf.set(WORKING_DIR_ATTR, dir.toString());
-      return dir;
-    }
+    return conf.getWorkingDirectory();
   }
 
   /**
@@ -116,8 +92,7 @@ public class JobContext {
    * @return the key class for the job output data.
    */
   public Class<?> getOutputKeyClass() {
-    return conf.getClass(OUTPUT_KEY_CLASS_ATTR,
-                         LongWritable.class, Object.class);
+    return conf.getOutputKeyClass();
   }
   
   /**
@@ -125,7 +100,7 @@ public class JobContext {
    * @return the value class for job outputs.
    */
   public Class<?> getOutputValueClass() {
-    return conf.getClass(OUTPUT_VALUE_CLASS_ATTR, Text.class, Object.class);
+    return conf.getOutputValueClass();
   }
 
   /**
@@ -135,12 +110,7 @@ public class JobContext {
    * @return the map output key class.
    */
   public Class<?> getMapOutputKeyClass() {
-    Class<?> retv = conf.getClass(MAP_OUTPUT_KEY_CLASS_ATTR, null, 
-                                  Object.class);
-    if (retv == null) {
-      retv = getOutputKeyClass();
-    }
-    return retv;
+    return conf.getMapOutputKeyClass();
   }
 
   /**
@@ -151,12 +121,7 @@ public class JobContext {
    * @return the map output value class.
    */
   public Class<?> getMapOutputValueClass() {
-    Class<?> retv = conf.getClass(MAP_OUTPUT_VALUE_CLASS_ATTR, null,
-        Object.class);
-    if (retv == null) {
-      retv = getOutputValueClass();
-    }
-    return retv;
+    return conf.getMapOutputValueClass();
   }
 
   /**
@@ -166,7 +131,7 @@ public class JobContext {
    * @return the job's name, defaulting to "".
    */
   public String getJobName() {
-    return conf.get(JOB_NAME_ATTR, "");
+    return conf.getJobName();
   }
 
   /**
@@ -178,7 +143,7 @@ public class JobContext {
   public Class<? extends InputFormat<?,?>> getInputFormatClass() 
      throws ClassNotFoundException {
     return (Class<? extends InputFormat<?,?>>) 
-      conf.getClass(INPUT_FORMAT_CLASS_ATTR, InputFormat.class);
+      conf.getClass(INPUT_FORMAT_CLASS_ATTR, TextInputFormat.class);
   }
 
   /**
@@ -202,7 +167,7 @@ public class JobContext {
   public Class<? extends Reducer<?,?,?,?>> getCombinerClass() 
      throws ClassNotFoundException {
     return (Class<? extends Reducer<?,?,?,?>>) 
-      conf.getClass(COMBINE_CLASS_ATTR, Reducer.class);
+      conf.getClass(COMBINE_CLASS_ATTR, null);
   }
 
   /**
@@ -226,7 +191,7 @@ public class JobContext {
   public Class<? extends OutputFormat<?,?>> getOutputFormatClass() 
      throws ClassNotFoundException {
     return (Class<? extends OutputFormat<?,?>>) 
-      conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, OutputFormat.class);
+      conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class);
   }
 
   /**
@@ -238,7 +203,7 @@ public class JobContext {
   public Class<? extends Partitioner<?,?>> getPartitionerClass() 
      throws ClassNotFoundException {
     return (Class<? extends Partitioner<?,?>>) 
-      conf.getClass(PARTITIONER_CLASS_ATTR, Partitioner.class);
+      conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
   }
 
   /**
@@ -246,14 +211,16 @@ public class JobContext {
    * 
    * @return the {@link RawComparator} comparator used to compare keys.
    */
-  @SuppressWarnings("unchecked")
   public RawComparator<?> getSortComparator() {
-    Class<?> theClass = conf.getClass(SORT_COMPARATOR_ATTR, null,
-                                   RawComparator.class);
-    if (theClass != null)
-      return (RawComparator<?>) ReflectionUtils.newInstance(theClass, conf);
-    return WritableComparator.get(
-        (Class<? extends WritableComparable>)getMapOutputKeyClass());
+    return conf.getOutputKeyComparator();
+  }
+
+  /**
+   * Get the pathname of the job's jar.
+   * @return the pathname
+   */
+  public String getJar() {
+    return conf.getJar();
   }
 
   /** 
@@ -264,12 +231,6 @@ public class JobContext {
    * @see Job#setGroupingComparatorClass(Class) for details.  
    */
   public RawComparator<?> getGroupingComparator() {
-    Class<?> theClass = conf.getClass(GROUPING_COMPARATOR_ATTR, null,
-                                   RawComparator.class);
-    if (theClass == null) {
-      return getSortComparator();
-    }
-    return (RawComparator<?>) ReflectionUtils.newInstance(theClass, conf);
+    return conf.getOutputValueGroupingComparator();
   }
-
 }
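The practical effect of the new defaults above is that an unconfigured job reads text, hash-partitions, and writes text, while getCombinerClass() now yields null instead of the identity Reducer. A task-side sketch of how the getters might be consumed (ReflectionUtils.newInstance is the existing helper used elsewhere in this patch; conf and jobId are assumed to be in scope, and ClassNotFoundException handling is omitted):

    // Sketch: materializing the configured (or default) classes from a JobContext.
    JobContext context = new JobContext(conf, jobId);
    InputFormat<?,?> inputFormat =
        ReflectionUtils.newInstance(context.getInputFormatClass(), conf);   // TextInputFormat unless set
    OutputFormat<?,?> outputFormat =
        ReflectionUtils.newInstance(context.getOutputFormatClass(), conf);  // TextOutputFormat unless set
    Partitioner<?,?> partitioner =
        ReflectionUtils.newInstance(context.getPartitionerClass(), conf);   // HashPartitioner unless set
    if (context.getCombinerClass() != null) {
      // a combine pass only runs when a combiner was explicitly configured
    }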

+ 35 - 63
src/mapred/org/apache/hadoop/mapreduce/JobID.java

@@ -24,7 +24,6 @@ import java.io.IOException;
 import java.text.NumberFormat;
 
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobTracker;
 
 /**
  * JobID represents the immutable and unique identifier for 
@@ -42,15 +41,15 @@ import org.apache.hadoop.mapred.JobTracker;
  * 
  * @see TaskID
  * @see TaskAttemptID
- * @see JobTracker#getNewJobId()
- * @see JobTracker#getStartTime()
+ * @see org.apache.hadoop.mapred.JobTracker#getNewJobId()
+ * @see org.apache.hadoop.mapred.JobTracker#getStartTime()
  */
-public class JobID extends ID {
-  private static final String JOB = "job";
-  private String jtIdentifier;
-  private static char UNDERSCORE = '_';
+public class JobID extends org.apache.hadoop.mapred.ID 
+                   implements Comparable<ID> {
+  protected static final String JOB = "job";
+  private final Text jtIdentifier;
   
-  private static NumberFormat idFormat = NumberFormat.getInstance();
+  protected static final NumberFormat idFormat = NumberFormat.getInstance();
   static {
     idFormat.setGroupingUsed(false);
     idFormat.setMinimumIntegerDigits(4);
@@ -63,13 +62,15 @@ public class JobID extends ID {
    */
   public JobID(String jtIdentifier, int id) {
     super(id);
-    this.jtIdentifier = jtIdentifier;
+    this.jtIdentifier = new Text(jtIdentifier);
   }
   
-  private JobID() { }
+  public JobID() { 
+    jtIdentifier = new Text();
+  }
   
   public String getJtIdentifier() {
-    return jtIdentifier;
+    return jtIdentifier.toString();
   }
   
   @Override
@@ -92,42 +93,40 @@ public class JobID extends ID {
     else return jtComp;
   }
   
-  @Override
-  public String toString() {
-    StringBuilder builder = new StringBuilder();
-    return builder.append(JOB).append(UNDERSCORE)
-      .append(toStringWOPrefix()).toString();
-  }
-  
-  /** Returns the string representation w/o prefix */
-  StringBuilder toStringWOPrefix() {
-    StringBuilder builder = new StringBuilder();
-    builder.append(jtIdentifier).append(UNDERSCORE)
-    .append(idFormat.format(id)).toString();
+  /**
+   * Add the stuff after the "job" prefix to the given builder. This is useful,
+   * because the sub-ids use this substring at the start of their string.
+   * @param builder the builder to append to
+   * @return the builder that was passed in
+   */
+  public StringBuilder appendTo(StringBuilder builder) {
+    builder.append(SEPARATOR);
+    builder.append(jtIdentifier);
+    builder.append(SEPARATOR);
+    builder.append(idFormat.format(id));
     return builder;
   }
-  
+
   @Override
   public int hashCode() {
-    return toStringWOPrefix().toString().hashCode();
+    return jtIdentifier.hashCode() + id;
   }
-  
+
+  @Override
+  public String toString() {
+    return appendTo(new StringBuilder(JOB)).toString();
+  }
+
   @Override
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
-    this.jtIdentifier = Text.readString(in);
+    this.jtIdentifier.readFields(in);
   }
 
   @Override
   public void write(DataOutput out) throws IOException {
     super.write(out);
-    Text.writeString(out, jtIdentifier);
-  }
-  
-  public static JobID read(DataInput in) throws IOException {
-    JobID jobId = new JobID();
-    jobId.readFields(in);
-    return jobId;
+    jtIdentifier.write(out);
   }
   
   /** Construct a JobId object from given string 
@@ -141,7 +140,8 @@ public class JobID extends ID {
       String[] parts = str.split("_");
       if(parts.length == 3) {
         if(parts[0].equals(JOB)) {
-          return new JobID(parts[1], Integer.parseInt(parts[2]));
+          return new org.apache.hadoop.mapred.JobID(parts[1], 
+                                                    Integer.parseInt(parts[2]));
         }
       }
     }catch (Exception ex) {//fall below
@@ -150,32 +150,4 @@ public class JobID extends ID {
         + " is not properly formed");
   }
   
-  /** 
-   * Returns a regex pattern which matches task IDs. Arguments can 
-   * be given null, in which case that part of the regex will be generic.  
-   * For example to obtain a regex matching <i>any job</i> 
-   * run on the jobtracker started at <i>200707121733</i>, we would use :
-   * <pre> 
-   * JobID.getTaskIDsPattern("200707121733", null);
-   * </pre>
-   * which will return :
-   * <pre> "job_200707121733_[0-9]*" </pre> 
-   * @param jtIdentifier jobTracker identifier, or null
-   * @param jobId job number, or null
-   * @return a regex pattern matching JobIDs
-   */
-  public static String getJobIDsPattern(String jtIdentifier, Integer jobId) {
-    StringBuilder builder = new StringBuilder(JOB).append(UNDERSCORE);
-    builder.append(getJobIDsPatternWOPrefix(jtIdentifier, jobId));
-    return builder.toString();
-  }
-  
-  static StringBuilder getJobIDsPatternWOPrefix(String jtIdentifier
-      , Integer jobId) {
-    StringBuilder builder = new StringBuilder()
-      .append(jtIdentifier != null ? jtIdentifier : "[^_]*").append(UNDERSCORE)
-      .append(jobId != null ? idFormat.format(jobId) : "[0-9]*");
-    return builder;
-  }
-  
 }
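Since the string form is now assembled through appendTo() and parsed back by forName(), a quick round trip shows the format (the jobtracker timestamp below is made up):

    // Round-trip sketch for the JobID string form.
    JobID id = new JobID("200812151530", 3);
    String text = id.toString();              // "job_200812151530_0003" (job number padded to 4 digits)
    JobID parsed = JobID.forName(text);       // throws IllegalArgumentException if malformed
    assert text.equals(parsed.toString());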

+ 32 - 4
src/mapred/org/apache/hadoop/mapreduce/MapContext.java

@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.mapreduce;
 
+import java.io.IOException;
+
 import org.apache.hadoop.conf.Configuration;
 
 /**
@@ -27,17 +29,43 @@ import org.apache.hadoop.conf.Configuration;
  * @param <KEYOUT> the key output type from the Mapper
  * @param <VALUEOUT> the value output type from the Mapper
  */
-public abstract class MapContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> 
+public class MapContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> 
   extends TaskInputOutputContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
+  private RecordReader<KEYIN,VALUEIN> reader;
+  private InputSplit split;
 
-  public MapContext(Configuration conf, TaskAttemptID taskid) {
-    super(conf, taskid);
+  public MapContext(Configuration conf, TaskAttemptID taskid,
+                    RecordReader<KEYIN,VALUEIN> reader,
+                    RecordWriter<KEYOUT,VALUEOUT> writer,
+                    OutputCommitter committer,
+                    StatusReporter reporter,
+                    InputSplit split) {
+    super(conf, taskid, writer, committer, reporter);
+    this.reader = reader;
+    this.split = split;
   }
 
   /**
    * Get the input split for this map.
    */
-  public abstract InputSplit getInputSplit();
+  public InputSplit getInputSplit() {
+    return split;
+  }
+
+  @Override
+  public KEYIN getCurrentKey() throws IOException, InterruptedException {
+    return reader.getCurrentKey();
+  }
+
+  @Override
+  public VALUEIN getCurrentValue() throws IOException, InterruptedException {
+    return reader.getCurrentValue();
+  }
+
+  @Override
+  public boolean nextKeyValue() throws IOException, InterruptedException {
+    return reader.nextKeyValue();
+  }
 
 }
      

+ 11 - 10
src/mapred/org/apache/hadoop/mapreduce/Mapper.java

@@ -94,10 +94,15 @@ import org.apache.hadoop.io.compress.CompressionCodec;
  */
 public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
 
-  public abstract class Context 
+  public class Context 
     extends MapContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
-    public Context(Configuration conf, TaskAttemptID taskid) {
-      super(conf, taskid);
+    public Context(Configuration conf, TaskAttemptID taskid,
+                   RecordReader<KEYIN,VALUEIN> reader,
+                   RecordWriter<KEYOUT,VALUEOUT> writer,
+                   OutputCommitter committer,
+                   StatusReporter reporter,
+                   InputSplit split) throws IOException, InterruptedException {
+      super(conf, taskid, reader, writer, committer, reporter, split);
     }
   }
   
@@ -116,7 +121,7 @@ public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
   @SuppressWarnings("unchecked")
   protected void map(KEYIN key, VALUEIN value, 
                      Context context) throws IOException, InterruptedException {
-    context.collect((KEYOUT) key, (VALUEOUT) value);
+    context.write((KEYOUT) key, (VALUEOUT) value);
   }
 
   /**
@@ -135,12 +140,8 @@ public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
    */
   public void run(Context context) throws IOException, InterruptedException {
     setup(context);
-    KEYIN key = context.nextKey(null);
-    VALUEIN value = null;
-    while (key != null) {
-      value = context.nextValue(value);
-      map(key, value, context);
-      key = context.nextKey(key);
+    while (context.nextKeyValue()) {
+      map(context.getCurrentKey(), context.getCurrentValue(), context);
     }
     cleanup(context);
   }
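With the run() loop above in place, a typical new-style mapper only overrides map() and writes through the context. A token-counting sketch (the class itself is illustrative, not part of this patch):

    // Illustrative mapper for the new API.
    import java.io.IOException;
    import java.util.StringTokenizer;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class TokenizerMapper
        extends Mapper<LongWritable, Text, Text, IntWritable> {
      private final static IntWritable ONE = new IntWritable(1);
      private Text word = new Text();

      @Override
      protected void map(LongWritable key, Text value, Context context)
          throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
          word.set(itr.nextToken());
          context.write(word, ONE);     // context.write() replaces the old collect()
        }
      }
    }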

+ 113 - 0
src/mapred/org/apache/hadoop/mapreduce/OutputCommitter.java

@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import java.io.IOException;
+
+/**
+ * <code>OutputCommitter</code> describes the commit of task output for a 
+ * Map-Reduce job.
+ *
+ * <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of 
+ * the job to:<p>
+ * <ol>
+ *   <li>
+ *   Setup the job during initialization. For example, create the temporary 
+ *   output directory for the job during the initialization of the job.
+ *   </li>
+ *   <li>
+ *   Cleanup the job after the job completion. For example, remove the
+ *   temporary output directory after the job completion. 
+ *   </li>
+ *   <li>
+ *   Setup the task temporary output.
+ *   </li> 
+ *   <li>
+ *   Check whether a task needs a commit. This is to avoid the commit
+ *   procedure if a task does not need commit.
+ *   </li>
+ *   <li>
+ *   Commit of the task output.
+ *   </li>  
+ *   <li>
+ *   Discard the task commit.
+ *   </li>
+ * </ol>
+ * 
+ * @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter 
+ * @see JobContext
+ * @see TaskAttemptContext 
+ *
+ */
+public abstract class OutputCommitter {
+  /**
+   * For the framework to setup the job output during initialization
+   * 
+   * @param jobContext Context of the job whose output is being written.
+   * @throws IOException if temporary output could not be created
+   */
+  public abstract void setupJob(JobContext jobContext) throws IOException;
+
+  /**
+   * For cleaning up the job's output after job completion
+   * 
+   * @param jobContext Context of the job whose output is being written.
+   * @throws IOException
+   */
+  public abstract void cleanupJob(JobContext jobContext) throws IOException;
+
+  /**
+   * Sets up output for the task.
+   * 
+   * @param taskContext Context of the task whose output is being written.
+   * @throws IOException
+   */
+  public abstract void setupTask(TaskAttemptContext taskContext)
+  throws IOException;
+  
+  /**
+   * Check whether task needs a commit
+   * 
+   * @param taskContext
+   * @return true/false
+   * @throws IOException
+   */
+  public abstract boolean needsTaskCommit(TaskAttemptContext taskContext)
+  throws IOException;
+
+  /**
+   * To promote the task's temporary output to final output location
+   * 
+   * The task's output is moved to the job's output directory.
+   * 
+   * @param taskContext Context of the task whose output is being written.
+   * @throws IOException if commit is not 
+   */
+  public abstract void commitTask(TaskAttemptContext taskContext)
+  throws IOException;
+  
+  /**
+   * Discard the task output
+   * 
+   * @param taskContext
+   * @throws IOException
+   */
+  public abstract void abortTask(TaskAttemptContext taskContext)
+  throws IOException;
+}
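For output that is written straight to its final location, the new abstract class can be satisfied with no-ops; only needsTaskCommit() really matters. A minimal sketch (class name is illustrative):

    // Minimal committer sketch: nothing to set up, promote, or discard.
    import java.io.IOException;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.OutputCommitter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public class NoOpOutputCommitter extends OutputCommitter {
      public void setupJob(JobContext jobContext) throws IOException { }
      public void cleanupJob(JobContext jobContext) throws IOException { }
      public void setupTask(TaskAttemptContext taskContext) throws IOException { }
      public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
        return false;        // nothing to promote, so the commit phase is skipped
      }
      public void commitTask(TaskAttemptContext taskContext) throws IOException { }
      public void abortTask(TaskAttemptContext taskContext) throws IOException { }
    }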

+ 12 - 0
src/mapred/org/apache/hadoop/mapreduce/OutputFormat.java

@@ -68,5 +68,17 @@ public abstract class OutputFormat<K, V> {
   public abstract void checkOutputSpecs(JobContext context
                                         ) throws IOException, 
                                                  InterruptedException;
+
+  /**
+   * Get the output committer for this output format. This is responsible
+   * for ensuring the output is committed correctly.
+   * @param context the task context
+   * @return an output committer
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public abstract 
+  OutputCommitter getOutputCommitter(TaskAttemptContext context
+                                     ) throws IOException, InterruptedException;
 }
 

+ 1 - 0
src/mapred/org/apache/hadoop/mapreduce/Partitioner.java

@@ -44,4 +44,5 @@ public abstract class Partitioner<KEY, VALUE> {
    * @return the partition number for the <code>key</code>.
    */
   public abstract int getPartition(KEY key, VALUE value, int numPartitions);
+  
 }

+ 16 - 9
src/mapred/org/apache/hadoop/mapreduce/RecordReader.java

@@ -41,24 +41,31 @@ public abstract class RecordReader<KEYIN, VALUEIN> implements Closeable {
                                   ) throws IOException, InterruptedException;
 
   /**
-   * Read the next key.
-   * @param key the object to be read into, which may be null
-   * @return the object that was read
+   * Read the next key, value pair.
+   * @return true if a key/value pair was read
    * @throws IOException
    * @throws InterruptedException
    */
-  public abstract KEYIN nextKey(KEYIN key
-                                ) throws IOException, InterruptedException;
+  public abstract 
+  boolean nextKeyValue() throws IOException, InterruptedException;
 
   /**
-   * Read the next value. It must be called after {@link #nextKey(Object)}.
-   * @param value the object to read into, which may be null
+   * Get the current key
+   * @return the current key or null if there is no current key
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public abstract
+  KEYIN getCurrentKey() throws IOException, InterruptedException;
+  
+  /**
+   * Get the current value.
    * @return the object that was read
    * @throws IOException
    * @throws InterruptedException
    */
-  public abstract VALUEIN nextValue(VALUEIN value
-                                    ) throws IOException, InterruptedException;
+  public abstract 
+  VALUEIN getCurrentValue() throws IOException, InterruptedException;
   
   /**
    * The current progress of the record reader through its data.
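The consumption pattern for the reworked reader contract is a plain loop, e.g. from a test harness; the current key/value objects may be reused between calls, so they must be copied if they need to outlive an iteration. A sketch against the LineRecordReader further down (the no-arg constructor, split, and attempt context are assumed):

    // Sketch: driving a new-style RecordReader by hand.
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, attemptContext);           // FileSplit + TaskAttemptContext assumed to exist
    while (reader.nextKeyValue()) {
      LongWritable offset = reader.getCurrentKey();     // byte offset of the line
      Text line = reader.getCurrentValue();             // reused Text instance
      // use offset/line here; copy them if they must outlive the loop
    }
    reader.close();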

+ 5 - 4
src/mapred/org/apache/hadoop/mapreduce/RecordWriter.java

@@ -31,7 +31,7 @@ import org.apache.hadoop.fs.FileSystem;
  * 
  * @see OutputFormat
  */
-public interface RecordWriter<K, V> {
+public abstract class RecordWriter<K, V> {
   /** 
    * Writes a key/value pair.
    *
@@ -39,7 +39,8 @@ public interface RecordWriter<K, V> {
    * @param value the value to write.
    * @throws IOException
    */      
-  void write(K key, V value) throws IOException, InterruptedException;
+  public abstract void write(K key, V value
+                             ) throws IOException, InterruptedException;
 
   /** 
    * Close this <code>RecordWriter</code> to future operations.
@@ -47,6 +48,6 @@ public interface RecordWriter<K, V> {
    * @param context the context of the task
    * @throws IOException
    */ 
-  void close(TaskAttemptContext context
-             ) throws IOException, InterruptedException;
+  public abstract void close(TaskAttemptContext context
+                             ) throws IOException, InterruptedException;
 }
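Existing writers now extend rather than implement RecordWriter; the contract is otherwise unchanged. A toy subclass, just to show the shape (illustrative, not part of this patch):

    // Toy RecordWriter sketch: tab-separated key/value pairs to standard output.
    import java.io.IOException;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public class ConsoleRecordWriter<K, V> extends RecordWriter<K, V> {
      @Override
      public void write(K key, V value) throws IOException, InterruptedException {
        System.out.println(key + "\t" + value);
      }
      @Override
      public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        System.out.flush();
      }
    }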

+ 148 - 7
src/mapred/org/apache/hadoop/mapreduce/ReduceContext.java

@@ -19,8 +19,17 @@
 package org.apache.hadoop.mapreduce;
 
 import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.serializer.Deserializer;
+import org.apache.hadoop.io.serializer.SerializationFactory;
+import org.apache.hadoop.mapred.RawKeyValueIterator;
+import org.apache.hadoop.util.Progressable;
 
 /**
  * The context passed to the {@link Reducer}.
@@ -29,19 +38,151 @@ import org.apache.hadoop.conf.Configuration;
  * @param <KEYOUT> the class of the output keys
  * @param <VALUEOUT> the class of the output values
  */
-public abstract class ReduceContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT>
+public class ReduceContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT>
     extends TaskInputOutputContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
+  private RawKeyValueIterator input;
+  private RawComparator<KEYIN> comparator;
+  private KEYIN key;                                  // current key
+  private VALUEIN value;                              // current value
+  private boolean firstValue = false;                 // first value in key
+  private boolean nextKeyIsSame = false;              // more w/ this key
+  private boolean hasMore;                            // more in file
+  protected Progressable reporter;
+  private Deserializer<KEYIN> keyDeserializer;
+  private Deserializer<VALUEIN> valueDeserializer;
+  private DataInputBuffer buffer = new DataInputBuffer();
+  private BytesWritable currentRawKey = new BytesWritable();
+  private ValueIterable iterable = new ValueIterable();
 
-  public ReduceContext(Configuration conf, TaskAttemptID taskid) {
-    super(conf, taskid);
+  public ReduceContext(Configuration conf, TaskAttemptID taskid,
+                       RawKeyValueIterator input, 
+                       RecordWriter<KEYOUT,VALUEOUT> output,
+                       OutputCommitter committer,
+                       StatusReporter reporter,
+                       RawComparator<KEYIN> comparator,
+                       Class<KEYIN> keyClass,
+                       Class<VALUEIN> valueClass
+                       ) throws InterruptedException, IOException{
+    super(conf, taskid, output, committer, reporter);
+    this.input = input;
+    this.comparator = comparator;
+    SerializationFactory serializationFactory = new SerializationFactory(conf);
+    this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
+    this.keyDeserializer.open(buffer);
+    this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
+    this.valueDeserializer.open(buffer);
+    hasMore = input.next();
   }
 
+  /** Start processing next unique key. */
+  public boolean nextKey() throws IOException,InterruptedException {
+    while (hasMore && nextKeyIsSame) {
+      nextKeyValue();
+    }
+    if (hasMore) {
+      return nextKeyValue();
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * Advance to the next key/value pair.
+   */
+  @Override
+  public boolean nextKeyValue() throws IOException, InterruptedException {
+    if (!hasMore) {
+      key = null;
+      value = null;
+      return false;
+    }
+    firstValue = !nextKeyIsSame;
+    DataInputBuffer next = input.getKey();
+    currentRawKey.set(next.getData(), next.getPosition(), 
+                      next.getLength() - next.getPosition());
+    buffer.reset(currentRawKey.getBytes(), 0, currentRawKey.getLength());
+    key = keyDeserializer.deserialize(key);
+    next = input.getValue();
+    buffer.reset(next.getData(), next.getPosition(), next.getLength());
+    value = valueDeserializer.deserialize(value);
+    hasMore = input.next();
+    if (hasMore) {
+      next = input.getKey();
+      nextKeyIsSame = comparator.compare(currentRawKey.getBytes(), 0, 
+                                         currentRawKey.getLength(),
+                                         next.getData(),
+                                         next.getPosition(),
+                                         next.getLength() - next.getPosition()
+                                         ) == 0;
+    } else {
+      nextKeyIsSame = false;
+    }
+    return true;
+  }
+
+  public KEYIN getCurrentKey() {
+    return key;
+  }
+
+  @Override
+  public VALUEIN getCurrentValue() {
+    return value;
+  }
+
+  protected class ValueIterator implements Iterator<VALUEIN> {
+
+    @Override
+    public boolean hasNext() {
+      return firstValue || nextKeyIsSame;
+    }
+
+    @Override
+    public VALUEIN next() {
+      // if this is the first record, we don't need to advance
+      if (firstValue) {
+        firstValue = false;
+        return value;
+      }
+      // if this isn't the first record and the next key is different, they
+      // can't advance it here.
+      if (!nextKeyIsSame) {
+        throw new NoSuchElementException("iterate past last value");
+      }
+      // otherwise, go to the next key/value pair
+      try {
+        nextKeyValue();
+        return value;
+      } catch (IOException ie) {
+        throw new RuntimeException("next value iterator failed", ie);
+      } catch (InterruptedException ie) {
+        // this is bad, but we can't modify the exception list of java.util
+        throw new RuntimeException("next value iterator interrupted", ie);        
+      }
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException("remove not implemented");
+    }
+    
+  }
+
+  protected class ValueIterable implements Iterable<VALUEIN> {
+    private ValueIterator iterator = new ValueIterator();
+    @Override
+    public Iterator<VALUEIN> iterator() {
+      return iterator;
+    } 
+  }
+  
   /**
    * Iterate through the values for the current key, reusing the same value 
    * object, which is stored in the context.
-   * @return the series of values associated with the current key
+   * @return the series of values associated with the current key. All of the 
+   * objects returned directly and indirectly from this method are reused.
    */
-  public abstract 
-  Iterable<VALUEIN> getValues() throws IOException, InterruptedException;
-
+  public 
+  Iterable<VALUEIN> getValues() throws IOException, InterruptedException {
+    return iterable;
+  }
 }
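Because the context now deserializes every record into the same key and value objects, a reducer that needs to retain values past one step of the getValues() iteration has to copy them. A fragment assuming Text values and a Configuration in scope (e.g. from the context), using the existing WritableUtils.clone helper:

    // Inside reduce(K key, Iterable<Text> values, Context context) -- illustrative:
    List<Text> retained = new ArrayList<Text>();
    for (Text val : values) {
      retained.add(WritableUtils.clone(val, conf));   // 'val' is the single reused instance
    }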

+ 18 - 11
src/mapred/org/apache/hadoop/mapreduce/Reducer.java

@@ -21,8 +21,8 @@ package org.apache.hadoop.mapreduce;
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.mapred.RawKeyValueIterator;
 
 /** 
  * Reduces a set of intermediate values which share a key to a smaller set of
@@ -88,7 +88,7 @@ import org.apache.hadoop.mapreduce.Partitioner;
  *   the sorted inputs.</p>
  *   <p>The output of the reduce task is typically written to a 
  *   {@link RecordWriter} via 
- *   {@link Context#collect(Object, Object)}.</p>
+ *   {@link Context#write(Object, Object)}.</p>
  *   </li>
  * </ol>
  * 
@@ -117,10 +117,19 @@ import org.apache.hadoop.mapreduce.Partitioner;
  */
 public abstract class Reducer<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
 
-  protected abstract class Context 
+  public class Context 
     extends ReduceContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
-    public Context(Configuration conf, TaskAttemptID taskid) {
-      super(conf, taskid);
+    public Context(Configuration conf, TaskAttemptID taskid,
+                   RawKeyValueIterator input, 
+                   RecordWriter<KEYOUT,VALUEOUT> output,
+                   OutputCommitter committer,
+                   StatusReporter reporter,
+                   RawComparator<KEYIN> comparator,
+                   Class<KEYIN> keyClass,
+                   Class<VALUEIN> valueClass
+                   ) throws IOException, InterruptedException {
+      super(conf, taskid, input, output, committer, reporter, comparator, 
+            keyClass, valueClass);
     }
   }
 
@@ -141,7 +150,7 @@ public abstract class Reducer<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
   protected void reduce(KEYIN key, Iterable<VALUEIN> values, Context context
                         ) throws IOException, InterruptedException {
     for(VALUEIN value: values) {
-      context.collect((KEYOUT) key, (VALUEOUT) value);
+      context.write((KEYOUT) key, (VALUEOUT) value);
     }
   }
 
@@ -160,10 +169,8 @@ public abstract class Reducer<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
    */
   public void run(Context context) throws IOException, InterruptedException {
     setup(context);
-    KEYIN key = context.nextKey(null);
-    while(key != null) {
-      reduce(key, context.getValues(), context);
-      key = context.nextKey(key);
+    while (context.nextKey()) {
+      reduce(context.getCurrentKey(), context.getValues(), context);
     }
     cleanup(context);
   }
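A matching new-style reducer only overrides reduce(); grouping is handled by the nextKey()/getValues() loop above. An int-sum sketch (illustrative, not part of this patch):

    // Illustrative sum reducer for the new API.
    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
      private IntWritable result = new IntWritable();

      @Override
      protected void reduce(Text key, Iterable<IntWritable> values, Context context)
          throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
          sum += val.get();
        }
        result.set(sum);
        context.write(key, result);     // write() replaces the old collect()
      }
    }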

+ 25 - 0
src/mapred/org/apache/hadoop/mapreduce/StatusReporter.java

@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce;
+
+public abstract class StatusReporter {
+  public abstract Counter getCounter(Enum<?> name);
+  public abstract Counter getCounter(String group, String name);
+  public abstract void progress();
+  public abstract void setStatus(String status);
+}
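A do-nothing StatusReporter is enough to construct the new contexts outside a running task, for example in unit tests; counters can simply be reported as absent. A sketch:

    // Sketch: a null StatusReporter for constructing contexts in tests.
    import org.apache.hadoop.mapreduce.Counter;
    import org.apache.hadoop.mapreduce.StatusReporter;

    public class NullStatusReporter extends StatusReporter {
      public Counter getCounter(Enum<?> name) { return null; }          // no counters tracked
      public Counter getCounter(String group, String name) { return null; }
      public void progress() { }
      public void setStatus(String status) { }
    }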

+ 7 - 12
src/mapred/org/apache/hadoop/mapreduce/TaskAttemptContext.java

@@ -26,12 +26,12 @@ import org.apache.hadoop.util.Progressable;
 /**
  * The context for task attempts.
  */
-public abstract class TaskAttemptContext extends JobContext 
-    implements Progressable {
+public class TaskAttemptContext extends JobContext implements Progressable {
   private final TaskAttemptID taskId;
   private String status = "";
   
-  public TaskAttemptContext(Configuration conf, TaskAttemptID taskId) {
+  public TaskAttemptContext(Configuration conf, 
+                            TaskAttemptID taskId) {
     super(conf, taskId.getJobID());
     this.taskId = taskId;
   }
@@ -39,7 +39,7 @@ public abstract class TaskAttemptContext extends JobContext
   /**
    * Get the unique name for this task attempt.
    */
-  public TaskAttemptID getTaskAttemptId() {
+  public TaskAttemptID getTaskAttemptID() {
     return taskId;
   }
 
@@ -59,13 +59,8 @@ public abstract class TaskAttemptContext extends JobContext
   }
 
   /**
-   * Lookup a counter by an enum.
+   * Report progress. The subtypes actually do work in this method.
    */
-  public abstract Counter getCounter(Enum<?> counterName);
-
-  /**
-   * Lookup a counter by group and counter name. The enum-based interface is
-   * preferred.
-   */
-  public abstract Counter getCounter(String groupName, String counterName);
+  public void progress() { 
+  }
 }

+ 46 - 81
src/mapred/org/apache/hadoop/mapreduce/TaskAttemptID.java

@@ -33,7 +33,7 @@ import java.io.IOException;
  * An example TaskAttemptID is : 
  * <code>attempt_200707121733_0003_m_000005_0</code> , which represents the
  * zeroth task attempt for the fifth map task in the third job 
- * running at the jobtracker started at <code>200707121733</code>. 
+ * running at the jobtracker started at <code>200707121733</code>.
  * <p>
  * Applications should never construct or parse TaskAttemptID strings
  * , but rather use appropriate constructors or {@link #forName(String)} 
@@ -42,10 +42,9 @@ import java.io.IOException;
  * @see JobID
  * @see TaskID
  */
-public class TaskAttemptID extends ID {
-  private static final String ATTEMPT = "attempt";
+public class TaskAttemptID extends org.apache.hadoop.mapred.ID {
+  protected static final String ATTEMPT = "attempt";
   private TaskID taskId;
-  private static final char UNDERSCORE = '_';
   
   /**
    * Constructs a TaskAttemptID object from given {@link TaskID}.  
@@ -68,12 +67,14 @@ public class TaskAttemptID extends ID {
    * @param taskId taskId number
    * @param id the task attempt number
    */
-  public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap
-      , int taskId, int id) {
+  public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, 
+                       int taskId, int id) {
     this(new TaskID(jtIdentifier, jobId, isMap, taskId), id);
   }
   
-  private TaskAttemptID() { }
+  public TaskAttemptID() { 
+    taskId = new TaskID();
+  }
   
   /** Returns the {@link JobID} object that this task attempt belongs to */
   public JobID getJobID() {
@@ -99,38 +100,19 @@ public class TaskAttemptID extends ID {
     return this.taskId.equals(that.taskId);
   }
   
-  /**Compare TaskIds by first tipIds, then by task numbers. */
-  @Override
-  public int compareTo(ID o) {
-    TaskAttemptID that = (TaskAttemptID)o;
-    int tipComp = this.taskId.compareTo(that.taskId);
-    if(tipComp == 0) {
-      return this.id - that.id;
-    }
-    else return tipComp;
-  }
-  @Override
-  public String toString() { 
-    StringBuilder builder = new StringBuilder();
-    return builder.append(ATTEMPT).append(UNDERSCORE)
-      .append(toStringWOPrefix()).toString();
-  }
-
-  StringBuilder toStringWOPrefix() {
-    StringBuilder builder = new StringBuilder();
-    return builder.append(taskId.toStringWOPrefix())
-      .append(UNDERSCORE).append(id);
-  }
-  
-  @Override
-  public int hashCode() {
-    return toStringWOPrefix().toString().hashCode();
+  /**
+   * Add the unique string to the StringBuilder
+   * @param builder the builder to append to
+   * @return the builder that was passed in.
+   */
+  protected StringBuilder appendTo(StringBuilder builder) {
+    return taskId.appendTo(builder).append(SEPARATOR).append(id);
   }
   
   @Override
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
-    this.taskId = TaskID.read(in);
+    taskId.readFields(in);
   }
 
   @Override
@@ -138,72 +120,55 @@ public class TaskAttemptID extends ID {
     super.write(out);
     taskId.write(out);
   }
-  
-  public static TaskAttemptID read(DataInput in) throws IOException {
-    TaskAttemptID taskId = new TaskAttemptID();
-    taskId.readFields(in);
-    return taskId;
+
+  @Override
+  public int hashCode() {
+    return taskId.hashCode() * 5 + id;
   }
   
+  /**Compare TaskIds by first tipIds, then by task numbers. */
+  @Override
+  public int compareTo(ID o) {
+    TaskAttemptID that = (TaskAttemptID)o;
+    int tipComp = this.taskId.compareTo(that.taskId);
+    if(tipComp == 0) {
+      return this.id - that.id;
+    }
+    else return tipComp;
+  }
+  @Override
+  public String toString() { 
+    return appendTo(new StringBuilder(ATTEMPT)).toString();
+  }
+
   /** Construct a TaskAttemptID object from given string 
    * @return constructed TaskAttemptID object or null if the given String is null
    * @throws IllegalArgumentException if the given string is malformed
    */
-  public static TaskAttemptID forName(String str) throws IllegalArgumentException {
+  public static TaskAttemptID forName(String str
+                                      ) throws IllegalArgumentException {
     if(str == null)
       return null;
     try {
-      String[] parts = str.split("_");
+      String[] parts = str.split(Character.toString(SEPARATOR));
       if(parts.length == 6) {
         if(parts[0].equals(ATTEMPT)) {
           boolean isMap = false;
           if(parts[3].equals("m")) isMap = true;
           else if(parts[3].equals("r")) isMap = false;
           else throw new Exception();
-          return new TaskAttemptID(parts[1], Integer.parseInt(parts[2]),
-              isMap, Integer.parseInt(parts[4]), Integer.parseInt(parts[5]));
+          return new org.apache.hadoop.mapred.TaskAttemptID
+                       (parts[1],
+                        Integer.parseInt(parts[2]),
+                        isMap, Integer.parseInt(parts[4]), 
+                        Integer.parseInt(parts[5]));
         }
       }
-    }catch (Exception ex) {//fall below
+    } catch (Exception ex) {
+      //fall below
     }
     throw new IllegalArgumentException("TaskAttemptId string : " + str 
         + " is not properly formed");
   }
-  
-  /** 
-   * Returns a regex pattern which matches task attempt IDs. Arguments can 
-   * be given null, in which case that part of the regex will be generic.  
-   * For example to obtain a regex matching <i>all task attempt IDs</i> 
-   * of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first 
-   * map task</i>, we would use :
-   * <pre> 
-   * TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
-   * </pre>
-   * which will return :
-   * <pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre> 
-   * @param jtIdentifier jobTracker identifier, or null
-   * @param jobId job number, or null
-   * @param isMap whether the tip is a map, or null 
-   * @param taskId taskId number, or null
-   * @param attemptId the task attempt number, or null
-   * @return a regex pattern matching TaskAttemptIDs
-   */
-  public static String getTaskAttemptIDsPattern(String jtIdentifier,
-      Integer jobId, Boolean isMap, Integer taskId, Integer attemptId) {
-    StringBuilder builder = new StringBuilder(ATTEMPT).append(UNDERSCORE);
-    builder.append(getTaskAttemptIDsPatternWOPrefix(jtIdentifier, jobId,
-        isMap, taskId, attemptId));
-    return builder.toString();
-  }
-  
-  static StringBuilder getTaskAttemptIDsPatternWOPrefix(String jtIdentifier
-      , Integer jobId, Boolean isMap, Integer taskId, Integer attemptId) {
-    StringBuilder builder = new StringBuilder();
-    builder.append(TaskID.getTaskIDsPatternWOPrefix(jtIdentifier
-        , jobId, isMap, taskId))
-        .append(UNDERSCORE)
-        .append(attemptId != null ? attemptId : "[0-9]*");
-    return builder;
-  }
-  
+
 }

+ 25 - 58
src/mapred/org/apache/hadoop/mapreduce/TaskID.java

@@ -45,10 +45,9 @@ import java.text.NumberFormat;
  * @see JobID
  * @see TaskAttemptID
  */
-public class TaskID extends ID {
-  private static final String TASK = "task";
-  private static char UNDERSCORE = '_';  
-  private static NumberFormat idFormat = NumberFormat.getInstance();
+public class TaskID extends org.apache.hadoop.mapred.ID {
+  protected static final String TASK = "task";
+  protected static final NumberFormat idFormat = NumberFormat.getInstance();
   static {
     idFormat.setGroupingUsed(false);
     idFormat.setMinimumIntegerDigits(6);
@@ -83,7 +82,9 @@ public class TaskID extends ID {
     this(new JobID(jtIdentifier, jobId), isMap, id);
   }
   
-  private TaskID() { }
+  public TaskID() { 
+    jobId = new JobID();
+  }
   
   /** Returns the {@link JobID} object that this tip belongs to */
   public JobID getJobID() {
@@ -118,31 +119,34 @@ public class TaskID extends ID {
     }
     else return jobComp;
   }
-  
   @Override
   public String toString() { 
-    StringBuilder builder = new StringBuilder();
-    return builder.append(TASK).append(UNDERSCORE)
-      .append(toStringWOPrefix()).toString();
+    return appendTo(new StringBuilder(TASK)).toString();
   }
 
-  StringBuilder toStringWOPrefix() {
-    StringBuilder builder = new StringBuilder();
-    builder.append(jobId.toStringWOPrefix())
-      .append(isMap ? "_m_" : "_r_");
-    return builder.append(idFormat.format(id));
+  /**
+   * Add the unique string to the given builder.
+   * @param builder the builder to append to
+   * @return the builder that was passed in
+   */
+  protected StringBuilder appendTo(StringBuilder builder) {
+    return jobId.appendTo(builder).
+                 append(SEPARATOR).
+                 append(isMap ? 'm' : 'r').
+                 append(SEPARATOR).
+                 append(idFormat.format(id));
   }
   
   @Override
   public int hashCode() {
-    return toStringWOPrefix().toString().hashCode();
+    return jobId.hashCode() * 524287 + id;
   }
   
   @Override
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
-    this.jobId = JobID.read(in);
-    this.isMap = in.readBoolean();
+    jobId.readFields(in);
+    isMap = in.readBoolean();
   }
 
   @Override
@@ -152,12 +156,6 @@ public class TaskID extends ID {
     out.writeBoolean(isMap);
   }
   
-  public static TaskID read(DataInput in) throws IOException {
-    TaskID tipId = new TaskID();
-    tipId.readFields(in);
-    return tipId;
-  }
-  
   /** Construct a TaskID object from given string 
    * @return constructed TaskID object or null if the given String is null
    * @throws IllegalArgumentException if the given string is malformed
@@ -174,8 +172,10 @@ public class TaskID extends ID {
           if(parts[3].equals("m")) isMap = true;
           else if(parts[3].equals("r")) isMap = false;
           else throw new Exception();
-          return new TaskID(parts[1], Integer.parseInt(parts[2]),
-              isMap, Integer.parseInt(parts[4]));
+          return new org.apache.hadoop.mapred.TaskID(parts[1], 
+                                                     Integer.parseInt(parts[2]),
+                                                     isMap, 
+                                                     Integer.parseInt(parts[4]));
         }
       }
     }catch (Exception ex) {//fall below
@@ -184,37 +184,4 @@ public class TaskID extends ID {
         + " is not properly formed");
   }
   
-  /** 
-   * Returns a regex pattern which matches task IDs. Arguments can 
-   * be given null, in which case that part of the regex will be generic.  
-   * For example to obtain a regex matching <i>the first map task</i> 
-   * of <i>any jobtracker</i>, of <i>any job</i>, we would use :
-   * <pre> 
-   * TaskID.getTaskIDsPattern(null, null, true, 1);
-   * </pre>
-   * which will return :
-   * <pre> "task_[^_]*_[0-9]*_m_000001*" </pre> 
-   * @param jtIdentifier jobTracker identifier, or null
-   * @param jobId job number, or null
-   * @param isMap whether the tip is a map, or null 
-   * @param taskId taskId number, or null
-   * @return a regex pattern matching TaskIDs
-   */
-  public static String getTaskIDsPattern(String jtIdentifier, Integer jobId
-      , Boolean isMap, Integer taskId) {
-    StringBuilder builder = new StringBuilder(TASK).append(UNDERSCORE)
-      .append(getTaskIDsPatternWOPrefix(jtIdentifier, jobId, isMap, taskId));
-    return builder.toString();
-  }
-  
-  static StringBuilder getTaskIDsPatternWOPrefix(String jtIdentifier
-      , Integer jobId, Boolean isMap, Integer taskId) {
-    StringBuilder builder = new StringBuilder();
-    builder.append(JobID.getJobIDsPatternWOPrefix(jtIdentifier, jobId))
-      .append(UNDERSCORE)
-      .append(isMap != null ? (isMap ? "m" : "r") : "(m|r)").append(UNDERSCORE)
-      .append(taskId != null ? idFormat.format(taskId) : "[0-9]*");
-    return builder;
-  }
-  
 }

+ 54 - 14
src/mapred/org/apache/hadoop/mapreduce/TaskInputOutputContext.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce;
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Progressable;
 
 /**
  * A context object that allows input and output from the task. It is only
@@ -31,34 +32,73 @@ import org.apache.hadoop.conf.Configuration;
  * @param <VALUEOUT> the output value type for the task
  */
 public abstract class TaskInputOutputContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> 
-    extends TaskAttemptContext {
+       extends TaskAttemptContext implements Progressable {
+  private RecordWriter<KEYOUT,VALUEOUT> output;
+  private StatusReporter reporter;
+  private OutputCommitter committer;
 
-  public TaskInputOutputContext(Configuration conf, TaskAttemptID taskid) {
+  public TaskInputOutputContext(Configuration conf, TaskAttemptID taskid,
+                                RecordWriter<KEYOUT,VALUEOUT> output,
+                                OutputCommitter committer,
+                                StatusReporter reporter) {
     super(conf, taskid);
+    this.output = output;
+    this.reporter = reporter;
+    this.committer = committer;
   }
 
   /**
-   * Advance to the next key, returning null if at end.
-   * @param key the key object to read in to, which may be null
-   * @return the key object that was read into
+   * Advance to the next key, value pair.
+   * @return true if a key/value pair was read, or false at end of input
    */
-  public abstract KEYIN nextKey(KEYIN key
-                                ) throws IOException, InterruptedException;
-  
+  public abstract 
+  boolean nextKeyValue() throws IOException, InterruptedException;
+ 
+  /**
+   * Get the current key.
+   * @return the current key object or null if there isn't one
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public abstract 
+  KEYIN getCurrentKey() throws IOException, InterruptedException;
+
   /**
-   * Read the next value. Must be called after nextKey.
-   * @param value the value object to read in to, which may be null
+   * Get the current value.
    * @return the value object that was read into
    * @throws IOException
    * @throws InterruptedException
    */
-  public abstract VALUEIN nextValue(VALUEIN value
-                                    ) throws IOException, InterruptedException;
+  public abstract VALUEIN getCurrentValue() throws IOException, 
+                                                   InterruptedException;
 
   /**
    * Generate an output key/value pair.
    */
-  public abstract void collect(KEYOUT key, VALUEOUT value
-                               ) throws IOException, InterruptedException;
+  public void write(KEYOUT key, VALUEOUT value
+                    ) throws IOException, InterruptedException {
+    output.write(key, value);
+  }
+
+  public Counter getCounter(Enum<?> counterName) {
+    return reporter.getCounter(counterName);
+  }
+
+  public Counter getCounter(String groupName, String counterName) {
+    return reporter.getCounter(groupName, counterName);
+  }
+
+  @Override
+  public void progress() {
+    reporter.progress();
+  }
 
+  @Override
+  public void setStatus(String status) {
+    reporter.setStatus(status);
+  }
+  
+  public OutputCommitter getOutputCommitter() {
+    return committer;
+  }
 }
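User code is largely unaffected by this delegation: counters, progress, and output all still go through the task context, which now routes them to the StatusReporter and RecordWriter shown above. A fragment from an illustrative mapper body (the group/counter names are made up, and Counter.increment(long) is assumed):

    // Inside a Mapper<LongWritable, Text, Text, LongWritable> subclass -- illustrative:
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      context.getCounter("wordcount", "input bytes").increment(value.getLength());
      context.progress();                 // keep-alive, now forwarded to the StatusReporter
      context.write(value, key);
    }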

+ 31 - 17
src/mapred/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -108,26 +109,37 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
 
   /**
    * Set a PathFilter to be applied to the input paths for the map-reduce job.
-   *
+   * @param job the job to modify
    * @param filter the PathFilter class use for filtering the input paths.
    */
-  public static void setInputPathFilter(Configuration conf,
+  public static void setInputPathFilter(Job job,
                                         Class<? extends PathFilter> filter) {
-    conf.setClass("mapred.input.pathFilter.class", filter, PathFilter.class);
+    job.getConfiguration().setClass("mapred.input.pathFilter.class", filter, 
+                                    PathFilter.class);
   }
 
-  public static void setMinInputSplitSize(Configuration conf,
+  /**
+   * Set the minimum input split size
+   * @param job the job to modify
+   * @param size the minimum size
+   */
+  public static void setMinInputSplitSize(Job job,
                                           long size) {
-    conf.setLong("mapred.min.split.size", size);
+    job.getConfiguration().setLong("mapred.min.split.size", size);
   }
 
   public static long getMinSplitSize(Configuration conf) {
     return conf.getLong("mapred.min.split.size", 1L);
   }
 
-  public static void setMaxInputSplitSize(Configuration conf,
+  /**
+   * Set the maximum split size
+   * @param job the job to modify
+   * @param size the maximum split size
+   */
+  public static void setMaxInputSplitSize(Job job,
                                           long size) {
-    conf.setLong("mapred.max.split.size", size);
+    job.getConfiguration().setLong("mapred.max.split.size", size);
   }
 
   public static long getMaxSplitSize(Configuration conf) {
@@ -271,14 +283,14 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
    * Sets the given comma separated paths as the list of inputs 
    * for the map-reduce job.
    * 
-   * @param conf Configuration of the job
+   * @param job the job
    * @param commaSeparatedPaths Comma separated paths to be set as 
    *        the list of inputs for the map-reduce job.
    */
-  public static void setInputPaths(Configuration conf, 
+  public static void setInputPaths(Job job, 
                                    String commaSeparatedPaths
                                    ) throws IOException {
-    setInputPaths(conf, StringUtils.stringToPath(
+    setInputPaths(job, StringUtils.stringToPath(
                         getPathStrings(commaSeparatedPaths)));
   }
 
@@ -286,15 +298,15 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
    * Add the given comma separated paths to the list of inputs for
    *  the map-reduce job.
    * 
-   * @param conf The configuration of the job 
+   * @param job The job to modify
    * @param commaSeparatedPaths Comma separated paths to be added to
    *        the list of inputs for the map-reduce job.
    */
-  public static void addInputPaths(Configuration conf, 
+  public static void addInputPaths(Job job, 
                                    String commaSeparatedPaths
                                    ) throws IOException {
     for (String str : getPathStrings(commaSeparatedPaths)) {
-      addInputPath(conf, new Path(str));
+      addInputPath(job, new Path(str));
     }
   }
 
@@ -302,12 +314,13 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
    * Set the array of {@link Path}s as the list of inputs
    * for the map-reduce job.
    * 
-   * @param conf Configuration of the job. 
+   * @param job The job to modify 
    * @param inputPaths the {@link Path}s of the input directories/files 
    * for the map-reduce job.
    */ 
-  public static void setInputPaths(Configuration conf, 
+  public static void setInputPaths(Job job, 
                                    Path... inputPaths) throws IOException {
+    Configuration conf = job.getConfiguration();
     FileSystem fs = FileSystem.get(conf);
     Path path = inputPaths[0].makeQualified(fs);
     StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
@@ -322,12 +335,13 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
   /**
    * Add a {@link Path} to the list of inputs for the map-reduce job.
    * 
-   * @param conf The configuration of the job 
+   * @param job The {@link Job} to modify
    * @param path {@link Path} to be added to the list of inputs for 
    *            the map-reduce job.
    */
-  public static void addInputPath(Configuration conf, 
+  public static void addInputPath(Job job, 
                                   Path path) throws IOException {
+    Configuration conf = job.getConfiguration();
     FileSystem fs = FileSystem.get(conf);
     path = path.makeQualified(fs);
     String dirStr = StringUtils.escapeString(path.toString());
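On the driver side, these helpers now hang off the Job rather than a raw Configuration, and the split-size bounds are set the same way. A sketch (paths and sizes are illustrative):

    // Driver-side sketch of the Job-based FileInputFormat helpers.
    Job job = new Job(new Configuration(), "scan");                   // assumed constructor
    FileInputFormat.addInputPath(job, new Path("/data/in"));
    FileInputFormat.addInputPaths(job, "/data/in2,/data/in3");        // comma-separated form
    FileInputFormat.setMinInputSplitSize(job, 64L * 1024 * 1024);     // 64 MB lower bound
    FileInputFormat.setMaxInputSplitSize(job, 256L * 1024 * 1024);    // 256 MB upper bound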

+ 1 - 0
src/mapred/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java

@@ -27,6 +27,7 @@ import java.util.Iterator;
  * by one.
  */
 public class InvalidInputException extends IOException {
+  private static final long serialVersionUID = -380668190578456802L;
   private List<IOException> problems;
   
   /**

+ 25 - 11
src/mapred/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java

@@ -46,7 +46,9 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
   private long pos;
   private long end;
   private LineReader in;
-  int maxLineLength;
+  private int maxLineLength;
+  private LongWritable key = null;
+  private Text value = null;
 
   public void initialize(InputSplit genericSplit,
                          TaskAttemptContext context) throws IOException {
@@ -82,25 +84,21 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
     this.pos = start;
   }
   
-  public LongWritable nextKey(LongWritable key) throws IOException {
+  public boolean nextKeyValue() throws IOException {
     if (key == null) {
       key = new LongWritable();
     }
     key.set(pos);
-    return key;
-  }
-
-  public Text nextValue(Text value) throws IOException {
     if (value == null) {
       value = new Text();
     }
+    int newSize = 0;
     while (pos < end) {
-      int newSize = in.readLine(value, maxLineLength,
-                                Math.max((int)Math.min(Integer.MAX_VALUE, 
-                                                       end-pos),
-                                         maxLineLength));
+      newSize = in.readLine(value, maxLineLength,
+                            Math.max((int)Math.min(Integer.MAX_VALUE, end-pos),
+                                     maxLineLength));
       if (newSize == 0) {
-        return null;
+        break;
       }
       pos += newSize;
       if (newSize < maxLineLength) {
@@ -111,6 +109,22 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
       LOG.info("Skipped line of size " + newSize + " at pos " + 
                (pos - newSize));
     }
+    if (newSize == 0) {
+      key = null;
+      value = null;
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  @Override
+  public LongWritable getCurrentKey() {
+    return key;
+  }
+
+  @Override
+  public Text getCurrentValue() {
     return value;
   }
 

+ 18 - 9
src/mapred/org/apache/hadoop/mapreduce/lib/input/SequenceFileRecordReader.java

@@ -35,6 +35,8 @@ public class SequenceFileRecordReader<K, V> extends RecordReader<K, V> {
   private long start;
   private long end;
   private boolean more = true;
+  private K key = null;
+  private V value = null;
   protected Configuration conf;
   
   @Override
@@ -58,23 +60,30 @@ public class SequenceFileRecordReader<K, V> extends RecordReader<K, V> {
 
   @Override
   @SuppressWarnings("unchecked")
-  public K nextKey(K key) throws IOException, InterruptedException {
+  public boolean nextKeyValue() throws IOException, InterruptedException {
     if (!more) {
-      return null;
+      return false;
     }
     long pos = in.getPosition();
-    K result = (K) in.next(key);
-    if (result == null || (pos >= end && in.syncSeen())) {
+    key = (K) in.next(key);
+    if (key == null || (pos >= end && in.syncSeen())) {
       more = false;
-      result = null;
+      key = null;
+      value = null;
+    } else {
+      value = (V) in.getCurrentValue(value);
     }
-    return result;
+    return more;
   }
 
   @Override
-  @SuppressWarnings("unchecked")
-  public V nextValue(V value) throws IOException, InterruptedException {
-    return (V) in.getCurrentValue(value);
+  public K getCurrentKey() {
+    return key;
+  }
+  
+  @Override
+  public V getCurrentValue() {
+    return value;
   }
   
   /**

+ 2 - 1
src/mapred/org/apache/hadoop/mapreduce/lib/map/InverseMapper.java

@@ -26,9 +26,10 @@ import org.apache.hadoop.mapreduce.Mapper;
 public class InverseMapper<K, V> extends Mapper<K,V,V,K> {
 
   /** The inverse function.  Input keys and values are swapped.*/
+  @Override
   public void map(K key, V value, Context context
                   ) throws IOException, InterruptedException {
-    context.collect(value, key);
+    context.write(value, key);
   }
   
 }

+ 85 - 45
src/mapred/org/apache/hadoop/mapreduce/lib/map/MultithreadedMapper.java

@@ -23,10 +23,16 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.StatusReporter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * Multithreaded implementation for {@link org.apache.hadoop.mapreduce.Mapper}.
@@ -50,7 +56,7 @@ public class MultithreadedMapper<K1, V1, K2, V2>
   private static final Log LOG = LogFactory.getLog(MultithreadedMapper.class);
   private Class<Mapper<K1,V1,K2,V2>> mapClass;
   private Context outer;
-  private MapRunner[] runners;
+  private List<MapRunner> runners;
 
   public static int getNumberOfThreads(Configuration conf) {
     return conf.getInt("mapred.map.multithreadedrunner.threads", 10);
@@ -78,6 +84,7 @@ public class MultithreadedMapper<K1, V1, K2, V2>
     conf.setClass("mapred.map.multithreadedrunner.class", cls, Mapper.class);
   }
 
+  @Override
   public void run(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
     outer = context;
@@ -88,14 +95,16 @@ public class MultithreadedMapper<K1, V1, K2, V2>
                 " threads");
     }
     
-    runners = (MapRunner[]) new Object[numberOfThreads];
+    runners =  new ArrayList<MapRunner>(numberOfThreads);
     for(int i=0; i < numberOfThreads; ++i) {
-      runners[i] = new MapRunner();
-      runners[i].start();
+      MapRunner thread = new MapRunner(context);
+      thread.start();
+      runners.add(thread);
     }
     for(int i=0; i < numberOfThreads; ++i) {
-      runners[i].join();
-      Throwable th = runners[i].throwable;
+      MapRunner thread = runners.get(i);
+      thread.join();
+      Throwable th = thread.throwable;
       if (th != null) {
         if (th instanceof IOException) {
           throw (IOException) th;
@@ -108,85 +117,116 @@ public class MultithreadedMapper<K1, V1, K2, V2>
     }
   }
 
-  private class SubMapContext extends Context {
+  private class SubMapRecordReader extends RecordReader<K1,V1> {
     private K1 key;
     private V1 value;
-    
-    SubMapContext() {
-      super(outer.getConfiguration(), outer.getTaskAttemptId());
+    private Configuration conf;
+
+    @Override
+    public void close() throws IOException {
     }
 
     @Override
-    public InputSplit getInputSplit() {
-      synchronized (outer) {
-        return outer.getInputSplit();
-      }
+    public float getProgress() throws IOException, InterruptedException {
+      return 0;
     }
 
     @Override
-    public Counter getCounter(Enum<?> counterName) {
-      synchronized (outer) {
-        return outer.getCounter(counterName);
-      }
+    public void initialize(InputSplit split, 
+                           TaskAttemptContext context
+                           ) throws IOException, InterruptedException {
+      conf = context.getConfiguration();
     }
 
+
     @Override
-    public Counter getCounter(String groupName, String counterName) {
+    public boolean nextKeyValue() throws IOException, InterruptedException {
       synchronized (outer) {
-        return outer.getCounter(groupName, counterName);
+        if (!outer.nextKeyValue()) {
+          return false;
+        }
+        key = ReflectionUtils.copy(outer.getConfiguration(),
+                                   outer.getCurrentKey(), key);
+        value = ReflectionUtils.copy(conf, outer.getCurrentValue(), value);
+        return true;
       }
     }
 
+    public K1 getCurrentKey() {
+      return key;
+    }
+
     @Override
-    public void progress() {
-      synchronized (outer) {
-        outer.progress();
-      }
+    public V1 getCurrentValue() {
+      return value;
     }
+  }
+  
+  private class SubMapRecordWriter extends RecordWriter<K2,V2> {
 
     @Override
-    public void collect(K2 key, V2 value) throws IOException,
-                                         InterruptedException {
-      synchronized (outer) {
-        outer.collect(key, value);
-      }
+    public void close(TaskAttemptContext context) throws IOException,
+                                                 InterruptedException {
     }
 
     @Override
-    public K1 nextKey(K1 k) throws IOException, InterruptedException {
+    public void write(K2 key, V2 value) throws IOException,
+                                               InterruptedException {
       synchronized (outer) {
-        key = outer.nextKey(key);
-        if (key != null) {
-          value = outer.nextValue(value);
-        }
-        return key;
+        outer.write(key, value);
       }
+    }  
+  }
+
+  private class SubMapStatusReporter extends StatusReporter {
+
+    @Override
+    public Counter getCounter(Enum<?> name) {
+      return outer.getCounter(name);
     }
-    
-    public V1 nextValue(V1 v) throws IOException, InterruptedException {
-      return value;
+
+    @Override
+    public Counter getCounter(String group, String name) {
+      return outer.getCounter(group, name);
     }
+
+    @Override
+    public void progress() {
+      outer.progress();
+    }
+
+    @Override
+    public void setStatus(String status) {
+      outer.setStatus(status);
+    }
+    
   }
 
   private class MapRunner extends Thread {
     private Mapper<K1,V1,K2,V2> mapper;
-    private Context context;
+    private Context subcontext;
     private Throwable throwable;
 
-    @SuppressWarnings("unchecked")
-    MapRunner() {
-      mapper = (Mapper<K1,V1,K2,V2>) 
-        ReflectionUtils.newInstance(mapClass, context.getConfiguration());
-      context = new SubMapContext();
+    MapRunner(Context context) throws IOException, InterruptedException {
+      mapper = ReflectionUtils.newInstance(mapClass, 
+                                           context.getConfiguration());
+      subcontext = new Context(outer.getConfiguration(), 
+                            outer.getTaskAttemptID(),
+                            new SubMapRecordReader(),
+                            new SubMapRecordWriter(), 
+                            context.getOutputCommitter(),
+                            new SubMapStatusReporter(),
+                            outer.getInputSplit());
     }
 
     public Throwable getThrowable() {
       return throwable;
     }
 
+    @Override
     public void run() {
       try {
-        mapper.run(context);
+        mapper.run(subcontext);
       } catch (Throwable ie) {
         throwable = ie;
       }

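A hedged sketch of wiring this runner into a job; the configuration keys are the ones read above, while MyMapper and the surrounding driver code are assumptions for illustration:

    Job job = new Job(new Configuration(), "multithreaded example");
    job.setMapperClass(MultithreadedMapper.class);
    Configuration conf = job.getConfiguration();
    // Keys consumed by MultithreadedMapper.run(): the mapper to fan out to
    // and the number of concurrent threads per map task (default 10).
    conf.setClass("mapred.map.multithreadedrunner.class", MyMapper.class, Mapper.class);
    conf.setInt("mapred.map.multithreadedrunner.threads", 8);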
+ 3 - 2
src/mapred/org/apache/hadoop/mapreduce/lib/map/TokenCounterMapper.java

@@ -29,13 +29,14 @@ public class TokenCounterMapper extends Mapper<Object, Text, Text, IntWritable>{
     
   private final static IntWritable one = new IntWritable(1);
   private Text word = new Text();
-    
+  
+  @Override
   public void map(Object key, Text value, Context context
                   ) throws IOException, InterruptedException {
     StringTokenizer itr = new StringTokenizer(value.toString());
     while (itr.hasMoreTokens()) {
       word.set(itr.nextToken());
-      context.collect(word, one);
+      context.write(word, one);
     }
   }
 }

+ 177 - 0
src/mapred/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java

@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.output;
+
+import java.io.IOException;
+import java.net.URI;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.util.StringUtils;
+
+/** An {@link OutputCommitter} that commits files specified 
+ * in the job output directory, i.e. ${mapred.output.dir}. 
+ **/
+public class FileOutputCommitter extends OutputCommitter {
+
+  private static final Log LOG = LogFactory.getLog(FileOutputCommitter.class);
+
+  /**
+   * Temporary directory name 
+   */
+  protected static final String TEMP_DIR_NAME = "_temporary";
+  private FileSystem outputFileSystem = null;
+  private Path outputPath = null;
+  private Path workPath = null;
+
+  public FileOutputCommitter(Path outputPath, 
+                             TaskAttemptContext context) throws IOException {
+    if (outputPath != null) {
+      this.outputPath = outputPath;
+      outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
+      workPath = new Path(outputPath,
+                          (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR +
+                           "_" + context.getTaskAttemptID().toString()
+                           )).makeQualified(outputFileSystem);
+    }
+  }
+
+  public void setupJob(JobContext context) throws IOException {
+    if (outputPath != null) {
+      Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
+      FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
+      if (!fileSys.mkdirs(tmpDir)) {
+        LOG.error("Mkdirs failed to create " + tmpDir.toString());
+      }
+    }
+  }
+
+  public void cleanupJob(JobContext context) throws IOException {
+    if (outputPath != null) {
+      Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
+      FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration());
+      if (fileSys.exists(tmpDir)) {
+        fileSys.delete(tmpDir, true);
+      }
+    }
+  }
+
+  public void setupTask(TaskAttemptContext context) throws IOException {
+    // FileOutputCommitter's setupTask doesn't do anything, because the
+    // temporary task directory is created on demand when the
+    // task is writing.
+  }
+		  
+  public void commitTask(TaskAttemptContext context) 
+  throws IOException {
+    TaskAttemptID attemptId = context.getTaskAttemptID();
+    if (workPath != null) {
+      context.progress();
+      if (outputFileSystem.exists(workPath)) {
+        // Move the task outputs to their final place
+        moveTaskOutputs(context, outputFileSystem, outputPath, workPath);
+        // Delete the temporary task-specific output directory
+        if (!outputFileSystem.delete(workPath, true)) {
+          LOG.warn("Failed to delete the temporary output" + 
+          " directory of task: " + attemptId + " - " + workPath);
+        }
+        LOG.info("Saved output of task '" + attemptId + "' to " + 
+                 outputPath);
+      }
+    }
+  }
+		  
+  private void moveTaskOutputs(TaskAttemptContext context,
+                               FileSystem fs,
+                               Path jobOutputDir,
+                               Path taskOutput) 
+  throws IOException {
+    TaskAttemptID attemptId = context.getTaskAttemptID();
+    context.progress();
+    if (fs.isFile(taskOutput)) {
+      Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, 
+                                          workPath);
+      if (!fs.rename(taskOutput, finalOutputPath)) {
+        if (!fs.delete(finalOutputPath, true)) {
+          throw new IOException("Failed to delete earlier output of task: " + 
+                                 attemptId);
+        }
+        if (!fs.rename(taskOutput, finalOutputPath)) {
+          throw new IOException("Failed to save output of task: " + 
+        		  attemptId);
+        }
+      }
+      LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
+    } else if(fs.getFileStatus(taskOutput).isDir()) {
+      FileStatus[] paths = fs.listStatus(taskOutput);
+      Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
+      fs.mkdirs(finalOutputPath);
+      if (paths != null) {
+        for (FileStatus path : paths) {
+          moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
+        }
+      }
+    }
+  }
+
+  public void abortTask(TaskAttemptContext context) {
+    try {
+      context.progress();
+      outputFileSystem.delete(workPath, true);
+    } catch (IOException ie) {
+      LOG.warn("Error discarding output: " + StringUtils.stringifyException(ie));
+    }
+  }
+
+  private Path getFinalPath(Path jobOutputDir, Path taskOutput, 
+                            Path taskOutputPath) throws IOException {
+    URI taskOutputUri = taskOutput.toUri();
+    URI relativePath = taskOutputPath.toUri().relativize(taskOutputUri);
+    if (taskOutputUri == relativePath) {//taskOutputPath is not a parent of taskOutput
+      throw new IOException("Can not get the relative path: base = " + 
+          taskOutputPath + " child = " + taskOutput);
+    }
+    if (relativePath.getPath().length() > 0) {
+      return new Path(jobOutputDir, relativePath.getPath());
+    } else {
+      return jobOutputDir;
+    }
+  }
+
+  public boolean needsTaskCommit(TaskAttemptContext context
+                                 ) throws IOException {
+    return workPath != null && outputFileSystem.exists(workPath);
+  }
+
+  /**
+   * Get the directory that the task should write results into
+   * @return the work directory
+   * @throws IOException
+   */
+  public Path getWorkPath() throws IOException {
+    return workPath;
+  }
+}

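To make the commit protocol concrete, a rough sketch of the order in which these hooks are invoked for a single task attempt; in reality the MapReduce framework issues these calls, and jobContext, taskContext and outputPath are assumed to be set up elsewhere:

    FileOutputCommitter committer = new FileOutputCommitter(outputPath, taskContext);
    committer.setupJob(jobContext);        // creates ${mapred.output.dir}/_temporary
    committer.setupTask(taskContext);      // no-op; the work dir is created lazily
    // ... the task writes its output files under committer.getWorkPath() ...
    if (committer.needsTaskCommit(taskContext)) {
      committer.commitTask(taskContext);   // promotes work-dir files into the output dir
    }
    committer.cleanupJob(jobContext);      // removes the _temporary directory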
+ 96 - 115
src/mapred/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java

@@ -27,22 +27,34 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.mapred.FileAlreadyExistsException;
 import org.apache.hadoop.mapred.InvalidJobConfException;
+import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
 
 /** A base class for {@link OutputFormat}s that write to {@link FileSystem}s.*/
 public abstract class FileOutputFormat<K, V> extends OutputFormat<K, V> {
 
-  private static final String TEMP_DIR_NAME = "_temp";
+  /** Construct output file names so that, when an output directory listing is
+   * sorted lexicographically, positions correspond to output partitions.*/
+  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
+  static {
+    NUMBER_FORMAT.setMinimumIntegerDigits(5);
+    NUMBER_FORMAT.setGroupingUsed(false);
+  }
+  private FileOutputCommitter committer = null;
+
   /**
    * Set whether the output of the job is compressed.
-   * @param conf the {@link Configuration} to modify
+   * @param job the job to modify
    * @param compress should the output of the job be compressed?
    */
-  public static void setCompressOutput(Configuration conf, boolean compress) {
-    conf.setBoolean("mapred.output.compress", compress);
+  public static void setCompressOutput(Job job, boolean compress) {
+    job.getConfiguration().setBoolean("mapred.output.compress", compress);
   }
   
   /**
@@ -57,16 +69,17 @@ public abstract class FileOutputFormat<K, V> extends OutputFormat<K, V> {
   
   /**
    * Set the {@link CompressionCodec} to be used to compress job outputs.
-   * @param conf the {@link Configuration} to modify
+   * @param job the job to modify
    * @param codecClass the {@link CompressionCodec} to be used to
    *                   compress the job outputs
    */
   public static void 
-  setOutputCompressorClass(Configuration conf, 
+  setOutputCompressorClass(Job job, 
                            Class<? extends CompressionCodec> codecClass) {
-    setCompressOutput(conf, true);
-    conf.setClass("mapred.output.compression.codec", codecClass, 
-                  CompressionCodec.class);
+    setCompressOutput(job, true);
+    job.getConfiguration().setClass("mapred.output.compression.codec", 
+                                    codecClass, 
+                                    CompressionCodec.class);
   }
   
   /**
@@ -95,20 +108,19 @@ public abstract class FileOutputFormat<K, V> extends OutputFormat<K, V> {
     return codecClass;
   }
   
-  public abstract 
-    RecordWriter<K, V> getRecordWriter(TaskAttemptContext context
-                                       ) throws IOException;
+  public abstract RecordWriter<K, V> 
+     getRecordWriter(TaskAttemptContext context
+                     ) throws IOException, InterruptedException;
 
-  public void checkOutputSpecs(JobContext context) 
-    throws FileAlreadyExistsException, 
-           InvalidJobConfException, IOException {
+  public void checkOutputSpecs(JobContext context
+                               ) throws FileAlreadyExistsException, IOException{
     // Ensure that the output directory is set and not already there
     Configuration job = context.getConfiguration();
     Path outDir = getOutputPath(job);
-    if (outDir == null && context.getNumReduceTasks() != 0) {
-      throw new InvalidJobConfException("Output directory not set in JobConf.");
+    if (outDir == null) {
+      throw new InvalidJobConfException("Output directory not set.");
     }
-    if (outDir != null && outDir.getFileSystem(job).exists(outDir)) {
+    if (outDir.getFileSystem(job).exists(outDir)) {
       throw new FileAlreadyExistsException("Output directory " + outDir + 
                                            " already exists");
     }
@@ -117,19 +129,19 @@ public abstract class FileOutputFormat<K, V> extends OutputFormat<K, V> {
   /**
    * Set the {@link Path} of the output directory for the map-reduce job.
    *
-   * @param conf The configuration of the job.
+   * @param job The job to modify
    * @param outputDir the {@link Path} of the output directory for 
    * the map-reduce job.
    */
-  public static void setOutputPath(Configuration conf, Path outputDir) {
-    conf.set("mapred.output.dir", outputDir.toString());
+  public static void setOutputPath(Job job, Path outputDir) {
+    job.getConfiguration().set("mapred.output.dir", outputDir.toString());
   }
 
   /**
    * Get the {@link Path} to the output directory for the map-reduce job.
    * 
    * @return the {@link Path} to the output directory for the map-reduce job.
-   * @see FileOutputFormat#getWorkOutputPath(Configuration)
+   * @see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)
    */
   public static Path getOutputPath(Configuration conf) {
     String name = conf.get("mapred.output.dir");
@@ -162,18 +174,12 @@ public abstract class FileOutputFormat<K, V> extends OutputFormat<K, V> {
    * is completely transparent to the application.</p>
    * 
    * <p>The application-writer can take advantage of this by creating any 
-   * side-files required in <tt>${mapred.work.output.dir}</tt> during execution 
-   * of his reduce-task i.e. via {@link #getWorkOutputPath(Configuration)}, and
+   * side-files required in a work directory during execution 
+   * of his task i.e. via 
+   * {@link #getWorkOutputPath(TaskInputOutputContext)}, and
    * the framework will move them out similarly - thus she doesn't have to pick 
    * unique paths per task-attempt.</p>
    * 
-   * <p><i>Note</i>: the value of <tt>${mapred.work.output.dir}</tt> during 
-   * execution of a particular task-attempt is actually 
-   * <tt>${mapred.output.dir}/_temporary/_{$taskid}</tt>, and this value is 
-   * set by the map-reduce framework. So, just create any side-files in the 
-   * path  returned by {@link #getWorkOutputPath(Configuration)} from map/reduce 
-   * task to take advantage of this feature.</p>
-   * 
    * <p>The entire discussion holds true for maps of jobs with 
    * reducer=NONE (i.e. 0 reduces) since output of the map, in that case, 
    * goes directly to HDFS.</p> 
@@ -181,77 +187,12 @@ public abstract class FileOutputFormat<K, V> extends OutputFormat<K, V> {
    * @return the {@link Path} to the task's temporary output directory 
    * for the map-reduce job.
    */
-  public static Path getWorkOutputPath(Configuration conf) {
-    String name = conf.get("mapred.work.output.dir");
-    return name == null ? null: new Path(name);
-  }
-
-  /**
-   * Helper function to create the task's temporary output directory and 
-   * return the path to the task's output file.
-   * 
-   * @param context the task's context
-   * @return path to the task's temporary output file
-   * @throws IOException
-   */
-  protected static Path getTaskOutputPath(TaskAttemptContext context
-                                          ) throws IOException {
-    // ${mapred.job.dir}
-    Configuration conf = context.getConfiguration();
-    Path outputPath = getOutputPath(conf);
-    if (outputPath == null) {
-      throw new IOException("Undefined job output-path");
-    }
-
-    // ${mapred.out.dir}/_temporary
-    Path jobTmpDir = new Path(outputPath, TEMP_DIR_NAME);
-    FileSystem fs = jobTmpDir.getFileSystem(conf);
-    if (!fs.exists(jobTmpDir)) {
-      throw new IOException("The temporary job-output directory " + 
-          jobTmpDir.toString() + " doesn't exist!"); 
-    }
-
-    // ${mapred.out.dir}/_temporary/_${taskid}
-    Path taskTmpDir = getWorkOutputPath(conf);
-    if (!fs.mkdirs(taskTmpDir)) {
-      throw new IOException("Mkdirs failed to create " 
-          + taskTmpDir.toString());
-    }
-    
-    // ${mapred.out.dir}/_temporary/_${taskid}/${name}
-    return new Path(taskTmpDir, getOutputName(context));
-  } 
-
-  /**
-   * Helper function to generate a name that is unique for the task.
-   *
-   * <p>The generated name can be used to create custom files from within the
-   * different tasks for the job, the names for different tasks will not collide
-   * with each other.</p>
-   *
-   * <p>The given name is postfixed with the task type, 'm' for maps, 'r' for
-   * reduces and the task partition number. For example, give a name 'test'
-   * running on the first map o the job the generated name will be
-   * 'test-m-00000'.</p>
-   *
-   * @param conf the configuration for the job.
-   * @param name the name to make unique.
-   * @return a unique name accross all tasks of the job.
-   */
-  public static String getUniqueName(Configuration conf, String name) {
-    int partition = conf.getInt("mapred.task.partition", -1);
-    if (partition == -1) {
-      throw new IllegalArgumentException(
-        "This method can only be called from within a Job");
-    }
-
-    String taskType = (conf.getBoolean("mapred.task.is.map", true)) ? "m" : "r";
-
-    NumberFormat numberFormat = NumberFormat.getInstance();
-    numberFormat.setMinimumIntegerDigits(5);
-    numberFormat.setGroupingUsed(false);
-
-    return name + "-" + taskType + "-" + numberFormat.format(partition);
+  public static Path getWorkOutputPath(TaskInputOutputContext<?,?,?,?> context
+                                       ) throws IOException, 
+                                                InterruptedException {
+    FileOutputCommitter committer = (FileOutputCommitter) 
+      context.getOutputCommitter();
+    return committer.getWorkPath();
   }
 
   /**
@@ -262,28 +203,68 @@ public abstract class FileOutputFormat<K, V> extends OutputFormat<K, V> {
    * reduce tasks. The path name will be unique for each task. The path parent
   * will be the job output directory.</p>
    *
-   * <p>This method uses the {@link #getUniqueName} method to make the file name
+   * <p>This method uses the {@link #getUniqueFile} method to make the file name
    * unique for the task.</p>
    *
-   * @param conf the configuration for the job.
+   * @param context the context for the task.
    * @param name the name for the file.
+   * @param extension the extension for the file
   * @return a unique path across all tasks of the job.
    */
-  public static Path getPathForCustomFile(Configuration conf, String name) {
-    return new Path(getWorkOutputPath(conf), getUniqueName(conf, name));
+  public 
+  static Path getPathForWorkFile(TaskInputOutputContext<?,?,?,?> context, 
+                                 String name,
+                                 String extension
+                                ) throws IOException, InterruptedException {
+    return new Path(getWorkOutputPath(context),
+                    getUniqueFile(context, name, extension));
   }
 
-  /** Construct output file names so that, when an output directory listing is
-   * sorted lexicographically, positions correspond to output partitions.*/
-  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
-  static {
-    NUMBER_FORMAT.setMinimumIntegerDigits(5);
-    NUMBER_FORMAT.setGroupingUsed(false);
+  /**
+   * Generate a unique filename, based on the task id, name, and extension
+   * @param context the task that is calling this
+   * @param name the base filename
+   * @param extension the filename extension
+   * @return a string like $name-[mr]-$id$extension
+   */
+  public synchronized static String getUniqueFile(TaskAttemptContext context,
+                                                  String name,
+                                                  String extension) {
+    TaskID taskId = context.getTaskAttemptID().getTaskID();
+    int partition = taskId.getId();
+    StringBuilder result = new StringBuilder();
+    result.append(name);
+    result.append('-');
+    result.append(taskId.isMap() ? 'm' : 'r');
+    result.append('-');
+    result.append(NUMBER_FORMAT.format(partition));
+    result.append(extension);
+    return result.toString();
   }
 
-  protected static synchronized 
-  String getOutputName(TaskAttemptContext context) {
-    return "part-" + NUMBER_FORMAT.format(context.getTaskAttemptId().getId());
+  /**
+   * Get the default path and filename for the output format.
+   * @param context the task context
+   * @param extension an extension to add to the filename
+   * @return a full path $output/_temporary/$taskid/part-[mr]-$id
+   * @throws IOException
+   */
+  public Path getDefaultWorkFile(TaskAttemptContext context,
+                                 String extension) throws IOException{
+    FileOutputCommitter committer = 
+      (FileOutputCommitter) getOutputCommitter(context);
+    return new Path(committer.getWorkPath(), getUniqueFile(context, "part", 
+                                                           extension));
+  }
+
+  public synchronized 
+     OutputCommitter getOutputCommitter(TaskAttemptContext context
+                                        ) throws IOException {
+    if (committer == null) {
+      Path output = getOutputPath(context.getConfiguration());
+      committer = new FileOutputCommitter(output, context);
+    }
+    return committer;
   }
 }
 

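A sketch of how a concrete output format is expected to lean on the new helpers, mirroring what TextOutputFormat and SequenceFileOutputFormat do later in this patch; MyOutputFormat and MyRecordWriter are hypothetical:

    public class MyOutputFormat<K, V> extends FileOutputFormat<K, V> {
      @Override
      public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
          throws IOException, InterruptedException {
        // Resolves to <output>/_temporary/_<attempt-id>/part-[mr]-NNNNN.my
        Path file = getDefaultWorkFile(context, ".my");
        FSDataOutputStream out =
          file.getFileSystem(context.getConfiguration()).create(file, context);
        return new MyRecordWriter<K, V>(out);
      }
    }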
+ 19 - 1
src/mapred/org/apache/hadoop/mapreduce/lib/output/NullOutputFormat.java

@@ -19,6 +19,7 @@
 package org.apache.hadoop.mapreduce.lib.output;
 
 import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -28,12 +29,29 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
  */
 public class NullOutputFormat<K, V> extends OutputFormat<K, V> {
   
-  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) {
+  @Override
+  public RecordWriter<K, V> 
+         getRecordWriter(TaskAttemptContext context) {
     return new RecordWriter<K, V>(){
         public void write(K key, V value) { }
         public void close(TaskAttemptContext context) { }
       };
   }
   
+  @Override
   public void checkOutputSpecs(JobContext context) { }
+  
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
+    return new OutputCommitter() {
+      public void abortTask(TaskAttemptContext taskContext) { }
+      public void cleanupJob(JobContext jobContext) { }
+      public void commitTask(TaskAttemptContext taskContext) { }
+      public boolean needsTaskCommit(TaskAttemptContext taskContext) {
+        return false;
+      }
+      public void setupJob(JobContext jobContext) { }
+      public void setupTask(TaskAttemptContext taskContext) { }
+    };
+  }
 }

+ 12 - 9
src/mapred/org/apache/hadoop/mapreduce/lib/output/SequenceFileOutputFormat.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -36,13 +37,11 @@ import org.apache.hadoop.conf.Configuration;
 /** An {@link OutputFormat} that writes {@link SequenceFile}s. */
 public class SequenceFileOutputFormat <K,V> extends FileOutputFormat<K, V> {
 
-  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
-    throws IOException {
-    // get the path of the temporary output file 
-    Path file = FileOutputFormat.getTaskOutputPath(context);
+  public RecordWriter<K, V> 
+         getRecordWriter(TaskAttemptContext context
+                         ) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
     
-    FileSystem fs = file.getFileSystem(conf);
     CompressionCodec codec = null;
     CompressionType compressionType = CompressionType.NONE;
     if (getCompressOutput(conf)) {
@@ -54,6 +53,9 @@ public class SequenceFileOutputFormat <K,V> extends FileOutputFormat<K, V> {
       codec = (CompressionCodec) 
         ReflectionUtils.newInstance(codecClass, conf);
     }
+    // get the path of the temporary output file 
+    Path file = getDefaultWorkFile(context, "");
+    FileSystem fs = file.getFileSystem(conf);
     final SequenceFile.Writer out = 
       SequenceFile.createWriter(fs, conf, file,
                                 context.getOutputKeyClass(),
@@ -90,14 +92,15 @@ public class SequenceFileOutputFormat <K,V> extends FileOutputFormat<K, V> {
   
   /**
    * Set the {@link CompressionType} for the output {@link SequenceFile}.
-   * @param conf the {@link Configuration} to modify
+   * @param job the {@link Job} to modify
    * @param style the {@link CompressionType} for the output
    *              {@link SequenceFile} 
    */
-  public static void setOutputCompressionType(Configuration conf, 
+  public static void setOutputCompressionType(Job job, 
 		                                          CompressionType style) {
-    setCompressOutput(conf, true);
-    conf.set("mapred.output.compression.type", style.toString());
+    setCompressOutput(job, true);
+    job.getConfiguration().set("mapred.output.compression.type", 
+                               style.toString());
   }
 
 }

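For completeness, a small sketch of driving the reworked compression setters from a job driver; the Job instance and output path are assumptions, the setters themselves are the ones changed above:

    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    FileOutputFormat.setOutputPath(job, new Path("/user/alice/seq-out"));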
+ 15 - 16
src/mapred/org/apache/hadoop/mapreduce/lib/output/TextOutputFormat.java

@@ -38,9 +38,8 @@ import org.apache.hadoop.util.*;
 
 /** An {@link OutputFormat} that writes plain text files. */
 public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
-
   protected static class LineRecordWriter<K, V>
-    implements RecordWriter<K, V> {
+    extends RecordWriter<K, V> {
     private static final String utf8 = "UTF-8";
     private static final byte[] newline;
     static {
@@ -108,26 +107,26 @@ public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
     }
   }
 
-  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
-    throws IOException {
+  public RecordWriter<K, V> 
+         getRecordWriter(TaskAttemptContext context
+                         ) throws IOException, InterruptedException {
     Configuration job = context.getConfiguration();
     boolean isCompressed = getCompressOutput(job);
-    String keyValueSeparator = job.get("mapred.textoutputformat.separator", 
-                                       "\t");
-    Path file = FileOutputFormat.getTaskOutputPath(context);
+    String keyValueSeparator= job.get("mapred.textoutputformat.separator","\t");
+    CompressionCodec codec = null;
+    String extension = "";
+    if (isCompressed) {
+      Class<? extends CompressionCodec> codecClass = 
+        getOutputCompressorClass(job, GzipCodec.class);
+      codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
+      extension = codec.getDefaultExtension();
+    }
+    Path file = getDefaultWorkFile(context, extension);
+    FileSystem fs = file.getFileSystem(job);
     if (!isCompressed) {
-      FileSystem fs = file.getFileSystem(job);
       FSDataOutputStream fileOut = fs.create(file, context);
       return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
     } else {
-      Class<? extends CompressionCodec> codecClass =
-        getOutputCompressorClass(job, GzipCodec.class);
-      // create the named codec
-      CompressionCodec codec = (CompressionCodec)
-        ReflectionUtils.newInstance(codecClass, job);
-      // build the filename including the extension
-      file = new Path(file + codec.getDefaultExtension());
-      FileSystem fs = file.getFileSystem(job);
       FSDataOutputStream fileOut = fs.create(file, context);
       return new LineRecordWriter<K, V>(new DataOutputStream
                                         (codec.createOutputStream(fileOut)),

+ 1 - 1
src/mapred/org/apache/hadoop/mapreduce/lib/reduce/IntSumReducer.java

@@ -34,7 +34,7 @@ public class IntSumReducer<Key> extends Reducer<Key,IntWritable,
       sum += val.get();
     }
     result.set(sum);
-    context.collect(key, result);
+    context.write(key, result);
   }
 
 }

+ 1 - 1
src/mapred/org/apache/hadoop/mapreduce/lib/reduce/LongSumReducer.java

@@ -34,7 +34,7 @@ public class LongSumReducer<KEY> extends Reducer<KEY, LongWritable,
       sum += val.get();
     }
     result.set(sum);
-    context.collect(key, result);
+    context.write(key, result);
   }
 
 }

+ 0 - 1
src/test/org/apache/hadoop/mapred/NotificationTestCase.java

@@ -25,7 +25,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.examples.WordCount;
 
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;

+ 2 - 2
src/test/org/apache/hadoop/mapred/TestFileOutputCommitter.java

@@ -37,9 +37,9 @@ public class TestFileOutputCommitter extends TestCase {
     JobConf job = new JobConf();
     job.set("mapred.task.id", attempt);
     job.setOutputCommitter(FileOutputCommitter.class);
-    JobContext jContext = new JobContext(job);
-    TaskAttemptContext tContext = new TaskAttemptContext(job, taskID);
     FileOutputFormat.setOutputPath(job, outDir);
+    JobContext jContext = new JobContext(job, taskID.getJobID());
+    TaskAttemptContext tContext = new TaskAttemptContext(job, taskID);
     FileOutputCommitter committer = new FileOutputCommitter();
     FileOutputFormat.setWorkOutputPath(job, 
       committer.getTempTaskOutputPath(tContext));

+ 0 - 1
src/test/org/apache/hadoop/mapred/TestJobSysDirWithDFS.java

@@ -26,7 +26,6 @@ import junit.framework.TestCase;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.examples.WordCount;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;

+ 0 - 1
src/test/org/apache/hadoop/mapred/TestKillCompletedJob.java

@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 
-import org.apache.hadoop.examples.WordCount;
 
 
 /**

+ 0 - 1
src/test/org/apache/hadoop/mapred/TestMiniMRWithDFS.java

@@ -34,7 +34,6 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
-import org.apache.hadoop.examples.WordCount;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;

+ 0 - 1
src/test/org/apache/hadoop/mapred/TestSpilledRecordsCounter.java

@@ -27,7 +27,6 @@ import java.io.IOException;
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.examples.WordCount;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;

+ 2 - 2
src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java

@@ -108,7 +108,7 @@ public class TestTaskTrackerMemoryManager extends TestCase {
 
     for (TaskCompletionEvent tce : taskComplEvents) {
       String[] diagnostics =
-          jClient.jobSubmitClient.getTaskDiagnostics(tce.getTaskAttemptId());
+          rj.getTaskDiagnostics(tce.getTaskAttemptId());
 
       if (diagnostics != null) {
         for (String str : diagnostics) {
@@ -304,7 +304,7 @@ public class TestTaskTrackerMemoryManager extends TestCase {
           .getTaskStatus() == TaskCompletionEvent.Status.FAILED);
 
       String[] diagnostics =
-          jClient.jobSubmitClient.getTaskDiagnostics(tce.getTaskAttemptId());
+          rj.getTaskDiagnostics(tce.getTaskAttemptId());
 
       // Every task HAS to spit out the out-of-memory errors
       assert (diagnostics != null);

+ 159 - 0
src/test/org/apache/hadoop/mapred/WordCount.java

@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This is an example Hadoop Map/Reduce application.
+ * It reads the text input files, breaks each line into words
+ * and counts them. The output is a locally sorted list of words and the 
+ * count of how often they occurred.
+ *
+ * To run: bin/hadoop jar build/hadoop-examples.jar wordcount
+ *            [-m <i>maps</i>] [-r <i>reduces</i>] <i>in-dir</i> <i>out-dir</i> 
+ */
+public class WordCount extends Configured implements Tool {
+  
+  /**
+   * Counts the words in each line.
+   * For each line of input, break the line into words and emit them as
+   * (<b>word</b>, <b>1</b>).
+   */
+  public static class MapClass extends MapReduceBase
+    implements Mapper<LongWritable, Text, Text, IntWritable> {
+    
+    private final static IntWritable one = new IntWritable(1);
+    private Text word = new Text();
+    
+    public void map(LongWritable key, Text value, 
+                    OutputCollector<Text, IntWritable> output, 
+                    Reporter reporter) throws IOException {
+      String line = value.toString();
+      StringTokenizer itr = new StringTokenizer(line);
+      while (itr.hasMoreTokens()) {
+        word.set(itr.nextToken());
+        output.collect(word, one);
+      }
+    }
+  }
+  
+  /**
+   * A reducer class that just emits the sum of the input values.
+   */
+  public static class Reduce extends MapReduceBase
+    implements Reducer<Text, IntWritable, Text, IntWritable> {
+    
+    public void reduce(Text key, Iterator<IntWritable> values,
+                       OutputCollector<Text, IntWritable> output, 
+                       Reporter reporter) throws IOException {
+      int sum = 0;
+      while (values.hasNext()) {
+        sum += values.next().get();
+      }
+      output.collect(key, new IntWritable(sum));
+    }
+  }
+  
+  static int printUsage() {
+    System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
+    ToolRunner.printGenericCommandUsage(System.out);
+    return -1;
+  }
+  
+  /**
+   * The main driver for word count map/reduce program.
+   * Invoke this method to submit the map/reduce job.
+   * @throws IOException When there are communication problems with the 
+   *                     job tracker.
+   */
+  public int run(String[] args) throws Exception {
+    JobConf conf = new JobConf(getConf(), WordCount.class);
+    conf.setJobName("wordcount");
+ 
+    // the keys are words (strings)
+    conf.setOutputKeyClass(Text.class);
+    // the values are counts (ints)
+    conf.setOutputValueClass(IntWritable.class);
+    
+    conf.setMapperClass(MapClass.class);        
+    conf.setCombinerClass(Reduce.class);
+    conf.setReducerClass(Reduce.class);
+    
+    List<String> other_args = new ArrayList<String>();
+    for(int i=0; i < args.length; ++i) {
+      try {
+        if ("-m".equals(args[i])) {
+          conf.setNumMapTasks(Integer.parseInt(args[++i]));
+        } else if ("-r".equals(args[i])) {
+          conf.setNumReduceTasks(Integer.parseInt(args[++i]));
+        } else {
+          other_args.add(args[i]);
+        }
+      } catch (NumberFormatException except) {
+        System.out.println("ERROR: Integer expected instead of " + args[i]);
+        return printUsage();
+      } catch (ArrayIndexOutOfBoundsException except) {
+        System.out.println("ERROR: Required parameter missing from " +
+                           args[i-1]);
+        return printUsage();
+      }
+    }
+    // Make sure there are exactly 2 parameters left.
+    if (other_args.size() != 2) {
+      System.out.println("ERROR: Wrong number of parameters: " +
+                         other_args.size() + " instead of 2.");
+      return printUsage();
+    }
+    FileInputFormat.setInputPaths(conf, other_args.get(0));
+    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
+        
+    JobClient.runJob(conf);
+    return 0;
+  }
+  
+  
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new WordCount(), args);
+    System.exit(res);
+  }
+
+}

+ 105 - 0
src/test/org/apache/hadoop/mapreduce/TestMapReduceLocal.java

@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.examples.WordCount;
+import org.apache.hadoop.examples.WordCount.IntSumReducer;
+import org.apache.hadoop.examples.WordCount.TokenizerMapper;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+/**
+ * A JUnit test to test a mini map-reduce cluster with the local file system.
+ */
+public class TestMapReduceLocal extends TestCase {
+  private static Path TEST_ROOT_DIR =
+    new Path(System.getProperty("test.build.data","/tmp"));
+  private static Configuration conf = new Configuration();
+  private static FileSystem localFs;
+  static {
+    try {
+      localFs = FileSystem.getLocal(conf);
+    } catch (IOException io) {
+      throw new RuntimeException("problem getting local fs", io);
+    }
+  }
+
+  public Path writeFile(String name, String data) throws IOException {
+    Path file = new Path(TEST_ROOT_DIR + "/" + name);
+    localFs.delete(file, false);
+    DataOutputStream f = localFs.create(file);
+    f.write(data.getBytes());
+    f.close();
+    return file;
+  }
+
+  public String readFile(String name) throws IOException {
+    DataInputStream f = localFs.open(new Path(TEST_ROOT_DIR + "/" + name));
+    BufferedReader b = new BufferedReader(new InputStreamReader(f));
+    StringBuilder result = new StringBuilder();
+    String line = b.readLine();
+    while (line != null) {
+     result.append(line);
+     result.append('\n');
+     line = b.readLine();
+    }
+    return result.toString();
+  }
+
+  public void testWithLocal() throws Exception {
+    MiniMRCluster mr = null;
+    try {
+      mr = new MiniMRCluster(2, "file:///", 3);
+      Configuration conf = mr.createJobConf();
+      writeFile("in/part1", "this is a test\nof word count\n");
+      writeFile("in/part2", "more test");
+      Job job = new Job(conf, "word count");     
+      job.setJarByClass(WordCount.class);
+      job.setMapperClass(TokenizerMapper.class);
+      job.setCombinerClass(IntSumReducer.class);
+      job.setReducerClass(IntSumReducer.class);
+      job.setOutputKeyClass(Text.class);
+      job.setOutputValueClass(IntWritable.class);
+      FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
+      FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
+      assertTrue(job.waitForCompletion());
+      String out = readFile("out/part-r-00000");
+      System.out.println(out);
+      assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t2\nthis\t1\nword\t1\n",
+                   out);
+    } finally {
+      if (mr != null) { mr.shutdown(); }
+    }
+  }
+  
+}

+ 1 - 1
src/test/testjar/ClassWordCount.java

@@ -34,7 +34,7 @@ import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.examples.WordCount;
+import org.apache.hadoop.mapred.WordCount;
 
 /**
  * This is an example Hadoop Map/Reduce application being used for