@@ -0,0 +1,393 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.input;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * A base class for file-based {@link InputFormat}s.
+ *
+ * <p><code>FileInputFormat</code> is the base class for all file-based
+ * <code>InputFormat</code>s. This provides a generic implementation of
+ * {@link #getSplits(JobContext)}.
+ * Subclasses of <code>FileInputFormat</code> can also override the
+ * {@link #isSplitable(JobContext, Path)} method to ensure input files are
+ * not split up and are processed as a whole by {@link Mapper}s.
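+ *
+ * <p>For example, a minimal concrete subclass might look like the sketch
+ * below (illustrative only; it assumes the {@link LineRecordReader} from
+ * this package is an appropriate record reader):
+ *
+ * <pre>
+ * public class MyTextInputFormat
+ *     extends FileInputFormat&lt;LongWritable, Text&gt; {
+ *   public RecordReader&lt;LongWritable, Text&gt; createRecordReader(
+ *       InputSplit split, TaskAttemptContext context) {
+ *     return new LineRecordReader();
+ *   }
+ * }
+ * </pre>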
+ */
+public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
+
+  public static final Log LOG = LogFactory.getLog(FileInputFormat.class);
+
+  private static final double SPLIT_SLOP = 1.1;   // 10% slop
+
+  private static final PathFilter hiddenFileFilter = new PathFilter(){
+      public boolean accept(Path p){
+        String name = p.getName();
+        return !name.startsWith("_") && !name.startsWith(".");
+      }
+    };
+
+  /**
+   * Proxy PathFilter that accepts a path only if all filters given in the
+   * constructor do. Used by listStatus() to apply the built-in
+   * hiddenFileFilter together with a user-provided one (if any).
+   */
+  private static class MultiPathFilter implements PathFilter {
+    private List<PathFilter> filters;
+
+    public MultiPathFilter(List<PathFilter> filters) {
+      this.filters = filters;
+    }
+
+    public boolean accept(Path path) {
+      for (PathFilter filter : filters) {
+        if (!filter.accept(path)) {
+          return false;
+        }
+      }
+      return true;
+    }
+  }
+
+  /**
+   * Get the lower bound on split size imposed by the format.
+   * @return the number of bytes of the minimal split for this format
+   */
+  protected long getFormatMinSplitSize() {
+    return 1;
+  }
+
+  /**
+   * Is the given filename splitable? Usually true, but if the file is
+   * stream compressed, it will not be.
+   *
+   * <code>FileInputFormat</code> implementations can override this and
+   * return <code>false</code> to ensure that individual input files are
+   * never split up, so that {@link Mapper}s process entire files.
+   *
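+   * <p>A typical override, sketched below, refuses to split compressed
+   * files (illustrative only; it assumes
+   * org.apache.hadoop.io.compress.CompressionCodecFactory is an acceptable
+   * way to detect compression for the files at hand):
+   *
+   * <pre>
+   * protected boolean isSplitable(JobContext context, Path file) {
+   *   CompressionCodec codec = new CompressionCodecFactory(
+   *       context.getConfiguration()).getCodec(file);
+   *   return codec == null; // split only uncompressed files
+   * }
+   * </pre>
+   *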
+   * @param context the job context
+   * @param filename the file name to check
+   * @return is this file splitable?
+   */
+  protected boolean isSplitable(JobContext context, Path filename) {
+    return true;
+  }
+
+  /**
+   * Set a PathFilter to be applied to the input paths for the map-reduce job.
+   *
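+   * <p>For example, with a user-defined filter class (<code>MyFilter</code>
+   * is hypothetical):
+   * <pre>
+   * FileInputFormat.setInputPathFilter(conf, MyFilter.class);
+   * </pre>
+   *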
+   * @param conf the job configuration
+   * @param filter the PathFilter class used for filtering the input paths.
+   */
+  public static void setInputPathFilter(Configuration conf,
+                                        Class<? extends PathFilter> filter) {
+    conf.setClass("mapred.input.pathFilter.class", filter, PathFilter.class);
+  }
+
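+  /**
+   * Set the minimum input split size.
+   *
+   * @param conf the job configuration
+   * @param size the minimum split size, in bytes
+   */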
+  public static void setMinInputSplitSize(Configuration conf,
+                                          long size) {
+    conf.setLong("mapred.min.split.size", size);
+  }
+
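+  /**
+   * Get the minimum split size.
+   *
+   * @param conf the job configuration
+   * @return the minimum number of bytes that can be in a split
+   */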
+  public static long getMinSplitSize(Configuration conf) {
+    return conf.getLong("mapred.min.split.size", 1L);
+  }
+
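+  /**
+   * Set the maximum input split size.
+   *
+   * @param conf the job configuration
+   * @param size the maximum split size, in bytes
+   */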
+  public static void setMaxInputSplitSize(Configuration conf,
+                                          long size) {
+    conf.setLong("mapred.max.split.size", size);
+  }
+
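+  /**
+   * Get the maximum split size.
+   *
+   * @param conf the job configuration
+   * @return the maximum number of bytes a split can include
+   */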
+  public static long getMaxSplitSize(Configuration conf) {
+    return conf.getLong("mapred.max.split.size", Long.MAX_VALUE);
+  }
+
+  /**
+   * Get a PathFilter instance of the filter set for the input paths.
+   *
+   * @param conf the job configuration
+   * @return the PathFilter instance set for the job, null if none has been set.
+   */
+  public static PathFilter getInputPathFilter(Configuration conf) {
+    Class<?> filterClass = conf.getClass("mapred.input.pathFilter.class", null,
+        PathFilter.class);
+    return (filterClass != null) ?
+        (PathFilter) ReflectionUtils.newInstance(filterClass, conf) : null;
+  }
+
+  /** List input directories.
+   * Subclasses may override to, e.g., select only files matching a regular
+   * expression.
+   *
+   * @param job the configuration for the job
+   * @return the list of FileStatus objects for the input files
+   * @throws IOException if no input paths are specified, or if an input
+   *         path does not exist or matches no files.
+   */
+  protected List<FileStatus> listStatus(Configuration job
+                                        ) throws IOException {
+    List<FileStatus> result = new ArrayList<FileStatus>();
+    Path[] dirs = getInputPaths(job);
+    if (dirs.length == 0) {
+      throw new IOException("No input paths specified in job");
+    }
+
+    List<IOException> errors = new ArrayList<IOException>();
+
+    // creates a MultiPathFilter with the hiddenFileFilter and the
+    // user provided one (if any).
+    List<PathFilter> filters = new ArrayList<PathFilter>();
+    filters.add(hiddenFileFilter);
+    PathFilter jobFilter = getInputPathFilter(job);
+    if (jobFilter != null) {
+      filters.add(jobFilter);
+    }
+    PathFilter inputFilter = new MultiPathFilter(filters);
+
+    for (int i = 0; i < dirs.length; ++i) {
+      Path p = dirs[i];
+      FileSystem fs = p.getFileSystem(job);
+      FileStatus[] matches = fs.globStatus(p, inputFilter);
+      if (matches == null) {
+        errors.add(new IOException("Input path does not exist: " + p));
+      } else if (matches.length == 0) {
+        errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
+      } else {
+        for (FileStatus globStat: matches) {
+          if (globStat.isDir()) {
+            for (FileStatus stat: fs.listStatus(globStat.getPath(),
+                inputFilter)) {
+              result.add(stat);
+            }
+          } else {
+            result.add(globStat);
+          }
+        }
+      }
+    }
+
+    if (!errors.isEmpty()) {
+      throw new InvalidInputException(errors);
+    }
+    LOG.info("Total input paths to process : " + result.size());
+    return result;
+  }
+
+
+  /** Generate splits for the files returned by
+   * {@link #listStatus(Configuration)}, breaking up any file larger than
+   * the computed split size into multiple splits. */
+  public List<InputSplit> getSplits(JobContext context
+                                    ) throws IOException {
+    Configuration job = context.getConfiguration();
+    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
+    long maxSize = getMaxSplitSize(job);
+
+    // generate splits
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+    for (FileStatus file: listStatus(job)) {
+      Path path = file.getPath();
+      FileSystem fs = path.getFileSystem(context.getConfiguration());
+      long length = file.getLen();
+      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
+      if ((length != 0) && isSplitable(context, path)) {
+        long blockSize = file.getBlockSize();
+        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
+
+        long bytesRemaining = length;
+        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
+          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
+          splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
+                                   blkLocations[blkIndex].getHosts()));
+          bytesRemaining -= splitSize;
+        }
+
+        if (bytesRemaining != 0) {
+          splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
+                     blkLocations[blkLocations.length - 1].getHosts()));
+        }
+      } else if (length != 0) {
+        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
+      } else {
+        // Create empty hosts array for zero length files
+        splits.add(new FileSplit(path, 0, length, new String[0]));
+      }
+    }
+    LOG.debug("Total # of splits: " + splits.size());
+    return splits;
+  }
+
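+  /**
+   * Compute the size of splits for a file as
+   * <code>max(minSize, min(maxSize, blockSize))</code>: splits default to
+   * the file's block size unless the configured bounds override it.
+   *
+   * @param blockSize the block size of the file
+   * @param minSize the minimum split size
+   * @param maxSize the maximum split size
+   * @return the number of bytes in each split
+   */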
+  protected long computeSplitSize(long blockSize, long minSize,
+                                  long maxSize) {
+    return Math.max(minSize, Math.min(maxSize, blockSize));
+  }
+
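+  /**
+   * Find the block that contains the given offset into the file.
+   *
+   * @param blkLocations the file's block locations, in file order
+   * @param offset the byte offset into the file
+   * @return the index of the block containing the offset
+   * @throws IllegalArgumentException if the offset is beyond the last block
+   */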
+  protected int getBlockIndex(BlockLocation[] blkLocations,
+                              long offset) {
+    for (int i = 0; i < blkLocations.length; i++) {
+      // is the offset inside this block?
+      if ((blkLocations[i].getOffset() <= offset) &&
+          (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
+        return i;
+      }
+    }
+    BlockLocation last = blkLocations[blkLocations.length - 1];
+    long fileLength = last.getOffset() + last.getLength() - 1;
+    throw new IllegalArgumentException("Offset " + offset +
+                                       " is outside of file (0.." +
+                                       fileLength + ")");
+  }
+
+  /**
+   * Sets the given comma separated paths as the list of inputs
+   * for the map-reduce job.
+   *
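+   * <p>For example (illustrative paths):
+   * <pre>
+   * FileInputFormat.setInputPaths(conf, "/data/2008,/logs/{jan,feb}");
+   * </pre>
+   *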
+   * @param conf Configuration of the job
+   * @param commaSeparatedPaths Comma separated paths to be set as
+   *        the list of inputs for the map-reduce job.
+   */
+  public static void setInputPaths(Configuration conf,
+                                   String commaSeparatedPaths
+                                   ) throws IOException {
+    setInputPaths(conf, StringUtils.stringToPath(
+                        getPathStrings(commaSeparatedPaths)));
+  }
+
+  /**
+   * Add the given comma separated paths to the list of inputs for
+   * the map-reduce job.
+   *
+   * @param conf The configuration of the job
+   * @param commaSeparatedPaths Comma separated paths to be added to
+   *        the list of inputs for the map-reduce job.
+   */
+  public static void addInputPaths(Configuration conf,
+                                   String commaSeparatedPaths
+                                   ) throws IOException {
+    for (String str : getPathStrings(commaSeparatedPaths)) {
+      addInputPath(conf, new Path(str));
+    }
+  }
+
+  /**
+   * Set the array of {@link Path}s as the list of inputs
+   * for the map-reduce job.
+   *
+   * @param conf Configuration of the job.
+   * @param inputPaths the {@link Path}s of the input directories/files
+   *        for the map-reduce job.
+   */
+  public static void setInputPaths(Configuration conf,
+                                   Path... inputPaths) throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    Path path = inputPaths[0].makeQualified(fs);
+    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
+    for (int i = 1; i < inputPaths.length; i++) {
+      str.append(StringUtils.COMMA_STR);
+      path = inputPaths[i].makeQualified(fs);
+      str.append(StringUtils.escapeString(path.toString()));
+    }
+    conf.set("mapred.input.dir", str.toString());
+  }
+
+  /**
+   * Add a {@link Path} to the list of inputs for the map-reduce job.
+   *
+   * @param conf The configuration of the job
+   * @param path {@link Path} to be added to the list of inputs for
+   *        the map-reduce job.
+   */
+  public static void addInputPath(Configuration conf,
+                                  Path path) throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    path = path.makeQualified(fs);
+    String dirStr = StringUtils.escapeString(path.toString());
+    String dirs = conf.get("mapred.input.dir");
+    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + "," + dirStr);
+  }
+
+  // Splits a comma-separated list of path strings into separate paths,
+  // treating commas inside curly-brace glob patterns as part of the path:
+  // e.g. "/a/{b,c},/d" yields "/a/{b,c}" and "/d".
+  private static String[] getPathStrings(String commaSeparatedPaths) {
+    int length = commaSeparatedPaths.length();
+    int curlyOpen = 0;
+    int pathStart = 0;
+    boolean globPattern = false;
+    List<String> pathStrings = new ArrayList<String>();
+
+    for (int i = 0; i < length; i++) {
+      char ch = commaSeparatedPaths.charAt(i);
+      switch(ch) {
+        case '{' : {
+          curlyOpen++;
+          if (!globPattern) {
+            globPattern = true;
+          }
+          break;
+        }
+        case '}' : {
+          curlyOpen--;
+          if (curlyOpen == 0 && globPattern) {
+            globPattern = false;
+          }
+          break;
+        }
+        case ',' : {
+          if (!globPattern) {
+            pathStrings.add(commaSeparatedPaths.substring(pathStart, i));
+            pathStart = i + 1;
+          }
+          break;
+        }
+      }
+    }
+    pathStrings.add(commaSeparatedPaths.substring(pathStart, length));
+
+    return pathStrings.toArray(new String[0]);
+  }
+
+  /**
+   * Get the list of input {@link Path}s for the map-reduce job.
+   *
+   * @param conf The configuration of the job
+   * @return the list of input {@link Path}s for the map-reduce job.
+   */
+  public static Path[] getInputPaths(Configuration conf) {
+    String dirs = conf.get("mapred.input.dir", "");
+    String[] list = StringUtils.split(dirs);
+    Path[] result = new Path[list.length];
+    for (int i = 0; i < list.length; i++) {
+      result[i] = new Path(StringUtils.unEscapeString(list[i]));
+    }
+    return result;
+  }
+
+}