Browse source code

HADOOP-1204. Rename InputFormatBase to be FileInputFormat.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@527690 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 years ago
parent
commit
caec916d45

+ 4 - 0
CHANGES.txt

@@ -169,6 +169,10 @@ Trunk (unreleased changes)
     key and value when either is null, and to print nothing when both
     are null.  (Runping Qi via cutting)
 
+52. HADOOP-1204.  Rename InputFormatBase to be FileInputFormat, and
+    deprecate InputFormatBase.  Also make LineRecordReader easier to
+    extend.  (Runping Qi via cutting)
+
 
 Release 0.12.3 - 2007-04-06
 
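The changelog entry above captures the whole migration story: existing subclasses of InputFormatBase keep compiling (the old class now simply extends FileInputFormat and is deprecated), but new code should subclass FileInputFormat directly and can lean on the new LineRecordReader(Configuration, FileSplit) constructor. A minimal sketch of a custom format written against the renamed base class, assuming it lives in the org.apache.hadoop.mapred package; the class name MyLineFormat is hypothetical and not part of this patch:

package org.apache.hadoop.mapred;

import java.io.IOException;

// Hypothetical format written against the new base class introduced by HADOOP-1204.
// Before this patch the declaration would have read "extends InputFormatBase".
public class MyLineFormat extends FileInputFormat {

  public RecordReader getRecordReader(InputSplit split, JobConf job,
                                      Reporter reporter) throws IOException {
    reporter.setStatus(split.toString());
    // Delegates to the (Configuration, FileSplit) constructor added to
    // LineRecordReader in this same commit.
    return new LineRecordReader(job, (FileSplit) split);
  }
}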

+ 198 - 0
src/java/org/apache/hadoop/mapred/FileInputFormat.java

@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+
+/** 
+ * A base class for {@link InputFormat}. 
+ * 
+ */
+public abstract class FileInputFormat implements InputFormat {
+
+  public static final Log LOG =
+    LogFactory.getLog("org.apache.hadoop.mapred.FileInputFormat");
+
+  private static final double SPLIT_SLOP = 1.1;   // 10% slop
+
+  private long minSplitSize = 1;
+  private static final PathFilter hiddenFileFilter = new PathFilter(){
+    public boolean accept( Path p ){
+      String name = p.getName(); 
+      return !name.startsWith("_") && !name.startsWith("."); 
+    }
+  }; 
+  protected void setMinSplitSize(long minSplitSize) {
+    this.minSplitSize = minSplitSize;
+  }
+
+  /**
+   * Is the given filename splitable? Usually, true, but if the file is
+   * stream compressed, it will not be.
+   * @param fs the file system that the file is on
+   * @param filename the file name to check
+   * @return is this file splitable?
+   */
+  protected boolean isSplitable(FileSystem fs, Path filename) {
+    return true;
+  }
+  
+  public abstract RecordReader getRecordReader(InputSplit split,
+                                               JobConf job,
+                                               Reporter reporter)
+    throws IOException;
+
+  /** List input directories.
+   * Subclasses may override to, e.g., select only files matching a regular
+   * expression. 
+   * 
+   * @param job the job to list input paths for
+   * @return array of Path objects
+   * @throws IOException if zero items.
+   */
+  protected Path[] listPaths(JobConf job)
+    throws IOException {
+    Path[] dirs = job.getInputPaths();
+    if (dirs.length == 0) {
+      throw new IOException("No input paths specified in job");
+    }
+    List<Path> result = new ArrayList(); 
+    for (Path p: dirs) {
+      FileSystem fs = p.getFileSystem(job); 
+      Path[] matches =
+        fs.listPaths(fs.globPaths(p, hiddenFileFilter),hiddenFileFilter);
+      for (Path match: matches) {
+        result.add(fs.makeQualified(match));
+      }
+    }
+
+    return (Path[])result.toArray(new Path[result.size()]);
+  }
+
+  public void validateInput(JobConf job) throws IOException {
+    Path[] inputDirs = job.getInputPaths();
+    if (inputDirs.length == 0) {
+      throw new IOException("No input paths specified in input"); 
+    }
+    
+    List<IOException> result = new ArrayList();
+    int totalFiles = 0; 
+    for (Path p: inputDirs) {
+      FileSystem fs = p.getFileSystem(job);
+      if (fs.exists(p)) {
+        // make sure all paths are files to avoid exception
+        // while generating splits
+        for (Path subPath : fs.listPaths(p, hiddenFileFilter)) {
+          FileSystem subFS = subPath.getFileSystem(job); 
+          if (!subFS.exists(subPath)) {
+            result.add(new IOException(
+                "Input path does not exist: " + subPath)); 
+          } else {
+            totalFiles++; 
+          }
+        }
+      } else {
+        Path [] paths = fs.globPaths(p, hiddenFileFilter); 
+        if (paths.length == 0) {
+          result.add(
+            new IOException("Input Pattern " + p + " matches 0 files")); 
+        } else {
+          // validate globbed paths 
+          for (Path gPath : paths) {
+            FileSystem gPathFS = gPath.getFileSystem(job); 
+            if (!gPathFS.exists(gPath)) {
+              result.add(
+                new FileNotFoundException(
+                    "Input path doesnt exist : " + gPath)); 
+            }
+          }
+          totalFiles += paths.length ; 
+        }
+      }
+    }
+    if (!result.isEmpty()) {
+      throw new InvalidInputException(result);
+    }
+    // send output to client. 
+    LOG.info("Total input paths to process : " + totalFiles); 
+  }
+
+  /** Splits files returned by {@link #listPaths(JobConf)} when
+   * they're too big.*/ 
+  public InputSplit[] getSplits(JobConf job, int numSplits)
+    throws IOException {
+    Path[] files = listPaths(job);
+    long totalSize = 0;                           // compute total size
+    for (int i = 0; i < files.length; i++) {      // check we have valid files
+      Path file = files[i];
+      FileSystem fs = file.getFileSystem(job);
+      if (fs.isDirectory(file) || !fs.exists(file)) {
+        throw new IOException("Not a file: "+files[i]);
+      }
+      totalSize += fs.getLength(files[i]);
+    }
+
+    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
+    long minSize = Math.max(job.getLong("mapred.min.split.size", 1),
+                            minSplitSize);
+
+    ArrayList splits = new ArrayList(numSplits);  // generate splits
+    for (int i = 0; i < files.length; i++) {
+      Path file = files[i];
+      FileSystem fs = file.getFileSystem(job);
+      long length = fs.getLength(file);
+      if (isSplitable(fs, file)) { 
+        long blockSize = fs.getBlockSize(file);
+        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
+
+        long bytesRemaining = length;
+        while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
+          splits.add(new FileSplit(file, length-bytesRemaining, splitSize,
+                                   job));
+          bytesRemaining -= splitSize;
+        }
+        
+        if (bytesRemaining != 0) {
+          splits.add(new FileSplit(file, length-bytesRemaining, 
+                                   bytesRemaining, job));
+        }
+      } else {
+        if (length != 0) {
+          splits.add(new FileSplit(file, 0, length, job));
+        }
+      }
+    }
+    LOG.debug( "Total # of splits: " + splits.size() );
+    return (FileSplit[])splits.toArray(new FileSplit[splits.size()]);
+  }
+
+  private static long computeSplitSize(long goalSize, long minSize,
+                                       long blockSize) {
+    return Math.max(minSize, Math.min(goalSize, blockSize));
+  }
+}
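getSplits() above sizes each chunk with computeSplitSize(goalSize, minSize, blockSize) = max(minSize, min(goalSize, blockSize)) and only emits a final short split once the remainder drops to 10% over the split size (SPLIT_SLOP = 1.1). A standalone sketch of that arithmetic; the concrete numbers (1 GB of input, 10 requested splits, 64 MB blocks) are illustrative assumptions, not values taken from the patch:

public class SplitSizeDemo {

  // Same formula as FileInputFormat.computeSplitSize().
  static long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
  }

  public static void main(String[] args) {
    long totalSize = 1024L * 1024 * 1024;     // assume 1 GB of input data
    int  numSplits = 10;                      // hint passed to getSplits()
    long goalSize  = totalSize / numSplits;   // ~102 MB requested per split
    long minSize   = 1;                       // default of mapred.min.split.size
    long blockSize = 64L * 1024 * 1024;       // assumed 64 MB block size

    // The block size wins here: max(1, min(~102 MB, 64 MB)) = 64 MB,
    // so a 1 GB file would be carved into sixteen 64 MB splits.
    System.out.println(computeSplitSize(goalSize, minSize, blockSize));
  }
}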

+ 4 - 175
src/java/org/apache/hadoop/mapred/InputFormatBase.java

@@ -18,181 +18,10 @@
 
 package org.apache.hadoop.mapred;
 
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.commons.logging.*;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-
-/** A base class for {@link InputFormat}. */
-public abstract class InputFormatBase implements InputFormat {
-
-  public static final Log LOG =
-    LogFactory.getLog("org.apache.hadoop.mapred.InputFormatBase");
-
-  private static final double SPLIT_SLOP = 1.1;   // 10% slop
-
-  private long minSplitSize = 1;
-  private static final PathFilter hiddenFileFilter = new PathFilter(){
-    public boolean accept( Path p ){
-      String name = p.getName(); 
-      return !name.startsWith("_") && !name.startsWith("."); 
-    }
-  }; 
-  protected void setMinSplitSize(long minSplitSize) {
-    this.minSplitSize = minSplitSize;
-  }
-
-  /**
-   * Is the given filename splitable? Usually, true, but if the file is
-   * stream compressed, it will not be.
-   * @param fs the file system that the file is on
-   * @param filename the file name to check
-   * @return is this file splitable?
-   */
-  protected boolean isSplitable(FileSystem fs, Path filename) {
-    return true;
-  }
-  
-  public abstract RecordReader getRecordReader(InputSplit split,
-                                               JobConf job,
-                                               Reporter reporter)
-    throws IOException;
-
-  /** List input directories.
-   * Subclasses may override to, e.g., select only files matching a regular
-   * expression. 
-   * 
-   * @param job the job to list input paths for
-   * @return array of Path objects
-   * @throws IOException if zero items.
-   */
-  protected Path[] listPaths(JobConf job)
-    throws IOException {
-    Path[] dirs = job.getInputPaths();
-    if (dirs.length == 0) {
-      throw new IOException("No input paths specified in job");
-    }
-    List<Path> result = new ArrayList(); 
-    for (Path p: dirs) {
-      FileSystem fs = p.getFileSystem(job); 
-      Path[] matches =
-        fs.listPaths(fs.globPaths(p, hiddenFileFilter),hiddenFileFilter);
-      for (Path match: matches) {
-        result.add(fs.makeQualified(match));
-      }
-    }
-
-    return (Path[])result.toArray(new Path[result.size()]);
-  }
-
-  public void validateInput(JobConf job) throws IOException {
-    Path[] inputDirs = job.getInputPaths();
-    if (inputDirs.length == 0) {
-      throw new IOException("No input paths specified in input"); 
-    }
-    
-    List<IOException> result = new ArrayList();
-    int totalFiles = 0; 
-    for (Path p: inputDirs) {
-      FileSystem fs = p.getFileSystem(job);
-      if (fs.exists(p)) {
-        // make sure all paths are files to avoid exception
-        // while generating splits
-        for (Path subPath : fs.listPaths(p, hiddenFileFilter)) {
-          FileSystem subFS = subPath.getFileSystem(job); 
-          if (!subFS.exists(subPath)) {
-            result.add(new IOException(
-                "Input path does not exist: " + subPath)); 
-          } else {
-            totalFiles++; 
-          }
-        }
-      } else {
-        Path [] paths = fs.globPaths(p, hiddenFileFilter); 
-        if (paths.length == 0) {
-          result.add(
-            new IOException("Input Pattern " + p + " matches 0 files")); 
-        } else {
-          // validate globbed paths 
-          for (Path gPath : paths) {
-            FileSystem gPathFS = gPath.getFileSystem(job); 
-            if (!gPathFS.exists(gPath)) {
-              result.add(
-                new FileNotFoundException(
-                    "Input path doesnt exist : " + gPath)); 
-            }
-          }
-          totalFiles += paths.length ; 
-        }
-      }
-    }
-    if (!result.isEmpty()) {
-      throw new InvalidInputException(result);
-    }
-    // send output to client. 
-    LOG.info("Total input paths to process : " + totalFiles); 
-  }
-
-  /** Splits files returned by {@link #listPaths(JobConf)} when
-   * they're too big.*/ 
-  public InputSplit[] getSplits(JobConf job, int numSplits)
-    throws IOException {
-    Path[] files = listPaths(job);
-    long totalSize = 0;                           // compute total size
-    for (int i = 0; i < files.length; i++) {      // check we have valid files
-      Path file = files[i];
-      FileSystem fs = file.getFileSystem(job);
-      if (fs.isDirectory(file) || !fs.exists(file)) {
-        throw new IOException("Not a file: "+files[i]);
-      }
-      totalSize += fs.getLength(files[i]);
-    }
-
-    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
-    long minSize = Math.max(job.getLong("mapred.min.split.size", 1),
-                            minSplitSize);
-
-    ArrayList splits = new ArrayList(numSplits);  // generate splits
-    for (int i = 0; i < files.length; i++) {
-      Path file = files[i];
-      FileSystem fs = file.getFileSystem(job);
-      long length = fs.getLength(file);
-      if (isSplitable(fs, file)) { 
-        long blockSize = fs.getBlockSize(file);
-        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
-
-        long bytesRemaining = length;
-        while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
-          splits.add(new FileSplit(file, length-bytesRemaining, splitSize,
-                                   job));
-          bytesRemaining -= splitSize;
-        }
-        
-        if (bytesRemaining != 0) {
-          splits.add(new FileSplit(file, length-bytesRemaining, 
-                                   bytesRemaining, job));
-        }
-      } else {
-        if (length != 0) {
-          splits.add(new FileSplit(file, 0, length, job));
-        }
-      }
-    }
-    LOG.debug( "Total # of splits: " + splits.size() );
-    return (FileSplit[])splits.toArray(new FileSplit[splits.size()]);
-  }
+/** A base class for {@link InputFormat}. 
+ *  @deprecated replaced by {@link FileInputFormat}
+ */
+public abstract class InputFormatBase extends FileInputFormat {
 
-  private static long computeSplitSize(long goalSize, long minSize,
-                                       long blockSize) {
-    return Math.max(minSize, Math.min(goalSize, blockSize));
-  }
 }
 

+ 39 - 1
src/java/org/apache/hadoop/mapred/LineRecordReader.java

@@ -6,10 +6,16 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
 
 /**
  * Treats keys as offset in file and value as line. 
@@ -17,6 +23,7 @@ import org.apache.hadoop.io.WritableComparable;
  *
  */
 public class LineRecordReader implements RecordReader {
+  private CompressionCodecFactory compressionCodecs = null;
   private long start; 
   private long pos;
   private long end;
@@ -37,6 +44,33 @@ public class LineRecordReader implements RecordReader {
   }
   private TextStuffer bridge = new TextStuffer();
 
+  public LineRecordReader(Configuration job, FileSplit split)
+      throws IOException {
+    long start = split.getStart();
+    long end = start + split.getLength();
+    final Path file = split.getPath();
+    compressionCodecs = new CompressionCodecFactory(job);
+    final CompressionCodec codec = compressionCodecs.getCodec(file);
+
+    // open the file and seek to the start of the split
+    FileSystem fs = FileSystem.get(job);
+    FSDataInputStream fileIn = fs.open(split.getPath());
+    InputStream in = fileIn;
+    if (codec != null) {
+      in = codec.createInputStream(fileIn);
+      end = Long.MAX_VALUE;
+    } else if (start != 0) {
+      fileIn.seek(start - 1);
+      LineRecordReader.readLine(fileIn, null);
+      start = fileIn.getPos();
+    }
+
+    this.in = new BufferedInputStream(in);
+    this.start = start;
+    this.pos = start;
+    this.end = end;
+  }
+  
   public LineRecordReader(InputStream in, long offset, long endOffset) 
     throws IOException{
     this.in = new BufferedInputStream(in);
@@ -62,7 +96,7 @@ public class LineRecordReader implements RecordReader {
 
     ((LongWritable)key).set(pos);           // key is position
     buffer.reset();
-    long bytesRead = readLine(in, buffer);
+    long bytesRead = readLine();
     if (bytesRead == 0) {
       return false;
     }
@@ -71,6 +105,10 @@ public class LineRecordReader implements RecordReader {
     buffer.writeTo(bridge);
     return true;
   }
+  
+  protected long readLine() throws IOException {
+    return LineRecordReader.readLine(in, buffer);
+  }
 
   public static long readLine(InputStream in, 
       OutputStream out) throws IOException {
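The hunks above are what the changelog means by making LineRecordReader easier to extend: the reader gains a (Configuration, FileSplit) constructor that handles seeking and compression codecs itself, and next() now reads through a protected readLine() hook instead of calling the static helper directly. A hedged sketch of a subclass built on that hook; the class name and the line-counting behavior are illustrative only:

package org.apache.hadoop.mapred;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

// Hypothetical reader that layers extra bookkeeping on the new protected hook.
public class CountingLineRecordReader extends LineRecordReader {

  private long linesRead = 0;

  public CountingLineRecordReader(Configuration job, FileSplit split)
      throws IOException {
    super(job, split);                     // constructor added in this commit
  }

  protected long readLine() throws IOException {
    long bytesRead = super.readLine();     // default line-reading behavior
    if (bytesRead > 0) {
      linesRead++;                         // per-line accounting on top
    }
    return bytesRead;
  }

  public long getLinesRead() {
    return linesRead;
  }
}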

+ 1 - 2
src/java/org/apache/hadoop/mapred/SequenceFileInputFormat.java

@@ -20,14 +20,13 @@ package org.apache.hadoop.mapred;
 
 import java.io.IOException;
 
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.MapFile;
 
 /** An {@link InputFormat} for {@link SequenceFile}s. */
-public class SequenceFileInputFormat extends InputFormatBase {
+public class SequenceFileInputFormat extends FileInputFormat {
 
   public SequenceFileInputFormat() {
     setMinSplitSize(SequenceFile.SYNC_INTERVAL);

+ 4 - 26
src/java/org/apache/hadoop/mapred/TextInputFormat.java

@@ -26,7 +26,7 @@ import org.apache.hadoop.io.compress.*;
 /** An {@link InputFormat} for plain text files.  Files are broken into lines.
  * Either linefeed or carriage-return are used to signal end of line.  Keys are
  * the position in the file, and values are the line of text.. */
-public class TextInputFormat extends InputFormatBase implements JobConfigurable {
+public class TextInputFormat extends FileInputFormat implements JobConfigurable {
 
   private CompressionCodecFactory compressionCodecs = null;
   
@@ -37,32 +37,10 @@ public class TextInputFormat extends InputFormatBase implements JobConfigurable
   protected boolean isSplitable(FileSystem fs, Path file) {
     return compressionCodecs.getCodec(file) == null;
   }
-  
-  public RecordReader getRecordReader(InputSplit genericSplit,
-                                      JobConf job, Reporter reporter)
-    throws IOException {
 
+  public RecordReader getRecordReader(InputSplit genericSplit, JobConf job,
+      Reporter reporter) throws IOException {
     reporter.setStatus(genericSplit.toString());
-    FileSplit split = (FileSplit) genericSplit;
-    long start = split.getStart();
-    long end = start + split.getLength();
-    final Path file = split.getPath();
-    final CompressionCodec codec = compressionCodecs.getCodec(file);
-
-    // open the file and seek to the start of the split
-    FileSystem fs = FileSystem.get(job);
-    FSDataInputStream fileIn = fs.open(split.getPath());
-    InputStream in = fileIn;
-    if (codec != null) {
-      in = codec.createInputStream(fileIn);
-      end = Long.MAX_VALUE;
-    } else if (start != 0) {
-      fileIn.seek(start-1);
-      LineRecordReader.readLine(fileIn, null);
-      start = fileIn.getPos();
-    }
-    
-    return new LineRecordReader(in, start, end);
+    return new LineRecordReader(job, (FileSplit) genericSplit);
   }
 }
-
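With the codec and seek logic moved into LineRecordReader, TextInputFormat.getRecordReader() collapses to the two lines shown above. A hedged sketch of how a job of this era would wire the format in through JobConf; the job name and input path are made up, and the methods used (setInputFormat, addInputPath, JobClient.runJob) are assumed to be available in the pre-0.20 API:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class TextJobSetup {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(TextJobSetup.class);
    job.setJobName("line-processing");             // hypothetical job name
    job.setInputFormat(TextInputFormat.class);     // uses the simplified reader above
    job.addInputPath(new Path("/data/logs"));      // hypothetical input directory
    // Mapper, reducer and output path configuration would go here as well.
    JobClient.runJob(job);
  }
}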