Browse Source

svn merge -c 1468232 FIXES: MAPREDUCE-4974. Optimising the LineRecordReader initialize() method (Gelesh via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1468235 13f79535-47bb-0310-9956-ffa450edef68
Robert Joseph Evans 12 years ago
parent
commit
a7fba8fcd7

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -45,6 +45,9 @@ Release 2.0.5-beta - UNRELEASED
 
   OPTIMIZATIONS
 
+    MAPREDUCE-4974. Optimising the LineRecordReader initialize() method 
+    (Gelesh via bobby)
+
   BUG FIXES
 
     MAPREDUCE-4671. AM does not tell the RM about container requests which are

+ 9 - 15
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java

@@ -52,7 +52,6 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
   public static final String MAX_LINE_LENGTH = 
     "mapreduce.input.linerecordreader.line.maxlength";
 
-  private CompressionCodecFactory compressionCodecs = null;
   private long start;
   private long pos;
   private long end;
@@ -60,9 +59,9 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
   private FSDataInputStream fileIn;
   private Seekable filePosition;
   private int maxLineLength;
-  private LongWritable key = null;
-  private Text value = null;
-  private CompressionCodec codec;
+  private LongWritable key;
+  private Text value;
+  private boolean isCompressedInput;
   private Decompressor decompressor;
   private byte[] recordDelimiterBytes;
 
@@ -81,13 +80,14 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
     start = split.getStart();
     end = start + split.getLength();
     final Path file = split.getPath();
-    compressionCodecs = new CompressionCodecFactory(job);
-    codec = compressionCodecs.getCodec(file);
 
     // open the file and seek to the start of the split
     final FileSystem fs = file.getFileSystem(job);
     fileIn = fs.open(file);
-    if (isCompressedInput()) {
+    
+    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
+    if (null!=codec) {
+      isCompressedInput = true;	
       decompressor = CodecPool.getDecompressor(codec);
       if (codec instanceof SplittableCompressionCodec) {
         final SplitCompressionInputStream cIn =
@@ -132,19 +132,16 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
     this.pos = start;
   }
   
-  private boolean isCompressedInput() {
-    return (codec != null);
-  }
 
   private int maxBytesToConsume(long pos) {
-    return isCompressedInput()
+    return isCompressedInput
       ? Integer.MAX_VALUE
       : (int) Math.min(Integer.MAX_VALUE, end - pos);
   }
 
   private long getFilePosition() throws IOException {
     long retVal;
-    if (isCompressedInput() && null != filePosition) {
+    if (isCompressedInput && null != filePosition) {
       retVal = filePosition.getPos();
     } else {
       retVal = pos;
@@ -166,9 +163,6 @@ public class LineRecordReader extends RecordReader<LongWritable, Text> {
     while (getFilePosition() <= end) {
       newSize = in.readLine(value, maxLineLength,
           Math.max(maxBytesToConsume(pos), maxLineLength));
-      if (newSize == 0) {
-        break;
-      }
       pos += newSize;
       if (newSize < maxLineLength) {
         break;