Pārlūkot izejas kodu

HADOOP-3580. Fixes a problem to do with specifying a har as an input to a job. Contributed by Mahadev Konar.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@669472 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das 17 gadi atpakaļ
vecāks
revīzija
6d90f7b7e8

+ 3 - 0
CHANGES.txt

@@ -617,6 +617,9 @@ Release 0.18.0 - Unreleased
     HADOOP-3533. Add deprecated methods to provide API compatibility
     between 0.18 and 0.17. Remove the deprecated methods in trunk. (omalley)
 
+    HADOOP-3580. Fixes a problem to do with specifying a har as an input to 
+    a job. (Mahadev Konar via ddas)
+
 Release 0.17.1 - Unreleased
 
   INCOMPATIBLE CHANGES

+ 18 - 2
src/core/org/apache/hadoop/fs/HarFileSystem.java

@@ -343,8 +343,24 @@ public class HarFileSystem extends FilterFileSystem {
     }
     FileStatus fsFile = fs.getFileStatus(new Path(archivePath,
         harStatus.getPartName()));
-    return fs.getFileBlockLocations(fsFile, 
-        harStatus.getStartIndex(), harStatus.getLength());
+    BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile, 
+        harStatus.getStartIndex() + start, len);
+    return fakeBlockLocations(rawBlocks, harStatus.getStartIndex());
+  }
+  
+  /**
+   * Fake the raw blocks by shifting their offsets, since map reduce uses the
+   * block offsets to do some computations regarding the blocks.
+   * @param rawBlocks the raw blocks returned by the filesystem
+   * @return faked blocks with changed offsets.
+   */
+  private BlockLocation[] fakeBlockLocations(BlockLocation[] rawBlocks, 
+		  long startIndex) {
+	for (BlockLocation block : rawBlocks) {
+		long rawOffset = block.getOffset();
+		block.setOffset(rawOffset - startIndex);
+	}
+	return rawBlocks;
   }
   
   /**

+ 1 - 1
src/mapred/org/apache/hadoop/mapred/FileInputFormat.java

@@ -331,7 +331,7 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
         return i;
       }
     }
-    BlockLocation last = blkLocations[blkLocations.length];
+    BlockLocation last = blkLocations[blkLocations.length -1];
     long fileLength = last.getOffset() + last.getLength() -1;
     throw new IllegalArgumentException("Offset " + offset + 
                                        " is outside of file (0.." +