
HADOOP-10091. Job with a har archive as input fails on 0.23. Contributed by Jason Dere and Jason Lowe

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1541758 13f79535-47bb-0310-9956-ffa450edef68
Jason Darrell Lowe, 11 years ago
parent commit 5bdf9fc32c

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -26,6 +26,9 @@ Release 0.23.10 - UNRELEASED
 
     HADOOP-9757. Har metadata cache can grow without limit (Cristina Abad via daryn)
 
+    HADOOP-10091. Job with a har archive as input fails on 0.23 (Jason Dere
+    and Jason Lowe via jlowe)
+
 Release 0.23.9 - 2013-07-08
 
   INCOMPATIBLE CHANGES

+ 4 - 0
hadoop-common-project/hadoop-common/pom.xml

@@ -486,6 +486,10 @@
             <exclude>src/test/all-tests</exclude>
             <exclude>src/test/resources/kdc/ldif/users.ldif</exclude>
             <exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c</exclude>
+            <exclude>src/test/resources/test.har/_SUCCESS</exclude>
+            <exclude>src/test/resources/test.har/_index</exclude>
+            <exclude>src/test/resources/test.har/_masterindex</exclude>
+            <exclude>src/test/resources/test.har/part-0</exclude>
           </excludes>
         </configuration>
       </plugin>
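The four new exclusions extend the same <excludes> list that already exempts binary and format-constrained test fixtures (users.ldif, lz4.c), presumably the Apache RAT license-audit configuration: the test.har metadata files added by this commit are machine-generated and cannot carry Apache license headers, so they would otherwise fail the release audit.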

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java

@@ -1329,7 +1329,7 @@ public abstract class FileSystem extends Configured implements Closeable {
     return new ContentSummary(summary[0], summary[1], summary[2]);
   }
 
-  final private static PathFilter DEFAULT_FILTER = new PathFilter() {
+  final protected static PathFilter DEFAULT_FILTER = new PathFilter() {
       public boolean accept(Path file) {
         return true;
       }     
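The widened visibility here is what the HarFileSystem change in the next hunk relies on: subclasses can now pass DEFAULT_FILTER to FileSystem's protected two-argument listLocatedStatus(Path, PathFilter) overload instead of re-declaring an accept-everything filter of their own.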

+ 7 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java

@@ -697,6 +697,13 @@ public class HarFileSystem extends FilterFileSystem {
     throw new IOException("Har: delete not allowed");
   }
   
+  @Override
+  public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
+  throws IOException {
+    // Use FileSystem's implementation
+    return listLocatedStatus(f, DEFAULT_FILTER);
+  }
+
   /**
   * listStatus returns the children of a directory
    * after looking up the index files.
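Why this fixes the job failure: HarFileSystem extends FilterFileSystem, whose listLocatedStatus forwards straight to the wrapped filesystem, so listings bypassed the har index and returned statuses from the underlying storage. Overriding it to call FileSystem's generic implementation (via the now-protected DEFAULT_FILTER) routes the listing through HarFileSystem's own index-backed listStatus. A minimal client-side sketch of the fixed path; the archive location below is hypothetical, not from this commit:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.LocatedFileStatus;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.fs.RemoteIterator;

  public class HarListingExample {
    public static void main(String[] args) throws Exception {
      // Hypothetical har URI; the same "har://" + absolute-path shape the
      // test below uses for a local archive.
      Path dir = new Path("har:///user/alice/input.har/dir1");
      FileSystem fs = dir.getFileSystem(new Configuration());
      // With the override in place, this iterates the entries recorded in
      // the archive's _index file rather than the raw backing filesystem.
      RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
      while (it.hasNext()) {
        System.out.println(it.next().getPath());
      }
    }
  }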

+ 30 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java

@@ -25,8 +25,12 @@ import static org.junit.Assert.assertFalse;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
+import java.util.HashSet;
+import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.util.Shell;
 import org.junit.After;
@@ -221,6 +225,32 @@ public class TestHarFileSystemBasics {
     hfs.initialize(uri, new Configuration());
   }
 
+  @Test
+  public void testListLocatedStatus() throws Exception {
+    String testHarPath = this.getClass().getResource("/test.har").getPath();
+    URI uri = new URI("har://" + testHarPath);
+    HarFileSystem hfs = new HarFileSystem(localFileSystem);
+    hfs.initialize(uri, new Configuration());
+
+    // test.har has the following contents:
+    //   dir1/1.txt
+    //   dir1/2.txt
+    Set<String> expectedFileNames = new HashSet<String>();
+    expectedFileNames.add("1.txt");
+    expectedFileNames.add("2.txt");
+
+    // List contents of dir, and ensure we find all expected files
+    Path path = new Path("dir1");
+    RemoteIterator<LocatedFileStatus> fileList = hfs.listLocatedStatus(path);
+    while (fileList.hasNext()) {
+      String fileName = fileList.next().getPath().getName();
+      assertTrue(fileName + " not in expected files list", expectedFileNames.contains(fileName));
+      expectedFileNames.remove(fileName);
+    }
+    assertEquals("Didn't find all of the expected file names: " + expectedFileNames,
+                 0, expectedFileNames.size());
+  }
+
   // ========== Negative:
 
   @Test

BIN
hadoop-common-project/hadoop-common/src/test/resources/test.har/.part-0.crc


+ 0 - 0
hadoop-common-project/hadoop-common/src/test/resources/test.har/_SUCCESS


+ 4 - 0
hadoop-common-project/hadoop-common/src/test/resources/test.har/_index

@@ -0,0 +1,4 @@
+%2F dir 1380270822000+511+root+wheel 0 0 dir1 
+%2Fdir1 dir 1380270441000+493+jdere+wheel 0 0 1.txt 2.txt 
+%2Fdir1%2F1.txt file part-0 0 0 1380270439000+420+jdere+wheel 
+%2Fdir1%2F2.txt file part-0 0 0 1380270441000+420+jdere+wheel 
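Reading off these lines, each _index entry is a URL-encoded path followed by its type and type-specific fields: directory entries carry modtime+permissions+owner+group and then the names of their children, while file entries name the backing part file with a start offset and length (both 0 here, since 1.txt and 2.txt are empty) ahead of the same metadata tuple.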

+ 2 - 0
hadoop-common-project/hadoop-common/src/test/resources/test.har/_masterindex

@@ -0,0 +1,2 @@
+3 
+0 1210114968 0 232 
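The _masterindex acts as a coarse lookup over _index: the leading 3 is the har layout version, and the second line appears to describe one block of index entries as a hash range (0 to 1210114968) plus the byte range that block occupies in _index (0 to 232).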

+ 0 - 0
hadoop-common-project/hadoop-common/src/test/resources/test.har/part-0