
HADOOP-10112. har file listing doesn't work with wild card. Contributed by Brandon Li

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1565474 13f79535-47bb-0310-9956-ffa450edef68
Jason Darrell Lowe · 11 years ago · commit 57841b4e3a
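
Why the wildcard case breaks: when FsShell expands a glob such as har:///test/foo.har/d*, FileSystem glob expansion resolves the path one component at a time, so HarFileSystem receives getFileStatus/listStatus calls for path prefixes that lie above the archive root (for example har:///test). Before this patch those lookups were sent to the archive index, where no such entry exists, and the listing failed. A rough standalone illustration (plain Java, not Hadoop's actual globber) of the prefixes that get probed:

    // Hypothetical sketch: enumerate the path prefixes a glob expansion
    // touches. Only /test/foo.har and deeper are inside the archive.
    public class GlobPrefixes {
      public static void main(String[] args) {
        String glob = "/test/foo.har/d*";
        StringBuilder prefix = new StringBuilder();
        for (String part : glob.substring(1).split("/")) {
          prefix.append('/').append(part);
          System.out.println("probed during glob expansion: " + prefix);
        }
      }
    }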

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -21,6 +21,9 @@ Release 0.23.11 - UNRELEASED
 
     HADOOP-10146. Workaround JDK7 Process fd close bug (daryn)
 
+    HADOOP-10112. har file listing doesn't work with wild card (Brandon Li via
+    jlowe)
+
 Release 0.23.10 - 2013-12-09
 
   INCOMPATIBLE CHANGES

+ 13 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java

@@ -614,15 +614,20 @@ public class HarFileSystem extends FilterFileSystem {
    */
   @Override
   public FileStatus getFileStatus(Path f) throws IOException {
-    HarStatus hstatus = getFileHarStatus(f);
+    Path p = makeQualified(f);
+    if (p.toUri().getPath().length() < archivePath.toString().length()) {
+      // still in the source file system
+      return fs.getFileStatus(new Path(p.toUri().getPath()));
+    }
+
+    HarStatus hstatus = getFileHarStatus(p);
     return toFileStatus(hstatus, null);
   }
 
   private HarStatus getFileHarStatus(Path f) throws IOException {
     // get the fs DataInputStream for the underlying file
     // look up the index.
-    Path p = makeQualified(f);
-    Path harPath = getPathInHar(p);
+    Path harPath = getPathInHar(f);
     if (harPath == null) {
       throw new IOException("Invalid file name: " + f + " in " + uri);
     }
@@ -716,6 +721,11 @@ public class HarFileSystem extends FilterFileSystem {
     // to the client
     List<FileStatus> statuses = new ArrayList<FileStatus>();
     Path tmpPath = makeQualified(f);
+    if (tmpPath.toUri().getPath().length() < archivePath.toString().length()) {
+      // still in the source file system
+      return fs.listStatus(new Path(tmpPath.toUri().getPath()));
+    }
+    
     Path harPath = getPathInHar(tmpPath);
     HarStatus hstatus = metadata.archive.get(harPath);
     if (hstatus == null) {
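
The guard added to both getFileStatus and listStatus above is a simple lexical check: a qualified path whose string form is shorter than archivePath cannot lie inside the archive, so the call is delegated to the underlying file system. A minimal sketch of that check in isolation (the class and probe paths are illustrative, not part of the patch):

    // Hypothetical sketch of the prefix-length test HarFileSystem now applies.
    public class HarPrefixCheck {
      public static void main(String[] args) {
        String archivePath = "/test/foo.har"; // archive root in the source fs
        for (String p : new String[] { "/test", "/test/foo.har/dir2" }) {
          boolean inSourceFs = p.length() < archivePath.length();
          System.out.println(p + " -> " + (inSourceFs
              ? "delegate to underlying fs"
              : "resolve against the archive index"));
        }
      }
    }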

+ 72 - 1
hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java

@@ -18,7 +18,10 @@
 
 package org.apache.hadoop.tools;
 
+import static org.junit.Assert.assertTrue;
+
 import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.net.URI;
@@ -34,6 +37,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.JarFinder;
@@ -111,7 +115,14 @@ public class TestHadoopArchives {
       System.err.println(e);
     }
   }
-
+  static Path writeFile(FileSystem fs, Path f) throws IOException {
+    DataOutputStream out = fs.create(f);
+    out.writeBytes("dhruba: " + f);
+    out.close();
+    assertTrue(fs.exists(f));
+    return f;
+  }
+  
   @Test
   public void testRelativePath() throws Exception {
     fs.delete(archivePath, true);
@@ -222,4 +233,64 @@ public class TestHadoopArchives {
         .println("lsr paths = " + paths.toString().replace(", ", ",\n  "));
     return paths;
   }
+  
+  // Make sure har file system works with wildcards
+  @Test
+  public void testHar() throws IOException {
+    assertTrue("Not a HDFS: " + fs.getUri(),
+        fs instanceof DistributedFileSystem);
+    PrintStream psBackup = System.out;
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    PrintStream psOut = new PrintStream(out);
+    System.setOut(psOut);
+    HadoopArchives archiveShell = new HadoopArchives(conf);
+    archiveShell.setConf(conf);
+
+    FsShell fsShell = new FsShell();
+    fsShell.setConf(conf);
+
+    try {
+      Path myPath = new Path("/test/dir");
+      assertTrue(fs.mkdirs(myPath));
+      assertTrue(fs.exists(myPath));
+      myPath = new Path("/test/dir/dir2");
+      assertTrue(fs.mkdirs(myPath));
+      assertTrue(fs.exists(myPath));
+      Path myFile = new Path("/test/dir/dir2/file");
+      writeFile(fs, myFile);
+      assertTrue(fs.exists(myFile));
+
+      String[] args = new String[5];
+      args[0] = "-archiveName";
+      args[1] = "foo.har";
+      args[2] = "-p";
+      args[3] = "/test/dir";
+      args[4] = "/test";
+      int val = -1;
+      try {
+        val = archiveShell.run(args);
+      } catch (Exception e) {
+        System.err.println("Exception raised from HadoopArchives.run "
+            + e.getLocalizedMessage());
+      }
+      assertTrue(val == 0);
+
+      args = new String[2];
+      args[0] = "-ls";
+      args[1] = "har:///test/foo.har/d*";
+      val = -1;
+      try {
+        val = fsShell.run(args);
+      } catch (Exception e) {
+        System.err.println("Exception raised from FsShell.run "
+            + e.getLocalizedMessage());
+      }
+
+      String returnString = out.toString();
+      out.reset();
+      assertTrue(returnString.contains("har:///test/foo.har/dir2/file"));
+    } finally {
+      System.setOut(psBackup);
+    }
+  }
 }
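
For reference, the new test drives this scenario end to end inside a MiniDFSCluster; against a real cluster the same flow can be sketched through the public Tool interface. This is a hedged sketch, not part of the patch: paths and the archive name mirror the test and assume /test/dir/dir2/file already exists in HDFS.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FsShell;
    import org.apache.hadoop.tools.HadoopArchives;
    import org.apache.hadoop.util.ToolRunner;

    public class HarWildcardDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same arguments as the test: archive /test/dir into /test/foo.har.
        int rc = ToolRunner.run(conf, new HadoopArchives(conf),
            new String[] { "-archiveName", "foo.har", "-p", "/test/dir", "/test" });
        if (rc != 0) {
          throw new RuntimeException("archive creation failed: " + rc);
        }
        // Wildcard listing inside the har; this is the case the patch fixes.
        ToolRunner.run(conf, new FsShell(conf),
            new String[] { "-ls", "har:///test/foo.har/d*" });
      }
    }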