Quellcode durchsuchen

HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.

cnauroth vor 10 Jahren
Ursprung
Commit
79301e80d7

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -452,6 +452,9 @@ Release 2.7.0 - UNRELEASED
 
     HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
 
+    HADOOP-11201. Hadoop Archives should support globs resolving to files.
+    (Gera Shegalov via cnauroth)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

+ 6 - 11
hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java

@@ -101,7 +101,7 @@ public class HadoopArchives implements Tool {
   short repl = 10;
 
   private static final String usage = "archive"
-  + " -archiveName NAME -p <parent path> [-r <replication factor>]" +
+  + " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
       "<src>* <dest>" +
   "\n";
   
@@ -348,15 +348,10 @@ public class HadoopArchives implements Tool {
    */
   private void writeTopLevelDirs(SequenceFile.Writer srcWriter, 
       List<Path> paths, Path parentPath) throws IOException {
-    //add all the directories 
-    List<Path> justDirs = new ArrayList<Path>();
+    // extract paths from absolute URIs
+    List<Path> justPaths = new ArrayList<Path>();
     for (Path p: paths) {
-      if (!p.getFileSystem(getConf()).isFile(p)) {
-        justDirs.add(new Path(p.toUri().getPath()));
-      }
-      else {
-        justDirs.add(new Path(p.getParent().toUri().getPath()));
-      }
+      justPaths.add(new Path(p.toUri().getPath()));
     }
     /* find all the common parents of paths that are valid archive
      * paths. The below is done so that we do not add a common path
@@ -372,7 +367,7 @@ public class HadoopArchives implements Tool {
     Path root = new Path(Path.SEPARATOR);
     for (int i = parentPath.depth(); i < deepest.depth(); i++) {
       List<Path> parents = new ArrayList<Path>();
-      for (Path p: justDirs) {
+      for (Path p: justPaths) {
         if (p.compareTo(root) == 0){
           //do nothing
         }
@@ -392,7 +387,7 @@ public class HadoopArchives implements Tool {
           }
         }
       }
-      justDirs = parents;
+      justPaths = parents;
     }
     Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
     for (Map.Entry<String, HashSet<String>> entry : keyVals) {

+ 66 - 11
hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java

@@ -203,9 +203,58 @@ public class TestHadoopArchives {
     Assert.assertEquals(originalPaths, harPaths);
   }
 
-  private static List<String> lsr(final FsShell shell, String dir)
-      throws Exception {
-    System.out.println("lsr root=" + dir);
+  @Test
+  public void testSingleFile() throws Exception {
+    final Path sub1 = new Path(inputPath, "dir1");
+    fs.mkdirs(sub1);
+    String singleFileName = "a";
+    createFile(inputPath, fs, sub1.getName(), singleFileName);
+    final FsShell shell = new FsShell(conf);
+
+    final List<String> originalPaths = lsr(shell, sub1.toString());
+    System.out.println("originalPaths: " + originalPaths);
+
+    // make the archive:
+    final String fullHarPathStr = makeArchive(sub1, singleFileName);
+
+    // compare results:
+    final List<String> harPaths = lsr(shell, fullHarPathStr);
+    Assert.assertEquals(originalPaths, harPaths);
+  }
+
+  @Test
+  public void testGlobFiles() throws Exception {
+    final Path sub1 = new Path(inputPath, "dir1");
+    final Path sub2 = new Path(inputPath, "dir2");
+    fs.mkdirs(sub1);
+    String fileName = "a";
+    createFile(inputPath, fs, sub1.getName(), fileName);
+    createFile(inputPath, fs, sub2.getName(), fileName);
+    createFile(inputPath, fs, sub1.getName(), "b"); // not part of result
+
+    final String glob =  "dir{1,2}/a";
+    final FsShell shell = new FsShell(conf);
+    final List<String> originalPaths = lsr(shell, inputPath.toString(),
+        inputPath + "/" + glob);
+    System.out.println("originalPaths: " + originalPaths);
+
+    // make the archive:
+    final String fullHarPathStr = makeArchive(inputPath, glob);
+
+    // compare results:
+    final List<String> harPaths = lsr(shell, fullHarPathStr,
+        fullHarPathStr + "/" + glob);
+    Assert.assertEquals(originalPaths, harPaths);
+  }
+
+  private static List<String> lsr(final FsShell shell, String rootDir) throws Exception {
+    return lsr(shell, rootDir, null);
+  }
+
+  private static List<String> lsr(final FsShell shell, String rootDir,
+      String glob) throws Exception {
+    final String dir = glob == null ? rootDir : glob;
+    System.out.println("lsr root=" + rootDir);
     final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
     final PrintStream out = new PrintStream(bytes);
     final PrintStream oldOut = System.out;
@@ -222,9 +271,9 @@ public class TestHadoopArchives {
       System.setErr(oldErr);
     }
     System.out.println("lsr results:\n" + results);
-    String dirname = dir;
-    if (dir.lastIndexOf(Path.SEPARATOR) != -1) {
-      dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR));
+    String dirname = rootDir;
+    if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) {
+      dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR));
     }
 
     final List<String> paths = new ArrayList<String>();
@@ -621,13 +670,19 @@ public class TestHadoopArchives {
     return bb;
   }
 
+
+  private String makeArchive() throws Exception {
+    return makeArchive(inputPath, null);
+  }
+
   /*
    * Run the HadoopArchives tool to create an archive on the 
    * given file system.
    */
-  private String makeArchive() throws Exception {
-    final String inputPathStr = inputPath.toUri().getPath();
-    System.out.println("inputPathStr = " + inputPathStr);
+  private String makeArchive(Path parentPath, String relGlob) throws Exception {
+    final String parentPathStr = parentPath.toUri().getPath();
+    final String relPathGlob = relGlob == null ? "*" : relGlob;
+    System.out.println("parentPathStr = " + parentPathStr);
 
     final URI uri = fs.getUri();
     final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
@@ -635,8 +690,8 @@ public class TestHadoopArchives {
 
     final String harName = "foo.har";
     final String fullHarPathStr = prefix + harName;
-    final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*",
-        archivePath.toString() };
+    final String[] args = { "-archiveName", harName, "-p", parentPathStr,
+        relPathGlob, archivePath.toString() };
     System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
         HADOOP_ARCHIVES_JAR);
     final HadoopArchives har = new HadoopArchives(conf);