
HADOOP-11021. Configurable replication factor in the hadoop archive command. Contributed by Zhe Zhang.

(cherry picked from commit ea1c6f31c2d2ea5b38ed57e2aa241d122103a721)
Andrew Wang, 10 years ago
Commit 075bb9e380

+ 8 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm

@@ -38,7 +38,7 @@ Overview
 How to Create an Archive
 ------------------------
 
-  `Usage: hadoop archive -archiveName name -p <parent> <src>* <dest>`
+  `Usage: hadoop archive -archiveName name -p <parent> [-r <replication factor>] <src>* <dest>`
 
   -archiveName is the name of the archive you would like to create. An example
   would be foo.har. The name should have a \*.har extension. The parent argument
@@ -52,9 +52,12 @@ How to Create an Archive
  would need a map reduce cluster to run this. For a detailed example, see the
  later sections.
 
+  -r indicates the desired replication factor; if this optional argument is
+  not specified, a replication factor of 10 will be used.
+
   If you just want to archive a single directory /foo/bar then you can just use
 
-  `hadoop archive -archiveName zoo.har -p /foo/bar /outputdir`
+  `hadoop archive -archiveName zoo.har -p /foo/bar -r 3 /outputdir`
 
 How to Look Up Files in Archives
 --------------------------------
@@ -90,14 +93,15 @@ Archives Examples
 
 $H3 Creating an Archive
 
-  `hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/zoo`
+  `hadoop archive -archiveName foo.har -p /user/hadoop -r 3 dir1 dir2 /user/zoo`
 
   The above example is creating an archive using /user/hadoop as the relative
   archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2
   will be archived in the following file system directory -- /user/zoo/foo.har.
   Archiving does not delete the input files. If you want to delete the input
   files after creating the archives (to reduce namespace), you will have to do
-  it on your own. 
+  it on your own. In this example, because `-r 3` is specified, a replication
+  factor of 3 will be used.
 
 $H3 Looking Up Files
 

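The documentation hunk above introduces the optional `-r` flag. As a quick illustration (not part of the patch), here is a minimal sketch of driving the same tool from Java via `ToolRunner`, mirroring the documented command line; all paths below are hypothetical:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class CreateArchiveExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Equivalent to:
    //   hadoop archive -archiveName zoo.har -p /foo/bar -r 3 /outputdir
    String[] toolArgs = {
        "-archiveName", "zoo.har",
        "-p", "/foo/bar",
        "-r", "3",          // optional; the default factor is 10
        "/outputdir"
    };
    System.exit(ToolRunner.run(conf, new HadoopArchives(conf), toolArgs));
  }
}
```

Omitting the two `-r` arguments falls back to the default replication factor of 10, as the docs above state.
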
+ 10 - 2
hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java

@@ -97,9 +97,12 @@ public class HadoopArchives implements Tool {
   long partSize = 2 * 1024 * 1024 * 1024l;
   /** size of blocks in hadoop archives **/
   long blockSize = 512 * 1024 * 1024l;
+  /** the desired replication degree; default is 10 **/
+  short repl = 10;
 
   private static final String usage = "archive"
-  + " -archiveName NAME -p <parent path> <src>* <dest>" +
+  + " -archiveName NAME -p <parent path> [-r <replication factor>]" +
+      "<src>* <dest>" +
   "\n";
   
  
@@ -542,7 +545,7 @@ public class HadoopArchives implements Tool {
       srcWriter.close();
     }
     //increase the replication of src files
-    jobfs.setReplication(srcFiles, (short) 10);
+    jobfs.setReplication(srcFiles, repl);
     conf.setInt(SRC_COUNT_LABEL, numFiles);
     conf.setLong(TOTAL_SIZE_LABEL, totalSize);
     int numMaps = (int)(totalSize/partSize);
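This hunk swaps the hard-coded `(short) 10` for the new `repl` field when raising the replication of the job's source-listing file. For reference, a minimal sketch of `FileSystem#setReplication` in isolation; the path and factor here are hypothetical:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SetReplicationExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path listing = new Path("/tmp/har-job/_src_files"); // hypothetical path
    short repl = 3;
    // Returns whether the change was accepted; some file systems
    // treat this as a no-op.
    boolean accepted = fs.setReplication(listing, repl);
    System.out.println("replication change accepted: " + accepted);
  }
}
```
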
@@ -835,6 +838,11 @@ public class HadoopArchives implements Tool {
       }
 
       i+=2;
+
+      if ("-r".equals(args[i])) {
+        repl = Short.parseShort(args[i+1]);
+        i+=2;
+      }
       //read the rest of the paths
       for (; i < args.length; i++) {
         if (i == (args.length - 1)) {

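One caveat with the parsing above: `args[i]` is read immediately after `i+=2`, so a command line that ends at `-p <parent>` would raise `ArrayIndexOutOfBoundsException` rather than print the usage string. A hedged sketch of a more defensive variant follows; the bounds checks and error messages are my additions, not part of this commit:

```java
// Defensive variant of the "-r" parsing above; the bounds checks and
// messages are assumptions, not part of the committed code.
if (i < args.length && "-r".equals(args[i])) {
  if (i + 1 >= args.length) {
    throw new IllegalArgumentException("-r requires a replication factor");
  }
  try {
    repl = Short.parseShort(args[i + 1]);
  } catch (NumberFormatException e) {
    throw new IllegalArgumentException(
        "Invalid replication factor: " + args[i + 1], e);
  }
  i += 2;
}
```
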
+ 41 - 0
hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java

@@ -157,6 +157,24 @@ public class TestHadoopArchives {
     final List<String> harPaths = lsr(shell, fullHarPathStr);
     Assert.assertEquals(originalPaths, harPaths);
   }
+
+  @Test
+  public void testRelativePathWithRepl() throws Exception {
+    final Path sub1 = new Path(inputPath, "dir1");
+    fs.mkdirs(sub1);
+    createFile(inputPath, fs, sub1.getName(), "a");
+    final FsShell shell = new FsShell(conf);
+
+    final List<String> originalPaths = lsr(shell, "input");
+    System.out.println("originalPaths: " + originalPaths);
+
+    // make the archive:
+    final String fullHarPathStr = makeArchiveWithRepl();
+
+    // compare results:
+    final List<String> harPaths = lsr(shell, fullHarPathStr);
+    Assert.assertEquals(originalPaths, harPaths);
+  }
   
  @Test
   public void testPathWithSpaces() throws Exception {
@@ -625,6 +643,29 @@ public class TestHadoopArchives {
     assertEquals(0, ToolRunner.run(har, args));
     return fullHarPathStr;
   }
+
+  /*
+   * Run the HadoopArchives tool to create an archive on the
+   * given file system with a specified replication degree.
+   */
+  private String makeArchiveWithRepl() throws Exception {
+    final String inputPathStr = inputPath.toUri().getPath();
+    System.out.println("inputPathStr = " + inputPathStr);
+
+    final URI uri = fs.getUri();
+    final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
+        + archivePath.toUri().getPath() + Path.SEPARATOR;
+
+    final String harName = "foo.har";
+    final String fullHarPathStr = prefix + harName;
+    final String[] args = { "-archiveName", harName, "-p", inputPathStr,
+        "-r", "3", "*", archivePath.toString() };
+    System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
+        HADOOP_ARCHIVES_JAR);
+    final HadoopArchives har = new HadoopArchives(conf);
+    assertEquals(0, ToolRunner.run(har, args));
+    return fullHarPathStr;
+  }
   
   @Test
   /*
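Note that `makeArchiveWithRepl` asserts only the exit code, and its caller `testRelativePathWithRepl` only compares directory listings; nothing verifies that `-r 3` was actually honored. Since the `repl` field is declared package-private and `TestHadoopArchives` lives in the same `org.apache.hadoop.tools` package, one possible extra assertion is sketched below; it is not part of the patch:

```java
// Hypothetical extra check (not in this patch): assert the parsed value.
// Works because HadoopArchives.repl is package-private and the test class
// shares the org.apache.hadoop.tools package.
final HadoopArchives har = new HadoopArchives(conf);
assertEquals(0, ToolRunner.run(har, args));
assertEquals((short) 3, har.repl);
```
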