浏览代码

HADOOP-13024. Distcp with -delete feature on raw data not implemented. Contributed by Mavin Martin.

(cherry picked from commit 0a85d079838f532a13ca237300386d1b3bc1b178)
(cherry picked from commit ad69baf6a9139c0af81e9f72e41c1e3aeb119ebc)
Jing Zhao 8 年之前
父节点
当前提交
d3c4465147

+ 11 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

@@ -18,6 +18,8 @@ package org.apache.hadoop.tools;
  * limitations under the License.
  */
 
+import org.apache.hadoop.fs.Path;
+
 /**
  * Utility class to hold commonly used constants.
  */
@@ -140,10 +142,18 @@ public class DistCpConstants {
   public static final int MIN_RECORDS_PER_CHUNK_DEFAULT = 5;
   public static final int SPLIT_RATIO_DEFAULT  = 2;
 
+  /**
+   * Constants for NONE file deletion
+   */
+  public static final String NONE_PATH_NAME = "/NONE";
+  public static final Path NONE_PATH = new Path(NONE_PATH_NAME);
+  public static final Path RAW_NONE_PATH = new Path(
+      DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME + NONE_PATH_NAME);
+
   /**
    * Value of reserved raw HDFS directory when copying raw.* xattrs.
    */
-  static final String HDFS_RESERVED_RAW_DIRECTORY_NAME = "/.reserved/raw";
+  public static final String HDFS_RESERVED_RAW_DIRECTORY_NAME = "/.reserved/raw";
 
   static final String HDFS_DISTCP_DIFF_DIRECTORY_NAME = ".distcp.diff.tmp";
 }

+ 4 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

@@ -238,7 +238,10 @@ public class CopyCommitter extends FileOutputCommitter {
     List<Path> targets = new ArrayList<Path>(1);
     Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
     targets.add(targetFinalPath);
-    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
+    Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath)
+        .toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME)
+        ? DistCpConstants.RAW_NONE_PATH : DistCpConstants.NONE_PATH;
+    DistCpOptions options = new DistCpOptions(targets, resultNonePath);
     //
     // Set up options to be the same from the CopyListing.buildListing's perspective,
     // so to collect similar listings as when doing the copy

+ 21 - 24
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpWithRawXAttrs.java

@@ -82,14 +82,7 @@ public class TestDistCpWithRawXAttrs {
     final String relDst = "/./.reserved/../.reserved/raw/../raw/dest/../dest";
     doTestPreserveRawXAttrs(relSrc, relDst, "-px", true, true,
         DistCpConstants.SUCCESS);
-    doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, "-px",
-        false, true, DistCpConstants.SUCCESS);
-    doTestPreserveRawXAttrs(rootedSrcName, rawDestName, "-px",
-        false, true, DistCpConstants.INVALID_ARGUMENT);
-    doTestPreserveRawXAttrs(rawSrcName, rootedDestName, "-px",
-        false, true, DistCpConstants.INVALID_ARGUMENT);
-    doTestPreserveRawXAttrs(rawSrcName, rawDestName, "-px",
-        true, true, DistCpConstants.SUCCESS);
+    doTestStandardPreserveRawXAttrs("-px", true);
     final Path savedWd = fs.getWorkingDirectory();
     try {
       fs.setWorkingDirectory(new Path("/.reserved/raw"));
@@ -103,27 +96,18 @@ public class TestDistCpWithRawXAttrs {
   /* Test that XAttrs are not preserved and raw.* are when appropriate. */
   @Test
   public void testPreserveRawXAttrs2() throws Exception {
-    doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, "-p",
-        false, false, DistCpConstants.SUCCESS);
-    doTestPreserveRawXAttrs(rootedSrcName, rawDestName, "-p",
-        false, false, DistCpConstants.INVALID_ARGUMENT);
-    doTestPreserveRawXAttrs(rawSrcName, rootedDestName, "-p",
-        false, false, DistCpConstants.INVALID_ARGUMENT);
-    doTestPreserveRawXAttrs(rawSrcName, rawDestName, "-p",
-        true, false, DistCpConstants.SUCCESS);
+    doTestStandardPreserveRawXAttrs("-p", false);
   }
 
   /* Test that XAttrs are not preserved and raw.* are when appropriate. */
   @Test
   public void testPreserveRawXAttrs3() throws Exception {
-    doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, null,
-        false, false, DistCpConstants.SUCCESS);
-    doTestPreserveRawXAttrs(rootedSrcName, rawDestName, null,
-        false, false, DistCpConstants.INVALID_ARGUMENT);
-    doTestPreserveRawXAttrs(rawSrcName, rootedDestName, null,
-        false, false, DistCpConstants.INVALID_ARGUMENT);
-    doTestPreserveRawXAttrs(rawSrcName, rawDestName, null,
-        true, false, DistCpConstants.SUCCESS);
+    doTestStandardPreserveRawXAttrs(null, false);
+  }
+
+  @Test
+  public void testPreserveRawXAttrs4() throws Exception {
+    doTestStandardPreserveRawXAttrs("-update -delete", false);
   }
 
   private static Path[] pathnames = { new Path("dir1"),
@@ -145,6 +129,19 @@ public class TestDistCpWithRawXAttrs {
     }
   }
 
+  private void doTestStandardPreserveRawXAttrs(String options,
+      boolean expectUser)
+      throws Exception {
+    doTestPreserveRawXAttrs(rootedSrcName, rootedDestName, options,
+        false, expectUser, DistCpConstants.SUCCESS);
+    doTestPreserveRawXAttrs(rootedSrcName, rawDestName, options,
+        false, expectUser, DistCpConstants.INVALID_ARGUMENT);
+    doTestPreserveRawXAttrs(rawSrcName, rootedDestName, options,
+        false, expectUser, DistCpConstants.INVALID_ARGUMENT);
+    doTestPreserveRawXAttrs(rawSrcName, rawDestName, options,
+        true, expectUser, DistCpConstants.SUCCESS);
+  }
+
   private void doTestPreserveRawXAttrs(String src, String dest,
       String preserveOpts, boolean expectRaw, boolean expectUser,
       int expectedExitCode) throws Exception {

+ 20 - 12
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/DistCpTestUtils.java

@@ -18,21 +18,20 @@
 
 package org.apache.hadoop.tools.util;
 
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Map.Entry;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-
 import org.apache.hadoop.tools.DistCp;
 import org.apache.hadoop.util.ToolRunner;
 
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 /**
  * Utility class for DistCpTests
  */
@@ -79,10 +78,19 @@ public class DistCpTestUtils {
   public static void assertRunDistCp(int exitCode, String src, String dst,
       String options, Configuration conf)
       throws Exception {
+    assertRunDistCp(exitCode, src, dst,
+        options == null ? new String[0] : options.trim().split(" "), conf);
+  }
+
+  private static void assertRunDistCp(int exitCode, String src, String dst,
+      String[] options, Configuration conf)
+      throws Exception {
     DistCp distCp = new DistCp(conf, null);
-    String[] optsArr = options == null ?
-        new String[] { src, dst } :
-        new String[] { options, src, dst };
+    String[] optsArr = new String[options.length + 2];
+    System.arraycopy(options, 0, optsArr, 0, options.length);
+    optsArr[optsArr.length - 2] = src;
+    optsArr[optsArr.length - 1] = dst;
+
     assertEquals(exitCode,
         ToolRunner.run(conf, distCp, optsArr));
   }