فهرست منبع

HDFS-8151. Always use snapshot path as source when invalid snapshot names are used for diff based distcp. Contributed by Jing Zhao.

(cherry picked from commit 4c097e473bb1f18d1510deb61bae2bcb8c156f18)
(cherry picked from commit d4dd97eabd6691eedeeb9fb7685060dfb192ff21)
Jing Zhao 10 سال پیش
والد
کامیت
9e11ae684a

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -20,6 +20,9 @@ Release 2.7.1 - UNRELEASED
     HDFS-8127. NameNode Failover during HA upgrade can cause DataNode to
     finalize upgrade. (jing9)
 
+    HDFS-8151. Always use snapshot path as source when invalid snapshot names
+    are used for diff based distcp. (jing9)
+
 Release 2.7.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 7 - 5
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java

@@ -47,8 +47,8 @@ class DistCpSync {
     List<Path> sourcePaths = inputOptions.getSourcePaths();
     if (sourcePaths.size() != 1) {
       // we only support one source dir which must be a snapshottable directory
-      DistCp.LOG.warn(sourcePaths.size() + " source paths are provided");
-      return false;
+      throw new IllegalArgumentException(sourcePaths.size()
+          + " source paths are provided");
     }
     final Path sourceDir = sourcePaths.get(0);
     final Path targetDir = inputOptions.getTargetPath();
@@ -59,15 +59,17 @@ class DistCpSync {
     // DistributedFileSystem.
     if (!(sfs instanceof DistributedFileSystem) ||
         !(tfs instanceof DistributedFileSystem)) {
-      DistCp.LOG.warn("To use diff-based distcp, the FileSystems needs to" +
-          " be DistributedFileSystem");
-      return false;
+      throw new IllegalArgumentException("The FileSystems needs to" +
+          " be DistributedFileSystem for using snapshot-diff-based distcp");
     }
     final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs;
     final DistributedFileSystem targetFs= (DistributedFileSystem) tfs;
 
     // make sure targetFS has no change between from and the current states
     if (!checkNoChange(inputOptions, targetFs, targetDir)) {
+      // set the source path using the snapshot path
+      inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
+          inputOptions.getToSnapshot())));
       return false;
     }
 

+ 14 - 1
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java

@@ -88,24 +88,37 @@ public class TestDistCpSync {
   public void testFallback() throws Exception {
     // the source/target dir are not snapshottable dir
     Assert.assertFalse(DistCpSync.sync(options, conf));
+    // make sure the source path has been updated to the snapshot path
+    final Path spath = new Path(source,
+        HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
+    Assert.assertEquals(spath, options.getSourcePaths().get(0));
 
+    // reset source path in options
+    options.setSourcePaths(Arrays.asList(source));
     // the source/target does not have the given snapshots
     dfs.allowSnapshot(source);
     dfs.allowSnapshot(target);
     Assert.assertFalse(DistCpSync.sync(options, conf));
+    Assert.assertEquals(spath, options.getSourcePaths().get(0));
 
+    // reset source path in options
+    options.setSourcePaths(Arrays.asList(source));
     dfs.createSnapshot(source, "s1");
     dfs.createSnapshot(source, "s2");
     dfs.createSnapshot(target, "s1");
     Assert.assertTrue(DistCpSync.sync(options, conf));
+
     // reset source paths in options
     options.setSourcePaths(Arrays.asList(source));
-
     // changes have been made in target
     final Path subTarget = new Path(target, "sub");
     dfs.mkdirs(subTarget);
     Assert.assertFalse(DistCpSync.sync(options, conf));
+    // make sure the source path has been updated to the snapshot path
+    Assert.assertEquals(spath, options.getSourcePaths().get(0));
 
+    // reset source paths in options
+    options.setSourcePaths(Arrays.asList(source));
     dfs.delete(subTarget, true);
     Assert.assertTrue(DistCpSync.sync(options, conf));
   }