Преглед на файлове

HDFS-8151. Always use snapshot path as source when invalid snapshot names are used for diff based distcp. Contributed by Jing Zhao.

Jing Zhao преди 10 години
родител
ревизия
4c097e473b

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -522,6 +522,9 @@ Release 2.7.1 - UNRELEASED
     HDFS-8127. NameNode Failover during HA upgrade can cause DataNode to
     finalize upgrade. (jing9)
 
+    HDFS-8151. Always use snapshot path as source when invalid snapshot names
+    are used for diff based distcp. (jing9)
+
 Release 2.7.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 7 - 5
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java

@@ -47,8 +47,8 @@ class DistCpSync {
     List<Path> sourcePaths = inputOptions.getSourcePaths();
     if (sourcePaths.size() != 1) {
       // we only support one source dir which must be a snapshottable directory
-      DistCp.LOG.warn(sourcePaths.size() + " source paths are provided");
-      return false;
+      throw new IllegalArgumentException(sourcePaths.size()
+          + " source paths are provided");
     }
     final Path sourceDir = sourcePaths.get(0);
     final Path targetDir = inputOptions.getTargetPath();
@@ -59,15 +59,17 @@ class DistCpSync {
     // DistributedFileSystem.
     if (!(sfs instanceof DistributedFileSystem) ||
         !(tfs instanceof DistributedFileSystem)) {
-      DistCp.LOG.warn("To use diff-based distcp, the FileSystems needs to" +
-          " be DistributedFileSystem");
-      return false;
+      throw new IllegalArgumentException("The FileSystems needs to" +
+          " be DistributedFileSystem for using snapshot-diff-based distcp");
     }
     final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs;
     final DistributedFileSystem targetFs= (DistributedFileSystem) tfs;
 
     // make sure targetFS has no change between from and the current states
     if (!checkNoChange(inputOptions, targetFs, targetDir)) {
+      // set the source path using the snapshot path
+      inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
+          inputOptions.getToSnapshot())));
       return false;
     }
 

+ 14 - 1
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java

@@ -88,24 +88,37 @@ public class TestDistCpSync {
   public void testFallback() throws Exception {
     // the source/target dir are not snapshottable dir
     Assert.assertFalse(DistCpSync.sync(options, conf));
+    // make sure the source path has been updated to the snapshot path
+    final Path spath = new Path(source,
+        HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
+    Assert.assertEquals(spath, options.getSourcePaths().get(0));
 
+    // reset source path in options
+    options.setSourcePaths(Arrays.asList(source));
     // the source/target does not have the given snapshots
     dfs.allowSnapshot(source);
     dfs.allowSnapshot(target);
     Assert.assertFalse(DistCpSync.sync(options, conf));
+    Assert.assertEquals(spath, options.getSourcePaths().get(0));
 
+    // reset source path in options
+    options.setSourcePaths(Arrays.asList(source));
     dfs.createSnapshot(source, "s1");
     dfs.createSnapshot(source, "s2");
     dfs.createSnapshot(target, "s1");
     Assert.assertTrue(DistCpSync.sync(options, conf));
+
     // reset source paths in options
     options.setSourcePaths(Arrays.asList(source));
-
     // changes have been made in target
     final Path subTarget = new Path(target, "sub");
     dfs.mkdirs(subTarget);
     Assert.assertFalse(DistCpSync.sync(options, conf));
+    // make sure the source path has been updated to the snapshot path
+    Assert.assertEquals(spath, options.getSourcePaths().get(0));
 
+    // reset source paths in options
+    options.setSourcePaths(Arrays.asList(source));
     dfs.delete(subTarget, true);
     Assert.assertTrue(DistCpSync.sync(options, conf));
   }