Browse Source

HADOOP-16775. DistCp reuses the same temp file within the task for different files.

Contributed by Amir Shenavandeh.

The temp file name now carries a timestamp suffix, so each copied file gets its own temp path; this avoids overwrite consistency issues with S3 and other stores.

Change-Id: Ic4d05ef3397e963ba28fd9f775bb362b0da36ad9
Steve Loughran 5 years ago
parent
commit
caec6a1945

+ 3 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -231,7 +231,9 @@ public class RetriableFileCopyCommand extends RetriableCommand {
     Path root = target.equals(targetWorkPath) ? targetWorkPath.getParent()
         : targetWorkPath;
     Path tempFile = new Path(root, ".distcp.tmp." +
-        context.getTaskAttemptID().toString());
+        context.getTaskAttemptID().toString() +
+        "." + String.valueOf(System.currentTimeMillis()));
+
     LOG.info("Creating temp file: " + tempFile);
     return tempFile;
   }