瀏覽代碼

HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)

Allen Wittenauer 10 年之前
父節點
當前提交
3cde37c991

+ 2 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -543,6 +543,8 @@ Release 2.6.0 - UNRELEASED
     HADOOP-11017. KMS delegation token secret manager should be able to use 
     zookeeper as store. (asuresh via tucu)
 
+    HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)
+
   OPTIMIZATIONS
 
     HADOOP-10838. Byte array native checksumming. (James Thomas via todd)

+ 1 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListingFileStatus.java

@@ -101,7 +101,7 @@ public final class CopyListingFileStatus extends FileStatus {
    * @return Map<String, byte[]> containing all xAttrs
    */
   public Map<String, byte[]> getXAttrs() {
-    return xAttrs;
+    return xAttrs != null ? xAttrs : Collections.<String, byte[]>emptyMap();
   }
   
   /**

+ 12 - 9
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java

@@ -37,18 +37,21 @@ public enum DistCpOptionSwitch {
   /**
    * Preserves status of file/path in the target.
    * Default behavior with -p, is to preserve replication,
-   * block size, user, group, permission and checksum type on the target file.
-   * Note that when preserving checksum type, block size is also preserved.
+   * block size, user, group, permission, checksum type and timestamps on the 
+   * target file. Note that when preserving checksum type, block size is also 
+   * preserved.
    *
-   * If any of the optional switches are present among rbugpc, then
-   * only the corresponding file attribute is preserved.
+   * @see PRESERVE_STATUS_DEFAULT
    *
+   * If any of the optional switches are present among rbugpcaxt, then
+   * only the corresponding file attribute is preserved.
    */
   PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
-      new Option("p", true, "preserve status (rbugpcax)(replication, " +
-          "block-size, user, group, permission, checksum-type, ACL, XATTR).  " +
-          "If -p is specified with no <arg>, then preserves replication, " +
-          "block size, user, group, permission and checksum type." +
+      new Option("p", true, "preserve status (rbugpcaxt)(replication, " +
+          "block-size, user, group, permission, checksum-type, ACL, XATTR, " +
+          "timestamps). If -p is specified with no <arg>, then preserves " +
+          "replication, block size, user, group, permission, checksum type " +
+          "and timestamps. " +
           "raw.* xattrs are preserved when both the source and destination " +
           "paths are in the /.reserved/raw hierarchy (HDFS only). raw.* xattr" +
           "preservation is independent of the -p flag." +
@@ -166,7 +169,7 @@ public enum DistCpOptionSwitch {
   BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
       new Option("bandwidth", true, "Specify bandwidth per map in MB"));
 
-  static final String PRESERVE_STATUS_DEFAULT = "-prbugpc";
+  public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct";
   private final String confLabel;
   private final Option option;
 

+ 1 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java

@@ -68,7 +68,7 @@ public class DistCpOptions {
   private boolean targetPathExists = true;
   
   public static enum FileAttribute{
-    REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR;
+    REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR, TIMES;
 
     public static FileAttribute getAttribute(char symbol) {
       for (FileAttribute attribute : values()) {

+ 31 - 27
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java

@@ -18,39 +18,39 @@
 
 package org.apache.hadoop.tools.util;
 
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.URI;
+import java.net.UnknownHostException;
+import java.text.DecimalFormat;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.XAttr;
 import org.apache.hadoop.fs.permission.AclEntry;
 import org.apache.hadoop.fs.permission.AclUtil;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
 import org.apache.hadoop.tools.CopyListing.XAttrsNotSupportedException;
 import org.apache.hadoop.tools.CopyListingFileStatus;
+import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
 import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
-import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
-import org.apache.hadoop.tools.DistCpOptions;
-import org.apache.hadoop.mapreduce.InputFormat;
 
-import java.io.IOException;
-import java.util.EnumSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.text.DecimalFormat;
-import java.net.URI;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
+import com.google.common.collect.Maps;
 
 /**
  * Utility functions used in DistCp.
@@ -163,7 +163,7 @@ public class DistCpUtils {
   }
 
   /**
-   * Un packs preservation attribute string containing the first character of
+   * Unpacks preservation attribute string containing the first character of
    * each preservation attribute back to a set of attributes to preserve
    * @param attributes - Attribute string
    * @return - Attribute set
@@ -209,7 +209,7 @@ public class DistCpUtils {
       if (!srcAcl.equals(targetAcl)) {
         targetFS.setAcl(path, srcAcl);
       }
-      // setAcl can't preserve sticky bit, so also call setPermission if needed.
+      // setAcl doesn't preserve sticky bit, so also call setPermission if needed.
       if (srcFileStatus.getPermission().getStickyBit() !=
           targetFileStatus.getPermission().getStickyBit()) {
         targetFS.setPermission(path, srcFileStatus.getPermission());
@@ -225,30 +225,28 @@ public class DistCpUtils {
       Map<String, byte[]> srcXAttrs = srcFileStatus.getXAttrs();
       Map<String, byte[]> targetXAttrs = getXAttrs(targetFS, path);
       if (srcXAttrs != null && !srcXAttrs.equals(targetXAttrs)) {
-        Iterator<Entry<String, byte[]>> iter = srcXAttrs.entrySet().iterator();
-        while (iter.hasNext()) {
-          Entry<String, byte[]> entry = iter.next();
-          final String xattrName = entry.getKey();
+        for (Entry<String, byte[]> entry : srcXAttrs.entrySet()) {
+          String xattrName = entry.getKey();
           if (xattrName.startsWith(rawNS) || preserveXAttrs) {
-            targetFS.setXAttr(path, entry.getKey(), entry.getValue());
+            targetFS.setXAttr(path, xattrName, entry.getValue());
           }
         }
       }
     }
 
-    if (attributes.contains(FileAttribute.REPLICATION) && ! targetFileStatus.isDirectory() &&
-        srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
+    if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
+        (srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
       targetFS.setReplication(path, srcFileStatus.getReplication());
     }
 
     if (attributes.contains(FileAttribute.GROUP) &&
-            !group.equals(srcFileStatus.getGroup())) {
+        !group.equals(srcFileStatus.getGroup())) {
       group = srcFileStatus.getGroup();
       chown = true;
     }
 
     if (attributes.contains(FileAttribute.USER) &&
-            !user.equals(srcFileStatus.getOwner())) {
+        !user.equals(srcFileStatus.getOwner())) {
       user = srcFileStatus.getOwner();
       chown = true;
     }
@@ -256,6 +254,12 @@ public class DistCpUtils {
     if (chown) {
       targetFS.setOwner(path, user, group);
     }
+    
+    if (attributes.contains(FileAttribute.TIMES)) {
+      targetFS.setTimes(path, 
+          srcFileStatus.getModificationTime(), 
+          srcFileStatus.getAccessTime());
+    }
   }
 
   /**

+ 1 - 1
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java

@@ -497,7 +497,7 @@ public class TestOptionsParser {
       attribIterator.next();
       i++;
     }
-    Assert.assertEquals(i, 6);
+    Assert.assertEquals(i, DistCpOptionSwitch.PRESERVE_STATUS_DEFAULT.length() - 2);
 
     try {
       OptionsParser.parse(new String[] {

+ 1 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java

@@ -590,6 +590,7 @@ public class TestCopyMapper {
           EnumSet.allOf(DistCpOptions.FileAttribute.class);
       preserveStatus.remove(DistCpOptions.FileAttribute.ACL);
       preserveStatus.remove(DistCpOptions.FileAttribute.XATTR);
+      preserveStatus.remove(DistCpOptions.FileAttribute.TIMES);
 
       context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
         DistCpUtils.packAttributes(preserveStatus));

File diff suppressed because it is too large
+ 926 - 71
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java


Some files were not shown because too many files changed in this diff