Bladeren bron

HADOOP-11687. Ignore x-* and response headers when copying an Amazon S3 object. Contributed by Aaron Peterson and harsh.

Harsh J 9 jaren geleden
bovenliggende
commit
256c82fe29

+ 69 - 1
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -26,6 +26,7 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.Date;
 import java.util.List;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeUnit;
 
 
@@ -1128,7 +1129,7 @@ public class S3AFileSystem extends FileSystem {
     }
     }
 
 
     ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey);
     ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey);
-    final ObjectMetadata dstom = srcom.clone();
+    ObjectMetadata dstom = cloneObjectMetadata(srcom);
     if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
     if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
       dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm);
       dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm);
     }
     }
@@ -1234,6 +1235,73 @@ public class S3AFileSystem extends FileSystem {
     statistics.incrementWriteOps(1);
     statistics.incrementWriteOps(1);
   }
   }
 
 
+  /**
+   * Creates a copy of the passed {@link ObjectMetadata}.
+   * Does so without using the {@link ObjectMetadata#clone()} method,
+   * to avoid copying unnecessary headers.
+   * @param source the {@link ObjectMetadata} to copy
+   * @return a copy of {@link ObjectMetadata} with only relevant attributes
+   */
+  private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
+    // This approach may be too brittle, especially if
+    // in future there are new attributes added to ObjectMetadata
+    // that we do not explicitly call to set here
+    ObjectMetadata ret = new ObjectMetadata();
+
+    // Non null attributes
+    ret.setContentLength(source.getContentLength());
+
+    // Possibly null attributes
+    // Allowing nulls to pass breaks it during later use
+    if (source.getCacheControl() != null) {
+      ret.setCacheControl(source.getCacheControl());
+    }
+    if (source.getContentDisposition() != null) {
+      ret.setContentDisposition(source.getContentDisposition());
+    }
+    if (source.getContentEncoding() != null) {
+      ret.setContentEncoding(source.getContentEncoding());
+    }
+    if (source.getContentMD5() != null) {
+      ret.setContentMD5(source.getContentMD5());
+    }
+    if (source.getContentType() != null) {
+      ret.setContentType(source.getContentType());
+    }
+    if (source.getExpirationTime() != null) {
+      ret.setExpirationTime(source.getExpirationTime());
+    }
+    if (source.getExpirationTimeRuleId() != null) {
+      ret.setExpirationTimeRuleId(source.getExpirationTimeRuleId());
+    }
+    if (source.getHttpExpiresDate() != null) {
+      ret.setHttpExpiresDate(source.getHttpExpiresDate());
+    }
+    if (source.getLastModified() != null) {
+      ret.setLastModified(source.getLastModified());
+    }
+    if (source.getOngoingRestore() != null) {
+      ret.setOngoingRestore(source.getOngoingRestore());
+    }
+    if (source.getRestoreExpirationTime() != null) {
+      ret.setRestoreExpirationTime(source.getRestoreExpirationTime());
+    }
+    if (source.getSSEAlgorithm() != null) {
+      ret.setSSEAlgorithm(source.getSSEAlgorithm());
+    }
+    if (source.getSSECustomerAlgorithm() != null) {
+      ret.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm());
+    }
+    if (source.getSSECustomerKeyMd5() != null) {
+      ret.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5());
+    }
+
+    for (Map.Entry<String, String> e : source.getUserMetadata().entrySet()) {
+      ret.addUserMetadata(e.getKey(), e.getValue());
+    }
+    return ret;
+  }
+
   /**
   /**
    * Return the number of bytes that large input files should be optimally
    * Return the number of bytes that large input files should be optimally
    * be split into to minimize i/o time.
    * be split into to minimize i/o time.

+ 7 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

@@ -417,6 +417,13 @@ which pass in authentication details to the test runner
 These are both Hadoop XML configuration files, which must be placed into
 These are both Hadoop XML configuration files, which must be placed into
 `hadoop-tools/hadoop-aws/src/test/resources`.
 `hadoop-tools/hadoop-aws/src/test/resources`.
 
 
+### `core-site.xml`
+
+This file pre-exists and sources the configurations created
+under `auth-keys.xml`.
+
+For most purposes you will not need to edit this file unless you
+need to apply a specific, non-default property change during the tests.
 
 
 ### `auth-keys.xml`
 ### `auth-keys.xml`