Browse Source

HADOOP-11687. Ignore x-* and response headers when copying an Amazon S3 object. Contributed by Aaron Peterson and harsh.

Harsh J 9 years ago
parent
commit
256c82fe29

+ 69 - 1
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -26,6 +26,7 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 
@@ -1128,7 +1129,7 @@ public class S3AFileSystem extends FileSystem {
     }
 
     ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey);
-    final ObjectMetadata dstom = srcom.clone();
+    ObjectMetadata dstom = cloneObjectMetadata(srcom);
     if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
       dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm);
     }
@@ -1234,6 +1235,73 @@ public class S3AFileSystem extends FileSystem {
     statistics.incrementWriteOps(1);
   }
 
+  /**
+   * Creates a copy of the passed {@link ObjectMetadata}.
+   * Does so without using the {@link ObjectMetadata#clone()} method,
+   * to avoid copying unnecessary headers.
+   * @param source the {@link ObjectMetadata} to copy
+   * @return a copy of {@link ObjectMetadata} with only relevant attributes
+   */
+  private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
+    // This approach may be too brittle, especially if
+    // in future there are new attributes added to ObjectMetadata
+    // that we do not explicitly call to set here
+    ObjectMetadata ret = new ObjectMetadata();
+
+    // Non null attributes
+    ret.setContentLength(source.getContentLength());
+
+    // Possibly null attributes
+    // Allowing nulls to pass breaks it during later use
+    if (source.getCacheControl() != null) {
+      ret.setCacheControl(source.getCacheControl());
+    }
+    if (source.getContentDisposition() != null) {
+      ret.setContentDisposition(source.getContentDisposition());
+    }
+    if (source.getContentEncoding() != null) {
+      ret.setContentEncoding(source.getContentEncoding());
+    }
+    if (source.getContentMD5() != null) {
+      ret.setContentMD5(source.getContentMD5());
+    }
+    if (source.getContentType() != null) {
+      ret.setContentType(source.getContentType());
+    }
+    if (source.getExpirationTime() != null) {
+      ret.setExpirationTime(source.getExpirationTime());
+    }
+    if (source.getExpirationTimeRuleId() != null) {
+      ret.setExpirationTimeRuleId(source.getExpirationTimeRuleId());
+    }
+    if (source.getHttpExpiresDate() != null) {
+      ret.setHttpExpiresDate(source.getHttpExpiresDate());
+    }
+    if (source.getLastModified() != null) {
+      ret.setLastModified(source.getLastModified());
+    }
+    if (source.getOngoingRestore() != null) {
+      ret.setOngoingRestore(source.getOngoingRestore());
+    }
+    if (source.getRestoreExpirationTime() != null) {
+      ret.setRestoreExpirationTime(source.getRestoreExpirationTime());
+    }
+    if (source.getSSEAlgorithm() != null) {
+      ret.setSSEAlgorithm(source.getSSEAlgorithm());
+    }
+    if (source.getSSECustomerAlgorithm() != null) {
+      ret.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm());
+    }
+    if (source.getSSECustomerKeyMd5() != null) {
+      ret.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5());
+    }
+
+    for (Map.Entry<String, String> e : source.getUserMetadata().entrySet()) {
+      ret.addUserMetadata(e.getKey(), e.getValue());
+    }
+    return ret;
+  }
+
   /**
    * Return the number of bytes that large input files should be optimally
    * be split into to minimize i/o time.

+ 7 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

@@ -417,6 +417,13 @@ which pass in authentication details to the test runner
 These are both Hadoop XML configuration files, which must be placed into
 `hadoop-tools/hadoop-aws/src/test/resources`.
 
+### `core-site.xml`
+
+This file pre-exists and sources the configurations created
+under `auth-keys.xml`.
+
+For most purposes you will not need to edit this file unless you
+need to apply a specific, non-default property change during the tests.
 
 ### `auth-keys.xml`