Selaa lähdekoodia

HADOOP-18853. Upgrade AWS SDK version to 2.20.128 (#5960)

Upgrades the AWS SDK v2 version to 2.20.128.

This
* adds multipart COPY/rename in the java async client
* removes the aws-crt JAR dependency

Contributed by Ahmar Suhail
ahmarsuhail 1 vuosi sitten
vanhempi
commit
238ba6dc6f

+ 1 - 2
LICENSE-binary

@@ -363,8 +363,7 @@ org.objenesis:objenesis:2.6
 org.xerial.snappy:snappy-java:1.1.10.1
 org.yaml:snakeyaml:2.0
 org.wildfly.openssl:wildfly-openssl:1.1.3.Final
-software.amazon.awssdk:bundle:jar:2.19.12
-software.amazon.awssdk.crt:aws-crt:0.21.0
+software.amazon.awssdk:bundle:jar:2.20.128
 
 
 --------------------------------------------------------------------------------

+ 3 - 9
hadoop-project/pom.xml

@@ -184,9 +184,8 @@
     <surefire.fork.timeout>900</surefire.fork.timeout>
     <aws-java-sdk.version>1.12.367</aws-java-sdk.version>
     <hsqldb.version>2.7.1</hsqldb.version>
-    <aws-java-sdk-v2.version>2.19.12</aws-java-sdk-v2.version>
-    <aws.evenstream.version>1.0.1</aws.evenstream.version>
-    <awscrt.version>0.21.0</awscrt.version>
+    <aws-java-sdk-v2.version>2.20.128</aws-java-sdk-v2.version>
+    <aws.eventstream.version>1.0.1</aws.eventstream.version>
     <frontend-maven-plugin.version>1.11.2</frontend-maven-plugin.version>
     <jasmine-maven-plugin.version>2.1</jasmine-maven-plugin.version>
     <phantomjs-maven-plugin.version>0.7</phantomjs-maven-plugin.version>
@@ -1154,12 +1153,7 @@
       <dependency>
         <groupId>software.amazon.eventstream</groupId>
         <artifactId>eventstream</artifactId>
-        <version>${aws.evenstream.version}</version>
-      </dependency>
-      <dependency>
-        <groupId>software.amazon.awssdk.crt</groupId>
-        <artifactId>aws-crt</artifactId>
-        <version>${awscrt.version}</version>
+        <version>${aws.eventstream.version}</version>
       </dependency>
       <dependency>
         <groupId>org.apache.mina</groupId>

+ 0 - 4
hadoop-tools/hadoop-aws/pom.xml

@@ -518,10 +518,6 @@
       <artifactId>bundle</artifactId>
       <scope>compile</scope>
     </dependency>
-    <dependency>
-      <groupId>software.amazon.awssdk.crt</groupId>
-      <artifactId>aws-crt</artifactId>
-    </dependency>
     <dependency>
       <groupId>software.amazon.eventstream</groupId>
       <artifactId>eventstream</artifactId>

+ 10 - 1
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

@@ -37,6 +37,7 @@ import software.amazon.awssdk.services.s3.S3AsyncClient;
 import software.amazon.awssdk.services.s3.S3BaseClientBuilder;
 import software.amazon.awssdk.services.s3.S3Client;
 import software.amazon.awssdk.services.s3.S3Configuration;
+import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration;
 import software.amazon.awssdk.transfer.s3.S3TransferManager;
 
 import org.apache.commons.lang3.StringUtils;
@@ -98,17 +99,25 @@ public class DefaultS3ClientFactory extends Configured
 
     Configuration conf = getConf();
     String bucket = uri.getHost();
+
     NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig
         .createAsyncHttpClientBuilder(conf)
         .proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket));
+
+    MultipartConfiguration multipartConfiguration = MultipartConfiguration.builder()
+        .minimumPartSizeInBytes(parameters.getMinimumPartSize())
+        .thresholdInBytes(parameters.getMultiPartThreshold())
+        .build();
+
     return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket)
         .httpClientBuilder(httpClientBuilder)
+        .multipartConfiguration(multipartConfiguration)
+        .multipartEnabled(true)
         .build();
   }
 
   @Override
   public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) {
-
     return S3TransferManager.builder()
         .s3Client(s3AsyncClient)
         .build();

+ 1 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -982,6 +982,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
         .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS))
         .withExecutionInterceptors(auditManager.createExecutionInterceptors())
         .withMinimumPartSize(partSize)
+        .withMultipartThreshold(multiPartThreshold)
         .withTransferManagerExecutor(unboundedThreadPool)
         .withRegion(region);
 

+ 24 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java

@@ -151,6 +151,11 @@ public interface S3ClientFactory {
      */
     private long minimumPartSize;
 
+    /**
+     * Threshold for multipart operations.
+     */
+    private long multiPartThreshold;
+
     /**
      * Executor that the transfer manager will use to execute background tasks.
      */
@@ -337,6 +342,25 @@ public interface S3ClientFactory {
       return this;
     }
 
+    /**
+     * Get the threshold for multipart operations.
+     * @return multipart threshold
+     */
+    public long getMultiPartThreshold() {
+      return multiPartThreshold;
+    }
+
+    /**
+     * Set the threshold for multipart operations.
+     * @param value new value
+     * @return the builder
+     */
+    public S3ClientCreationParameters withMultipartThreshold(
+        final long value) {
+      multiPartThreshold = value;
+      return this;
+    }
+
     /**
      * Get the executor that the transfer manager will use to execute background tasks.
      * @return part size