瀏覽代碼

HADOOP-18328. S3A to support S3 on Outposts (#4533)

Contributed by Sotetsu Suzugamine

HADOOP-18328. Add documentation for S3A support on S3 Outposts (#5976)

Contributed by Yuting Chen
suzu 1 年之前
父節點
當前提交
a3bbeba0f1

+ 8 - 2
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java

@@ -26,7 +26,8 @@ import com.amazonaws.arn.Arn;
  * Represents an Arn Resource, this can be an accesspoint or bucket.
  */
 public final class ArnResource {
-  private final static String ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com";
+  private final static String S3_ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com";
+  private final static String S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT = "s3-outposts.%s.amazonaws.com";
 
   /**
    * Resource name.
@@ -69,6 +70,10 @@ public final class ArnResource {
     this.accessPointRegionKey = String.format("accesspoint-%s", region);
   }
 
+  private boolean isOutposts(){
+    return fullArn.contains("s3-outposts");
+  }
+
   /**
    * Resource name.
    * @return resource name.
@@ -106,7 +111,8 @@ public final class ArnResource {
    * @return resource endpoint.
    */
   public String getEndpoint() {
-    return String.format(ACCESSPOINT_ENDPOINT_FORMAT, region);
+    String format = isOutposts() ? S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT : S3_ACCESSPOINT_ENDPOINT_FORMAT;
+    return String.format(format, region);
   }
 
   /**

+ 18 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

@@ -1709,6 +1709,24 @@ the storage class you want.
 Please note that S3A does not support reading from archive storage classes at the moment.
 `AccessDeniedException` with InvalidObjectState will be thrown if you're trying to do so.
 
+## <a name="outposts"></a>Configuring S3A for S3 on Outposts
+
+S3A now supports [S3 on Outposts](https://docs.aws.amazon.com/AmazonS3/latest/userguide/S3onOutposts.html).
+Accessing data through an access point is done by using its Amazon Resource Name (ARN), as opposed to just the bucket name.
+The only supported storage class on Outposts is **OUTPOSTS**, and by default objects are encrypted with [SSE-S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-outposts-data-encryption.html).
+You can set the Access Point ARN using the following per-bucket configuration property:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.sample-outpost-bucket.accesspoint.arn</name>
+  <value>arn:aws:s3-outposts:region:account-id:outpost/outpost-id/accesspoint/accesspoint-name</value>
+  <description>Configure S3A traffic to use this S3 on Outposts Access Point ARN</description>
+</property>
+```
+
+This configures S3A to access `sample-outpost-bucket` through the configured Access Point ARN. For example, `s3a://sample-outpost-bucket/key` will now use that ARN, instead of the bucket name, when getting data from S3 on Outposts.
+
+
 ## <a name="upload"></a>How S3A writes data to S3
 
 The original S3A client implemented file writes by

+ 19 - 5
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java

@@ -56,7 +56,7 @@ public class TestArnResource extends HadoopTestBase {
       String region = testPair[0];
       String partition = testPair[1];
 
-      ArnResource resource = getArnResourceFrom(partition, region, MOCK_ACCOUNT, accessPoint);
+      ArnResource resource = getArnResourceFrom(partition, "s3", region, MOCK_ACCOUNT, accessPoint);
       assertEquals("Access Point name does not match", accessPoint, resource.getName());
       assertEquals("Account Id does not match", MOCK_ACCOUNT, resource.getOwnerAccountId());
       assertEquals("Region does not match", region, resource.getRegion());
@@ -64,10 +64,10 @@ public class TestArnResource extends HadoopTestBase {
   }
 
   @Test
-  public void makeSureEndpointHasTheCorrectFormat() {
+  public void makeSureS3EndpointHasTheCorrectFormat() {
     // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the
     // endpoints for the client are modified. This test makes sure endpoint is set up correctly.
-    ArnResource accessPoint = getArnResourceFrom("aws", "eu-west-1", MOCK_ACCOUNT,
+    ArnResource accessPoint = getArnResourceFrom("aws", "s3", "eu-west-1", MOCK_ACCOUNT,
         "test");
     String expected = "s3-accesspoint.eu-west-1.amazonaws.com";
 
@@ -76,6 +76,19 @@ public class TestArnResource extends HadoopTestBase {
         .isEqualTo(expected);
   }
 
+  @Test
+  public void makeSureS3OutpostsEndpointHasTheCorrectFormat() {
+    // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the
+    // endpoints for the client are modified. This test makes sure endpoint is set up correctly.
+    ArnResource accessPoint = getArnResourceFrom("aws", "s3-outposts", "eu-west-1", MOCK_ACCOUNT,
+        "test");
+    String expected = "s3-outposts.eu-west-1.amazonaws.com";
+
+    Assertions.assertThat(accessPoint.getEndpoint())
+        .describedAs("Endpoint has invalid format. Access Point requests will not work")
+        .isEqualTo(expected);
+  }
+
   @Test
   public void invalidARNsMustThrow() throws Exception {
     describe("Using an invalid ARN format must throw when initializing an ArnResource.");
@@ -87,15 +100,16 @@ public class TestArnResource extends HadoopTestBase {
   /**
    * Create an {@link ArnResource} from string components
    * @param partition - partition for ARN
+   * @param service - service for ARN
    * @param region - region for ARN
    * @param accountId - accountId for ARN
    * @param resourceName - ARN resource name
    * @return ArnResource described by its properties
    */
-  private ArnResource getArnResourceFrom(String partition, String region, String accountId,
+  private ArnResource getArnResourceFrom(String partition, String service, String region, String accountId,
       String resourceName) {
     // arn:partition:service:region:account-id:resource-type/resource-id
-    String arn = String.format("arn:%s:s3:%s:%s:accesspoint/%s", partition, region, accountId,
+    String arn = String.format("arn:%s:%s:%s:%s:accesspoint/%s", partition, service, region, accountId,
         resourceName);
 
     return ArnResource.accessPointFromArn(arn);