瀏覽代碼

HADOOP-18328. S3A to support S3 on Outposts (#4533)

Contributed by Sotetsu Suzugamine

HADOOP-18328. Add documentation for S3A support on S3 Outposts (#5976)

Contributed by Yuting Chen
suzu 1 年之前
父節點
當前提交
a3bbeba0f1

+ 8 - 2
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java

@@ -26,7 +26,8 @@ import com.amazonaws.arn.Arn;
  * Represents an Arn Resource, this can be an accesspoint or bucket.
  * Represents an Arn Resource, this can be an accesspoint or bucket.
  */
  */
 public final class ArnResource {
 public final class ArnResource {
-  private final static String ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com";
+  private final static String S3_ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com";
+  private final static String S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT = "s3-outposts.%s.amazonaws.com";
 
 
   /**
   /**
    * Resource name.
    * Resource name.
@@ -69,6 +70,10 @@ public final class ArnResource {
     this.accessPointRegionKey = String.format("accesspoint-%s", region);
     this.accessPointRegionKey = String.format("accesspoint-%s", region);
   }
   }
 
 
+  private boolean isOutposts(){
+    return fullArn.contains("s3-outposts");
+  }
+
   /**
   /**
    * Resource name.
    * Resource name.
    * @return resource name.
    * @return resource name.
@@ -106,7 +111,8 @@ public final class ArnResource {
    * @return resource endpoint.
    * @return resource endpoint.
    */
    */
   public String getEndpoint() {
   public String getEndpoint() {
-    return String.format(ACCESSPOINT_ENDPOINT_FORMAT, region);
+    String format = isOutposts() ? S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT : S3_ACCESSPOINT_ENDPOINT_FORMAT;
+    return String.format(format, region);
   }
   }
 
 
   /**
   /**

+ 18 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

@@ -1709,6 +1709,24 @@ the storage class you want.
 Please note that S3A does not support reading from archive storage classes at the moment.
 Please note that S3A does not support reading from archive storage classes at the moment.
 `AccessDeniedException` with InvalidObjectState will be thrown if you're trying to do so.
 `AccessDeniedException` with InvalidObjectState will be thrown if you're trying to do so.
 
 
+## <a name="outposts"></a>Configuring S3A for S3 on Outposts
+
+S3A now supports [S3 on Outposts](https://docs.aws.amazon.com/AmazonS3/latest/userguide/S3onOutposts.html).
+Accessing data through an access point is done by using its Amazon Resource Name (ARN), as opposed to just the bucket name.
+The only supported storage class on Outposts is **OUTPOSTS**, and by default objects are encrypted with [SSE-S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-outposts-data-encryption.html).
+You can set the Access Point ARN using the following per-bucket configuration property:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.sample-outpost-bucket.accesspoint.arn</name>
+  <value>arn:aws:s3-outposts:region:account-id:outpost/outpost-id/accesspoint/accesspoint-name</value>
+  <description>Configure S3A traffic to use this S3 on Outposts Access Point ARN</description>
+</property>
+```
+
+This configures S3A access to `sample-outpost-bucket` to go through the configured Access Point ARN. So, for example, `s3a://sample-outpost-bucket/key` will now use that ARN when getting data from S3 on Outposts instead of addressing the bucket by name.
+
+
 ## <a name="upload"></a>How S3A writes data to S3
 ## <a name="upload"></a>How S3A writes data to S3
 
 
 The original S3A client implemented file writes by
 The original S3A client implemented file writes by

+ 19 - 5
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java

@@ -56,7 +56,7 @@ public class TestArnResource extends HadoopTestBase {
       String region = testPair[0];
       String region = testPair[0];
       String partition = testPair[1];
       String partition = testPair[1];
 
 
-      ArnResource resource = getArnResourceFrom(partition, region, MOCK_ACCOUNT, accessPoint);
+      ArnResource resource = getArnResourceFrom(partition, "s3", region, MOCK_ACCOUNT, accessPoint);
       assertEquals("Access Point name does not match", accessPoint, resource.getName());
       assertEquals("Access Point name does not match", accessPoint, resource.getName());
       assertEquals("Account Id does not match", MOCK_ACCOUNT, resource.getOwnerAccountId());
       assertEquals("Account Id does not match", MOCK_ACCOUNT, resource.getOwnerAccountId());
       assertEquals("Region does not match", region, resource.getRegion());
       assertEquals("Region does not match", region, resource.getRegion());
@@ -64,10 +64,10 @@ public class TestArnResource extends HadoopTestBase {
   }
   }
 
 
   @Test
   @Test
-  public void makeSureEndpointHasTheCorrectFormat() {
+  public void makeSureS3EndpointHasTheCorrectFormat() {
     // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the
     // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the
     // endpoints for the client are modified. This test makes sure endpoint is set up correctly.
     // endpoints for the client are modified. This test makes sure endpoint is set up correctly.
-    ArnResource accessPoint = getArnResourceFrom("aws", "eu-west-1", MOCK_ACCOUNT,
+    ArnResource accessPoint = getArnResourceFrom("aws", "s3", "eu-west-1", MOCK_ACCOUNT,
         "test");
         "test");
     String expected = "s3-accesspoint.eu-west-1.amazonaws.com";
     String expected = "s3-accesspoint.eu-west-1.amazonaws.com";
 
 
@@ -76,6 +76,19 @@ public class TestArnResource extends HadoopTestBase {
         .isEqualTo(expected);
         .isEqualTo(expected);
   }
   }
 
 
+  @Test
+  public void makeSureS3OutpostsEndpointHasTheCorrectFormat() {
+    // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the
+    // endpoints for the client are modified. This test makes sure endpoint is set up correctly.
+    ArnResource accessPoint = getArnResourceFrom("aws", "s3-outposts", "eu-west-1", MOCK_ACCOUNT,
+        "test");
+    String expected = "s3-outposts.eu-west-1.amazonaws.com";
+
+    Assertions.assertThat(accessPoint.getEndpoint())
+        .describedAs("Endpoint has invalid format. Access Point requests will not work")
+        .isEqualTo(expected);
+  }
+
   @Test
   @Test
   public void invalidARNsMustThrow() throws Exception {
   public void invalidARNsMustThrow() throws Exception {
     describe("Using an invalid ARN format must throw when initializing an ArnResource.");
     describe("Using an invalid ARN format must throw when initializing an ArnResource.");
@@ -87,15 +100,16 @@ public class TestArnResource extends HadoopTestBase {
   /**
   /**
    * Create an {@link ArnResource} from string components
    * Create an {@link ArnResource} from string components
    * @param partition - partition for ARN
    * @param partition - partition for ARN
+   * @param service - service for ARN
    * @param region - region for ARN
    * @param region - region for ARN
    * @param accountId - accountId for ARN
    * @param accountId - accountId for ARN
    * @param resourceName - ARN resource name
    * @param resourceName - ARN resource name
    * @return ArnResource described by its properties
    * @return ArnResource described by its properties
    */
    */
-  private ArnResource getArnResourceFrom(String partition, String region, String accountId,
+  private ArnResource getArnResourceFrom(String partition, String service, String region, String accountId,
       String resourceName) {
       String resourceName) {
     // arn:partition:service:region:account-id:resource-type/resource-id
     // arn:partition:service:region:account-id:resource-type/resource-id
-    String arn = String.format("arn:%s:s3:%s:%s:accesspoint/%s", partition, region, accountId,
+    String arn = String.format("arn:%s:%s:%s:%s:accesspoint/%s", partition, service, region, accountId,
         resourceName);
         resourceName);
 
 
     return ArnResource.accessPointFromArn(arn);
     return ArnResource.accessPointFromArn(arn);