Bläddra i källkod

HADOOP-19286: S3A: Support cross region access when S3 region/endpoint is set (#7067)

Adds new option
   fs.s3a.cross.region.access.enabled
Which is true by default

This makes cross-region access a separate configuration option, so that it can be enabled or disabled irrespective of whether a region/endpoint is set.

Contributed by Syed Shameerur Rahman
Syed Shameerur Rahman 7 månader sedan
förälder
incheckning
e9ed21c065

+ 13 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

@@ -1372,6 +1372,19 @@ public final class Constants {
    */
   public static final String XA_HEADER_PREFIX = "header.";
 
+  /**
+   * Is S3 cross region access enabled?
+   * When true, the S3 client is built with the SDK's cross region
+   * access switched on, whether or not a region/endpoint is configured.
+   * Value: {@value}.
+   */
+
+  public static final String AWS_S3_CROSS_REGION_ACCESS_ENABLED =
+      "fs.s3a.cross.region.access.enabled";
+  /**
+   * Default value for S3 cross region access enabled: {@value}.
+   */
+  public static final boolean AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT = true;
+
+
   /**
    * AWS S3 region for the bucket. When set bypasses the construction of
    * region through endpoint url.

+ 10 - 4
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

@@ -58,6 +58,8 @@ import org.apache.hadoop.fs.store.LogExactlyOnce;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
@@ -330,7 +332,6 @@ public class DefaultS3ClientFactory extends Configured
         builder.endpointOverride(endpoint);
         LOG.debug("Setting endpoint to {}", endpoint);
       } else {
-        builder.crossRegionAccessEnabled(true);
         origin = "central endpoint with cross region access";
         LOG.debug("Enabling cross region access for endpoint {}",
             endpointStr);
@@ -343,7 +344,6 @@ public class DefaultS3ClientFactory extends Configured
       // no region is configured, and none could be determined from the endpoint.
       // Use US_EAST_2 as default.
       region = Region.of(AWS_S3_DEFAULT_REGION);
-      builder.crossRegionAccessEnabled(true);
       builder.region(region);
       origin = "cross region access fallback";
     } else if (configuredRegion.isEmpty()) {
@@ -354,8 +354,14 @@ public class DefaultS3ClientFactory extends Configured
       LOG.debug(SDK_REGION_CHAIN_IN_USE);
       origin = "SDK region chain";
     }
-
-    LOG.debug("Setting region to {} from {}", region, origin);
+    boolean isCrossRegionAccessEnabled = conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT);
+    // s3 cross region access
+    if (isCrossRegionAccessEnabled) {
+      builder.crossRegionAccessEnabled(true);
+    }
+    LOG.debug("Setting region to {} from {} with cross region access {}",
+        region, origin, isCrossRegionAccessEnabled);
   }
 
   /**

+ 10 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md

@@ -48,6 +48,16 @@ There are multiple ways to connect to an S3 bucket
 
 The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket.
 
+The S3A connector supports S3 cross-region access via the AWS SDK; this is enabled by default. It allows users to access S3 buckets in a region other than the one defined in the S3 endpoint/region configuration, as long as they are within the same AWS partition. Cross-region access can be disabled by setting:
+```xml
+<property>
+  <name>fs.s3a.cross.region.access.enabled</name>
+  <value>false</value>
+  <description>S3 cross region access</description>
+</property>
+```
+
+
 Not supported:
 * AWS [Snowball](https://aws.amazon.com/snowball/).
 

+ 3 - 2
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java

@@ -439,6 +439,7 @@ public class ITestS3AConfiguration {
   @Test
   public void testRequestTimeout() throws Exception {
     conf = new Configuration();
+    skipIfCrossRegionClient(conf);
     // remove the safety check on minimum durations.
     AWSClientConfig.setMinimumOperationDuration(Duration.ZERO);
     try {
@@ -632,8 +633,8 @@ public class ITestS3AConfiguration {
    */
   private static void skipIfCrossRegionClient(
       Configuration configuration) {
-    if (configuration.get(ENDPOINT, null) == null
-        && configuration.get(AWS_REGION, null) == null) {
+    // the option defaults to true, so the test is skipped unless the
+    // configuration explicitly disables cross region access.
+    if (configuration.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT)) {
       skip("Skipping test as cross region client is in use ");
     }
   }

+ 49 - 0
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java

@@ -44,8 +44,10 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;
 import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils;
 
+import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
 import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
 import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
@@ -71,6 +73,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
 
   private static final String US_WEST_2 = "us-west-2";
 
+  private static final String SA_EAST_1 = "sa-east-1";
+
   private static final String EU_WEST_2 = "eu-west-2";
 
   private static final String CN_NORTHWEST_1 = "cn-northwest-1";
@@ -346,6 +350,41 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
     assertRequesterPaysFileExistence(newConf);
   }
 
+  /**
+   * Verify that with cross region access disabled, probing the test bucket
+   * through a client bound to a different region ({@code sa-east-1}) is
+   * rejected with an {@link AWSRedirectException}.
+   * Skipped when the bucket region is unknown or is itself sa-east-1.
+   */
+  @Test
+  public void testWithOutCrossRegionAccess() throws Exception {
+    describe("Verify cross region access fails when disabled");
+    // skip the test if the region is sa-east-1
+    skipCrossRegionTest();
+    final Configuration newConf = new Configuration(getConfiguration());
+    // clear any base/per-bucket settings of these options so that the
+    // values set below are the ones the new filesystem instance uses.
+    removeBaseAndBucketOverrides(newConf,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+        AWS_REGION);
+    // disable cross region access
+    newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, false);
+    newConf.set(AWS_REGION, SA_EAST_1);
+    try (S3AFileSystem fs = new S3AFileSystem()) {
+      fs.initialize(getFileSystem().getUri(), newConf);
+      intercept(AWSRedirectException.class,
+          "does not match the AWS region containing the bucket",
+          () -> fs.exists(getFileSystem().getWorkingDirectory()));
+    }
+  }
+
+  /**
+   * Verify that with cross region access enabled, the test bucket can be
+   * probed through a client configured for a different region.
+   * The probe's result is ignored; the test passes if nothing is thrown.
+   */
+  @Test
+  public void testWithCrossRegionAccess() throws Exception {
+    describe("Verify cross region access succeed when enabled");
+    // nothing to verify when the bucket region is unknown or already sa-east-1
+    skipCrossRegionTest();
+    final Configuration crossRegionConf = new Configuration(getConfiguration());
+    removeBaseAndBucketOverrides(crossRegionConf,
+        AWS_S3_CROSS_REGION_ACCESS_ENABLED, AWS_REGION);
+    // bind the client to sa-east-1 while leaving cross region access on
+    crossRegionConf.set(AWS_REGION, SA_EAST_1);
+    crossRegionConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true);
+    try (S3AFileSystem crossRegionFs = new S3AFileSystem()) {
+      crossRegionFs.initialize(getFileSystem().getUri(), crossRegionConf);
+      crossRegionFs.exists(getFileSystem().getWorkingDirectory());
+    }
+  }
+
   @Test
   public void testCentralEndpointAndSameRegionAsBucket() throws Throwable {
     describe("Access public bucket using central endpoint and region "
@@ -478,6 +517,16 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
     assertOpsUsingNewFs();
   }
 
+  /**
+   * Skip the calling test unless the test bucket's region is known
+   * and is something other than sa-east-1.
+   * @throws IOException if raised while looking up the bucket region.
+   */
+  private void skipCrossRegionTest() throws IOException {
+    final String bucketRegion = getFileSystem()
+        .getS3AInternals()
+        .getBucketMetadata()
+        .bucketRegion();
+    // a cross region probe only makes sense against a known, different region
+    final boolean usable = bucketRegion != null && !SA_EAST_1.equals(bucketRegion);
+    if (!usable) {
+      skip("Skipping test since region is null or it is set to sa-east-1");
+    }
+  }
+
   private void assertOpsUsingNewFs() throws IOException {
     final String file = getMethodName();
     final Path basePath = methodPath();