Forráskód Böngészése

HADOOP-12963 Allow using path style addressing for accessing the s3 endpoint. (Stephen Montgomery via stevel)

Steve Loughran 9 éve
szülő
commit
e124c3a2ae

+ 7 - 0
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -760,6 +760,13 @@
   </description>
 </property>
 
+<property>
+  <name>fs.s3a.path.style.access</name>
+  <description>Enable S3 path-style access, i.e. disable the default virtual-hosted-style behaviour.
+    Useful for S3-compatible storage providers, as it removes the need to set up DNS for virtual hosting.
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.proxy.host</name>
   <description>Hostname of the (optional) proxy server for S3 connections.</description>

+ 7 - 3
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

@@ -28,13 +28,17 @@ public class Constants {
   // number of simultaneous connections to s3
   public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum";
   public static final int DEFAULT_MAXIMUM_CONNECTIONS = 15;
-  
+
   // connect to s3 over ssl?
   public static final String SECURE_CONNECTIONS = "fs.s3a.connection.ssl.enabled";
   public static final boolean DEFAULT_SECURE_CONNECTIONS = true;
 
   //use a custom endpoint?
   public static final String ENDPOINT = "fs.s3a.endpoint";
+
+  //Enable path style access? Overrides default virtual hosting
+  public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";
+
   //connect to s3 through a proxy server?
   public static final String PROXY_HOST = "fs.s3a.proxy.host";
   public static final String PROXY_PORT = "fs.s3a.proxy.port";
@@ -50,7 +54,7 @@ public class Constants {
   // seconds until we give up trying to establish a connection to s3
   public static final String ESTABLISH_TIMEOUT = "fs.s3a.connection.establish.timeout";
   public static final int DEFAULT_ESTABLISH_TIMEOUT = 50000;
-  
+
   // seconds until we give up on a connection to s3
   public static final String SOCKET_TIMEOUT = "fs.s3a.connection.timeout";
   public static final int DEFAULT_SOCKET_TIMEOUT = 200000;
@@ -79,7 +83,7 @@ public class Constants {
   // size of each of or multipart pieces in bytes
   public static final String MULTIPART_SIZE = "fs.s3a.multipart.size";
   public static final long DEFAULT_MULTIPART_SIZE = 104857600; // 100 MB
-  
+
   // minimum size in bytes before we start a multipart uploads or copy
   public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
   public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;

+ 10 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -41,6 +41,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain;
 
 import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.S3ClientOptions;
 import com.amazonaws.services.s3.model.CannedAccessControlList;
 import com.amazonaws.services.s3.model.DeleteObjectRequest;
 import com.amazonaws.services.s3.model.DeleteObjectsRequest;
@@ -302,6 +303,15 @@ public class S3AFileSystem extends FileSystem {
         throw new IllegalArgumentException(msg, e);
       }
     }
+    enablePathStyleAccessIfRequired(conf);
+  }
+
+  /**
+   * Switches the S3 client to path style access when the
+   * {@code PATH_STYLE_ACCESS} option is set to true in the configuration.
+   * When the option is absent or false the client is left untouched.
+   *
+   * @param conf configuration to read the option from
+   */
+  private void enablePathStyleAccessIfRequired(Configuration conf) {
+    if (!conf.getBoolean(PATH_STYLE_ACCESS, false)) {
+      // Option not requested: keep the client's existing options.
+      return;
+    }
+    LOG.debug("Enabling path style access!");
+    final S3ClientOptions pathStyleOptions =
+        new S3ClientOptions().withPathStyleAccess(true);
+    s3.setS3ClientOptions(pathStyleOptions);
   }
 
   private void initTransferManager() {

+ 7 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

@@ -221,6 +221,13 @@ this capability.
       </description>
     </property>
 
+    <property>
+      <name>fs.s3a.path.style.access</name>
+      <description>Enable S3 path-style access, i.e. disable the default virtual-hosted-style behaviour.
+        Useful for S3-compatible storage providers, as it removes the need to set up DNS for virtual hosting.
+      </description>
+    </property>
+
     <property>
       <name>fs.s3a.proxy.host</name>
       <description>Hostname of the (optional) proxy server for S3 connections.</description>

+ 44 - 3
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java

@@ -19,10 +19,14 @@
 package org.apache.hadoop.fs.s3a;
 
 import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.S3ClientOptions;
+import com.amazonaws.services.s3.model.AmazonS3Exception;
+
 import org.apache.commons.lang.StringUtils;
 import com.amazonaws.AmazonClientException;
 import org.apache.hadoop.conf.Configuration;
-
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.Timeout;
@@ -30,17 +34,19 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.net.URI;
-import java.io.IOException;
+import java.lang.reflect.Field;
 
 import org.apache.hadoop.security.ProviderUtils;
 import org.apache.hadoop.security.alias.CredentialProvider;
 import org.apache.hadoop.security.alias.CredentialProviderFactory;
-
+import org.apache.http.HttpStatus;
 import org.junit.rules.TemporaryFolder;
 
 public class TestS3AConfiguration {
@@ -354,4 +360,39 @@ public class TestS3AConfiguration {
     assertEquals("SecretKey incorrect.", "456", creds.getAccessSecret());
 
   }
+
+  /**
+   * Verifies that setting {@link Constants#PATH_STYLE_ACCESS} to true
+   * results in an S3 client whose options report path style access, and
+   * then attempts a small write/read round trip through that filesystem.
+   *
+   * An {@link AmazonS3Exception} from the round trip is tolerated only if
+   * it carries a 301 (moved permanently) status code.
+   *
+   * @throws Exception on any unexpected failure
+   */
+  @Test
+  public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() throws Exception {
+
+    conf = new Configuration();
+    conf.set(Constants.PATH_STYLE_ACCESS, Boolean.toString(true));
+    assertTrue(conf.getBoolean(Constants.PATH_STYLE_ACCESS, false));
+
+    try {
+      fs = S3ATestUtils.createTestFileSystem(conf);
+      final Object object = getClientOptionsField(fs.getAmazonS3Client(), "clientOptions");
+      assertNotNull(object);
+      assertTrue("Unexpected type found for clientOptions!", object instanceof S3ClientOptions);
+      assertTrue("Expected to find path style access to be switched on!", ((S3ClientOptions) object).isPathStyleAccess());
+      byte[] file = ContractTestUtils.toAsciiByteArray("test file");
+      ContractTestUtils.writeAndRead(fs,
+          new Path("/path/style/access/testFile"),
+          file,
+          file.length,
+          conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length),
+          false,
+          true);
+    } catch (final AmazonS3Exception e) {
+      LOG.error("Caught exception: ", e);
+      // Catch/pass standard path style access behaviour when live bucket
+      // isn't in the same region as the s3 client default. See
+      // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
+      // JUnit expects (message, expected, actual); keeping that order makes
+      // the failure message report the right values.
+      assertEquals("Unexpected status code for path style access failure",
+          HttpStatus.SC_MOVED_PERMANENTLY, e.getStatusCode());
+    }
+  }
+
+  /**
+   * Reads a (possibly private) field from the given S3 client via reflection.
+   *
+   * The lookup starts at the client's runtime class and walks up the
+   * superclass chain, so it still succeeds when the client instance is a
+   * subclass of the class that declares the field.
+   *
+   * @param s3client client instance to inspect
+   * @param field name of the field to read
+   * @return the current value of the field on {@code s3client}
+   * @throws NoSuchFieldException if no class in the hierarchy declares the field
+   * @throws IllegalAccessException if the field value cannot be read
+   */
+  private Object getClientOptionsField(AmazonS3Client s3client, String field)
+      throws NoSuchFieldException, IllegalAccessException {
+    Field target = null;
+    for (Class<?> type = s3client.getClass();
+        type != null && target == null;
+        type = type.getSuperclass()) {
+      try {
+        target = type.getDeclaredField(field);
+      } catch (NoSuchFieldException ignored) {
+        // Not declared at this level; continue with the superclass.
+      }
+    }
+    if (target == null) {
+      throw new NoSuchFieldException(field);
+    }
+    if (!target.isAccessible()) {
+      target.setAccessible(true);
+    }
+    return target.get(s3client);
+  }
 }