瀏覽代碼

HADOOP-13876 S3Guard: better support for multi-bucket access

A single DynamoDB table can now be used to store metadata for multiple s3
buckets.
Aaron Fabbri 8 年之前
父節點
當前提交
2f80fe773d

+ 40 - 33
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java

@@ -52,6 +52,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 
 import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.s3a.Constants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -81,7 +82,8 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  * represents a single directory or file.  Its path is split into separate table
  * attributes:
  * <ul>
- * <li> parent (absolute path of the parent). </li>
+ * <li> parent (absolute path of the parent, with bucket name inserted as
+ * first path component). </li>
  * <li> child (path of that specific child, relative to parent). </li>
  * <li> optional boolean attribute tracking whether the path is a directory.
  *      Absence or a false value indicates the path is a file. </li>
@@ -95,16 +97,14 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  *
  * The DynamoDB partition key is the parent, and the range key is the child.
  *
- * Root is a special case.  It has no parent, so it cannot be split into
- * separate parent and child attributes.  To avoid confusion in the DynamoDB
- * table, we simply do not persist root and instead treat it as a special case
- * path that always exists.
+ * To allow multiple buckets to share the same DynamoDB table, the bucket
+ * name is treated as the root directory.
  *
  * For example, assume the consistent store contains metadata representing this
  * file system structure:
  *
  * <pre>
- * /dir1
+ * s3a://bucket/dir1
  * |-- dir2
  * |   |-- file1
  * |   `-- file2
@@ -119,20 +119,20 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  * This is persisted to a single DynamoDB table as:
  *
  * <pre>
- * ==================================================================
- * | parent          | child | is_dir | mod_time | len |     ...    |
- * ==================================================================
- * | /               | dir1  | true   |          |     |            |
- * | /dir1           | dir2  | true   |          |     |            |
- * | /dir1           | dir3  | true   |          |     |            |
- * | /dir1/dir2      | file1 |        |   100    | 111 |            |
- * | /dir1/dir2      | file2 |        |   200    | 222 |            |
- * | /dir1/dir3      | dir4  | true   |          |     |            |
- * | /dir1/dir3      | dir5  | true   |          |     |            |
- * | /dir1/dir3/dir4 | file3 |        |   300    | 333 |            |
- * | /dir1/dir3/dir5 | file4 |        |   400    | 444 |            |
- * | /dir1/dir3      | dir6  | true   |          |     |            |
- * ==================================================================
+ * =========================================================================
+ * | parent                 | child | is_dir | mod_time | len |     ...    |
+ * =========================================================================
+ * | /bucket                | dir1  | true   |          |     |            |
+ * | /bucket/dir1           | dir2  | true   |          |     |            |
+ * | /bucket/dir1           | dir3  | true   |          |     |            |
+ * | /bucket/dir1/dir2      | file1 |        |   100    | 111 |            |
+ * | /bucket/dir1/dir2      | file2 |        |   200    | 222 |            |
+ * | /bucket/dir1/dir3      | dir4  | true   |          |     |            |
+ * | /bucket/dir1/dir3      | dir5  | true   |          |     |            |
+ * | /bucket/dir1/dir3/dir4 | file3 |        |   300    | 333 |            |
+ * | /bucket/dir1/dir3/dir5 | file4 |        |   400    | 444 |            |
+ * | /bucket/dir1/dir3      | dir6  | true   |          |     |            |
+ * =========================================================================
  * </pre>
  *
  * This choice of schema is efficient for read access patterns.
@@ -147,9 +147,11 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  * {@link #move(Collection, Collection)}, are less efficient with this schema.
  * They require mutating multiple items in the DynamoDB table.
  *
- * All DynamoDB access is performed within the same AWS region as the S3 bucket
- * that hosts the S3A instance.  During initialization, it checks the location
- * of the S3 bucket and creates a DynamoDB client connected to the same region.
+ * By default, DynamoDB access is performed within the same AWS region as
+ * the S3 bucket that hosts the S3A instance.  During initialization, it checks
+ * the location of the S3 bucket and creates a DynamoDB client connected to the
+ * same region. The region may also be set explicitly by setting the config
+ * parameter fs.s3a.s3guard.ddb.endpoint with the corresponding endpoint.
  */
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
@@ -176,7 +178,6 @@ public class DynamoDBMetadataStore implements MetadataStore {
   private Table table;
   private String tableName;
   private Configuration conf;
-  private URI s3Uri;
   private String username;
 
   /**
@@ -233,7 +234,6 @@ public class DynamoDBMetadataStore implements MetadataStore {
 
     username = s3afs.getUsername();
     conf = s3afs.getConf();
-    s3Uri = s3afs.getUri();
     Class<? extends DynamoDBClientFactory> cls = conf.getClass(
         S3GUARD_DDB_CLIENT_FACTORY_IMPL,
         S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT,
@@ -338,7 +338,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
             .withPrimaryKey(pathToKey(path))
             .withConsistentRead(true); // strictly consistent read
         final Item item = table.getItem(spec);
-        meta = itemToPathMetadata(s3Uri, item, username);
+        meta = itemToPathMetadata(item, username);
         LOG.debug("Get from table {} in region {} returning for {}: {}",
             tableName, region, path, meta);
       }
@@ -375,7 +375,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
 
       final List<PathMetadata> metas = new ArrayList<>();
       for (Item item : items) {
-        metas.add(itemToPathMetadata(s3Uri, item, username));
+        metas.add(itemToPathMetadata(item, username));
       }
       LOG.trace("Listing table {} in region {} for {} returning {}",
           tableName, region, path, metas);
@@ -399,8 +399,8 @@ public class DynamoDBMetadataStore implements MetadataStore {
         + " paths to create", tableName, region,
         pathsToDelete == null ? 0 : pathsToDelete.size(),
         pathsToCreate == null ? 0 : pathsToCreate.size());
-    LOG.trace("move: pathsToDelete = {}, pathsToCreate = {}",
-        pathsToDelete, pathsToCreate);
+    LOG.trace("move: pathsToDelete = {}, pathsToCreate = {}", pathsToDelete,
+        pathsToCreate);
     try {
       processBatchWriteRequest(pathToKey(pathsToDelete),
           pathMetadataToItem(pathsToCreate));
@@ -714,13 +714,20 @@ public class DynamoDBMetadataStore implements MetadataStore {
   }
 
   /**
-   * Validates a path object; and make it qualified if it's not.
+   * Validates a path object; it must be absolute, and contain a host
+   * (bucket) component.
    */
   private Path checkPath(Path path) {
     Preconditions.checkNotNull(path);
-    Preconditions.checkArgument(path.isAbsolute(),
-        "Path '" + path + "' is not absolute!");
-    return s3Uri == null ? path : path.makeQualified(s3Uri, null);
+    Preconditions.checkArgument(path.isAbsolute(), "Path %s is not absolute",
+        path);
+    URI uri = path.toUri();
+    Preconditions.checkNotNull(uri.getScheme(), "Path %s missing scheme", path);
+    Preconditions.checkArgument(uri.getScheme().equals(Constants.FS_S3A),
+        "Path %s scheme must be %s", path, Constants.FS_S3A);
+    Preconditions.checkArgument(!StringUtils.isEmpty(uri.getHost()), "Path %s" +
+        " is missing bucket.", path);
+    return path;
   }
 
   /**

+ 38 - 20
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java

@@ -33,10 +33,12 @@ import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 
 /**
@@ -101,24 +103,26 @@ final class PathMetadataDynamoDBTranslation {
    * @param item DynamoDB item to convert
    * @return {@code item} converted to a {@link PathMetadata}
    */
-  static PathMetadata itemToPathMetadata(URI s3aUri, Item item, String username)
+  static PathMetadata itemToPathMetadata(Item item, String username)
       throws IOException {
     if (item == null) {
       return null;
     }
 
-    String parent = item.getString(PARENT);
-    Preconditions.checkNotNull(parent, "No parent entry in item %s", item);
-    String child = item.getString(CHILD);
-    Preconditions.checkNotNull(child, "No child entry in item %s", item);
-    Path path = new Path(parent, child);
-    if (!path.isAbsoluteAndSchemeAuthorityNull()) {
+    String parentStr = item.getString(PARENT);
+    Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item);
+    String childStr = item.getString(CHILD);
+    Preconditions.checkNotNull(childStr, "No child entry in item %s", item);
+
+    // Skip table version markers, which are only non-absolute paths stored.
+    Path rawPath = new Path(parentStr, childStr);
+    if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) {
       return null;
     }
 
-    if (s3aUri != null) {
-      path = path.makeQualified(s3aUri, null);
-    }
+    Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/");
+    Path path = new Path(parent, childStr);
+
     boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
     final FileStatus fileStatus;
     if (isDir) {
@@ -231,8 +235,28 @@ final class PathMetadataDynamoDBTranslation {
    * @return DynamoDB equality condition on {@code path} as parent
    */
   static KeyAttribute pathToParentKeyAttribute(Path path) {
-    removeSchemeAndAuthority(path);
-    return new KeyAttribute(PARENT, path.toUri().getPath());
+    return new KeyAttribute(PARENT, pathToParentKey(path));
+  }
+
+  /**
+   * e.g. pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a
+   * @param path
+   * @return string for parent key
+   */
+  static String pathToParentKey(Path path) {
+    Preconditions.checkNotNull(path);
+    Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute");
+    URI uri = path.toUri();
+    String bucket = uri.getHost();
+    Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
+        "Path missing bucket");
+    String pKey = "/" + bucket + uri.getPath();
+
+    // Strip trailing slash
+    if (pKey.endsWith("/")) {
+      pKey = pKey.substring(0, pKey.length() - 1);
+    }
+    return pKey;
   }
 
   /**
@@ -243,11 +267,10 @@ final class PathMetadataDynamoDBTranslation {
    * @return DynamoDB key for item matching {@code path}
    */
   static PrimaryKey pathToKey(Path path) {
-    path = removeSchemeAndAuthority(path);
     Preconditions.checkArgument(!path.isRoot(),
         "Root path is not mapped to any PrimaryKey");
-    return new PrimaryKey(PARENT, path.getParent().toUri().getPath(),
-        CHILD, path.getName());
+    return new PrimaryKey(PARENT, pathToParentKey(path.getParent()), CHILD,
+        path.getName());
   }
 
   /**
@@ -268,11 +291,6 @@ final class PathMetadataDynamoDBTranslation {
     return keys;
   }
 
-  private static Path removeSchemeAndAuthority(Path path) {
-    Preconditions.checkNotNull(path);
-    return Path.getPathWithoutSchemeAndAuthority(path);
-  }
-
   /**
    * There is no need to instantiate this class.
    */

+ 11 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md

@@ -166,6 +166,17 @@ following parameter to true.
 </property>
 ```
 
+We can also explicitly set the DynamoDB service endpoint we will connect to.
+This makes sure we always access the region our table lives in, even if we
+access S3 buckets in other regions.  In this example we use the endpoint for
+US West (Oregon):
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.endpoint</name>
+  <value>dynamodb.us-west-2.amazonaws.com</value>
+</property>
+```
 Next, you need to set the DynamoDB read and write throughput requirements you
 expect to need for your cluster.  Setting higher values will cost you more
 money.

+ 29 - 10
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java

@@ -454,21 +454,40 @@ public abstract class MetadataStoreTestBase extends Assert {
     }
   }
 
+  /**
+   * Test that the MetadataStore differentiates between the same path in two
+   * different buckets.
+   * @throws Exception
+   */
+  @Test
+  public void testMultiBucketPaths() throws Exception {
+    String p1 = "s3a://bucket-a/path1";
+    String p2 = "s3a://bucket-b/path2";
+
+    // Make sure we start out empty
+    PathMetadata meta = ms.get(new Path(p1));
+    assertNull("Path should not be present yet.", meta);
+    meta = ms.get(new Path(p2));
+    assertNull("Path2 should not be present yet.", meta);
+
+    // Put p1, assert p2 doesn't match
+    ms.put(new PathMetadata(makeFileStatus(p1, 100)));
+    meta = ms.get(new Path(p2));
+    assertNull("Path 2 should not match path 1.", meta);
+
+    // Make sure delete is correct as well
+    if (!allowMissing()) {
+      ms.delete(new Path(p2));
+      meta = ms.get(new Path(p1));
+      assertNotNull("Path should not have been deleted");
+    }
+    ms.delete(new Path(p1));
+  }
 
   /*
    * Helper functions.
    */
 
-  /** Builds array of Path objects based on parent dir and filenames. */
-  private Path[] buildPaths(String parent, String... paths) {
-
-    Path[] output = new Path[paths.length];
-    for (int i = 0; i < paths.length; i++) {
-      output[i] = new Path(strToPath(parent), paths[i]);
-    }
-    return output;
-  }
-
   /** Modifies paths input array and returns it. */
   private String[] buildPathStrings(String parent, String... paths) {
     for (int i = 0; i < paths.length; i++) {

+ 8 - 7
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java

@@ -83,10 +83,10 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
       LoggerFactory.getLogger(TestDynamoDBMetadataStore.class);
   private static final String BUCKET = "TestDynamoDBMetadataStore";
   private static final String S3URI =
-      URI.create(Constants.FS_S3A + "://" + BUCKET).toString();
+      URI.create(Constants.FS_S3A + "://" + BUCKET + "/").toString();
   public static final PrimaryKey
       VERSION_MARKER_PRIMARY_KEY = createVersionMarkerPrimaryKey(
-          DynamoDBMetadataStore.VERSION_MARKER);
+      DynamoDBMetadataStore.VERSION_MARKER);
 
   /** The DynamoDBLocal dynamoDBLocalServer instance for testing. */
   private static DynamoDBProxyServer dynamoDBLocalServer;
@@ -415,12 +415,13 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
   @Test
   public void testRootDirectory() throws IOException {
     final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
-    verifyRootDirectory(ddbms.get(new Path("/")), true);
+    Path rootPath = new Path(S3URI);
+    verifyRootDirectory(ddbms.get(rootPath), true);
 
     ddbms.put(new PathMetadata(new S3AFileStatus(true,
-        new Path("/foo"),
+        new Path(rootPath, "foo"),
         UserGroupInformation.getCurrentUser().getShortUserName())));
-    verifyRootDirectory(ddbms.get(new Path("/")), false);
+    verifyRootDirectory(ddbms.get(new Path(S3URI)), false);
   }
 
   private void verifyRootDirectory(PathMetadata rootMeta, boolean isEmpty) {
@@ -458,7 +459,7 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
     try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
       ddbms.initialize(s3afs);
       // we can list the empty table
-      ddbms.listChildren(new Path("/"));
+      ddbms.listChildren(new Path(S3URI));
 
       ddbms.destroy();
       verifyTableNotExist(tableName);
@@ -469,7 +470,7 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
 
       try {
         // we can no longer list the destroyed table
-        ddbms.listChildren(new Path("/"));
+        ddbms.listChildren(new Path(S3URI));
         fail("Should have failed after the table is destroyed!");
       } catch (IOException ignored) {
       }

+ 27 - 15
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java

@@ -27,6 +27,7 @@ import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
 import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
 import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
 import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
+import com.google.common.base.Preconditions;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
@@ -56,10 +57,8 @@ import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION;
  * model objects and DynamoDB items.
  */
 public class TestPathMetadataDynamoDBTranslation {
-  private static final String DEFAULT_URI = "s3a://test-bucket/";
-  private static final URI S3AURI = URI.create(DEFAULT_URI);
 
-  private static final Path TEST_DIR_PATH = new Path("/myDir");
+  private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir");
   private static final Item TEST_DIR_ITEM = new Item();
   private static PathMetadata testDirPathMetadata;
 
@@ -77,14 +76,14 @@ public class TestPathMetadataDynamoDBTranslation {
     testDirPathMetadata =
         new PathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username));
     TEST_DIR_ITEM
-        .withPrimaryKey(PARENT, "/", CHILD, TEST_DIR_PATH.getName())
+        .withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName())
         .withBoolean(IS_DIR, true);
 
     testFilePathMetadata = new PathMetadata(
         new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH,
             TEST_BLOCK_SIZE, username));
     TEST_FILE_ITEM
-        .withPrimaryKey(PARENT, TEST_DIR_PATH.toString(),
+        .withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()),
             CHILD, TEST_FILE_PATH.getName())
         .withBoolean(IS_DIR, false)
         .withLong(FILE_LENGTH, TEST_FILE_LENGTH)
@@ -128,10 +127,10 @@ public class TestPathMetadataDynamoDBTranslation {
   public void testItemToPathMetadata() throws IOException {
     final String user =
         UserGroupInformation.getCurrentUser().getShortUserName();
-    assertNull(itemToPathMetadata(S3AURI, null, user));
+    assertNull(itemToPathMetadata(null, user));
 
-    verify(TEST_DIR_ITEM, itemToPathMetadata(S3AURI, TEST_DIR_ITEM, user));
-    verify(TEST_FILE_ITEM, itemToPathMetadata(S3AURI, TEST_FILE_ITEM, user));
+    verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user));
+    verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user));
   }
 
   /**
@@ -141,8 +140,8 @@ public class TestPathMetadataDynamoDBTranslation {
     assertNotNull(meta);
     assert meta.getFileStatus() instanceof S3AFileStatus;
     final S3AFileStatus status = (S3AFileStatus) meta.getFileStatus();
-    final Path path = Path.getPathWithoutSchemeAndAuthority(status.getPath());
-    assertEquals(item.get(PARENT), path.getParent().toString());
+    final Path path = status.getPath();
+    assertEquals(item.get(PARENT), pathToParentKey(path.getParent()));
     assertEquals(item.get(CHILD), path.getName());
     boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
     assertEquals(isDir, status.isDirectory());
@@ -178,17 +177,29 @@ public class TestPathMetadataDynamoDBTranslation {
     assertNotNull(attr);
     assertEquals(PARENT, attr.getName());
     // this path is expected as parent filed
-    assertEquals(path.toString(), attr.getValue());
+    assertEquals(pathToParentKey(path), attr.getValue());
+  }
+
+  private static String pathToParentKey(Path p) {
+    Preconditions.checkArgument(p.isUriPathAbsolute());
+    URI parentUri = p.toUri();
+    String bucket = parentUri.getHost();
+    Preconditions.checkNotNull(bucket);
+    String s =  "/" + bucket + parentUri.getPath();
+    // strip trailing slash
+    if (s.endsWith("/")) {
+      s = s.substring(0, s.length()-1);
+    }
+    return s;
   }
 
   @Test
-  public void testPathToKey() {
+  public void testPathToKey() throws Exception {
     try {
       pathToKey(new Path("/"));
       fail("Root path should have not been mapped to any PrimaryKey");
     } catch (IllegalArgumentException ignored) {
     }
-
     doTestPathToKey(TEST_DIR_PATH);
     doTestPathToKey(TEST_FILE_PATH);
   }
@@ -202,7 +213,8 @@ public class TestPathMetadataDynamoDBTranslation {
     for (KeyAttribute keyAttribute : key.getComponents()) {
       assertThat(keyAttribute.getName(), anyOf(is(PARENT), is(CHILD)));
       if (PARENT.equals(keyAttribute.getName())) {
-        assertEquals(path.getParent().toString(), keyAttribute.getValue());
+        assertEquals(pathToParentKey(path.getParent()),
+            keyAttribute.getValue());
       } else {
         assertEquals(path.getName(), keyAttribute.getValue());
       }
@@ -220,7 +232,7 @@ public class TestPathMetadataDynamoDBTranslation {
   public void testVersionMarkerNotStatusIllegalPath() throws Throwable {
     final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
     assertNull("Path metadata fromfrom " + marker,
-        itemToPathMetadata(null, marker, "alice"));
+        itemToPathMetadata(marker, "alice"));
   }
 
 }