Browse Source

HADOOP-13876 S3Guard: better support for multi-bucket access

A single DynamoDB table can now be used to store metadata for multiple s3
buckets.
Aaron Fabbri 8 years ago
parent
commit
2f80fe773d

+ 40 - 33
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java

@@ -52,6 +52,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Preconditions;
 
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.s3a.Constants;
 import org.slf4j.Logger;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.LoggerFactory;
 
 
@@ -81,7 +82,8 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  * represents a single directory or file.  Its path is split into separate table
  * represents a single directory or file.  Its path is split into separate table
  * attributes:
  * attributes:
  * <ul>
  * <ul>
- * <li> parent (absolute path of the parent). </li>
+ * <li> parent (absolute path of the parent, with bucket name inserted as
+ * first path component). </li>
  * <li> child (path of that specific child, relative to parent). </li>
  * <li> child (path of that specific child, relative to parent). </li>
  * <li> optional boolean attribute tracking whether the path is a directory.
  * <li> optional boolean attribute tracking whether the path is a directory.
  *      Absence or a false value indicates the path is a file. </li>
  *      Absence or a false value indicates the path is a file. </li>
@@ -95,16 +97,14 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  *
  *
  * The DynamoDB partition key is the parent, and the range key is the child.
  * The DynamoDB partition key is the parent, and the range key is the child.
  *
  *
- * Root is a special case.  It has no parent, so it cannot be split into
- * separate parent and child attributes.  To avoid confusion in the DynamoDB
- * table, we simply do not persist root and instead treat it as a special case
- * path that always exists.
+ * To allow multiple buckets to share the same DynamoDB table, the bucket
+ * name is treated as the root directory.
  *
  *
  * For example, assume the consistent store contains metadata representing this
  * For example, assume the consistent store contains metadata representing this
  * file system structure:
  * file system structure:
  *
  *
  * <pre>
  * <pre>
- * /dir1
+ * s3a://bucket/dir1
  * |-- dir2
  * |-- dir2
  * |   |-- file1
  * |   |-- file1
  * |   `-- file2
  * |   `-- file2
@@ -119,20 +119,20 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  * This is persisted to a single DynamoDB table as:
  * This is persisted to a single DynamoDB table as:
  *
  *
  * <pre>
  * <pre>
- * ==================================================================
- * | parent          | child | is_dir | mod_time | len |     ...    |
- * ==================================================================
- * | /               | dir1  | true   |          |     |            |
- * | /dir1           | dir2  | true   |          |     |            |
- * | /dir1           | dir3  | true   |          |     |            |
- * | /dir1/dir2      | file1 |        |   100    | 111 |            |
- * | /dir1/dir2      | file2 |        |   200    | 222 |            |
- * | /dir1/dir3      | dir4  | true   |          |     |            |
- * | /dir1/dir3      | dir5  | true   |          |     |            |
- * | /dir1/dir3/dir4 | file3 |        |   300    | 333 |            |
- * | /dir1/dir3/dir5 | file4 |        |   400    | 444 |            |
- * | /dir1/dir3      | dir6  | true   |          |     |            |
- * ==================================================================
+ * =========================================================================
+ * | parent                 | child | is_dir | mod_time | len |     ...    |
+ * =========================================================================
+ * | /bucket                | dir1  | true   |          |     |            |
+ * | /bucket/dir1           | dir2  | true   |          |     |            |
+ * | /bucket/dir1           | dir3  | true   |          |     |            |
+ * | /bucket/dir1/dir2      | file1 |        |   100    | 111 |            |
+ * | /bucket/dir1/dir2      | file2 |        |   200    | 222 |            |
+ * | /bucket/dir1/dir3      | dir4  | true   |          |     |            |
+ * | /bucket/dir1/dir3      | dir5  | true   |          |     |            |
+ * | /bucket/dir1/dir3/dir4 | file3 |        |   300    | 333 |            |
+ * | /bucket/dir1/dir3/dir5 | file4 |        |   400    | 444 |            |
+ * | /bucket/dir1/dir3      | dir6  | true   |          |     |            |
+ * =========================================================================
  * </pre>
  * </pre>
  *
  *
  * This choice of schema is efficient for read access patterns.
  * This choice of schema is efficient for read access patterns.
@@ -147,9 +147,11 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  * {@link #move(Collection, Collection)}, are less efficient with this schema.
  * {@link #move(Collection, Collection)}, are less efficient with this schema.
  * They require mutating multiple items in the DynamoDB table.
  * They require mutating multiple items in the DynamoDB table.
  *
  *
- * All DynamoDB access is performed within the same AWS region as the S3 bucket
- * that hosts the S3A instance.  During initialization, it checks the location
- * of the S3 bucket and creates a DynamoDB client connected to the same region.
+ * By default, DynamoDB access is performed within the same AWS region as
+ * the S3 bucket that hosts the S3A instance.  During initialization, it checks
+ * the location of the S3 bucket and creates a DynamoDB client connected to the
+ * same region. The region may also be set explicitly by setting the config
+ * parameter fs.s3a.s3guard.ddb.endpoint with the corresponding endpoint.
  */
  */
 @InterfaceAudience.Private
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
 @InterfaceStability.Evolving
@@ -176,7 +178,6 @@ public class DynamoDBMetadataStore implements MetadataStore {
   private Table table;
   private Table table;
   private String tableName;
   private String tableName;
   private Configuration conf;
   private Configuration conf;
-  private URI s3Uri;
   private String username;
   private String username;
 
 
   /**
   /**
@@ -233,7 +234,6 @@ public class DynamoDBMetadataStore implements MetadataStore {
 
 
     username = s3afs.getUsername();
     username = s3afs.getUsername();
     conf = s3afs.getConf();
     conf = s3afs.getConf();
-    s3Uri = s3afs.getUri();
     Class<? extends DynamoDBClientFactory> cls = conf.getClass(
     Class<? extends DynamoDBClientFactory> cls = conf.getClass(
         S3GUARD_DDB_CLIENT_FACTORY_IMPL,
         S3GUARD_DDB_CLIENT_FACTORY_IMPL,
         S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT,
         S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT,
@@ -338,7 +338,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
             .withPrimaryKey(pathToKey(path))
             .withPrimaryKey(pathToKey(path))
             .withConsistentRead(true); // strictly consistent read
             .withConsistentRead(true); // strictly consistent read
         final Item item = table.getItem(spec);
         final Item item = table.getItem(spec);
-        meta = itemToPathMetadata(s3Uri, item, username);
+        meta = itemToPathMetadata(item, username);
         LOG.debug("Get from table {} in region {} returning for {}: {}",
         LOG.debug("Get from table {} in region {} returning for {}: {}",
             tableName, region, path, meta);
             tableName, region, path, meta);
       }
       }
@@ -375,7 +375,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
 
 
       final List<PathMetadata> metas = new ArrayList<>();
       final List<PathMetadata> metas = new ArrayList<>();
       for (Item item : items) {
       for (Item item : items) {
-        metas.add(itemToPathMetadata(s3Uri, item, username));
+        metas.add(itemToPathMetadata(item, username));
       }
       }
       LOG.trace("Listing table {} in region {} for {} returning {}",
       LOG.trace("Listing table {} in region {} for {} returning {}",
           tableName, region, path, metas);
           tableName, region, path, metas);
@@ -399,8 +399,8 @@ public class DynamoDBMetadataStore implements MetadataStore {
         + " paths to create", tableName, region,
         + " paths to create", tableName, region,
         pathsToDelete == null ? 0 : pathsToDelete.size(),
         pathsToDelete == null ? 0 : pathsToDelete.size(),
         pathsToCreate == null ? 0 : pathsToCreate.size());
         pathsToCreate == null ? 0 : pathsToCreate.size());
-    LOG.trace("move: pathsToDelete = {}, pathsToCreate = {}",
-        pathsToDelete, pathsToCreate);
+    LOG.trace("move: pathsToDelete = {}, pathsToCreate = {}", pathsToDelete,
+        pathsToCreate);
     try {
     try {
       processBatchWriteRequest(pathToKey(pathsToDelete),
       processBatchWriteRequest(pathToKey(pathsToDelete),
           pathMetadataToItem(pathsToCreate));
           pathMetadataToItem(pathsToCreate));
@@ -714,13 +714,20 @@ public class DynamoDBMetadataStore implements MetadataStore {
   }
   }
 
 
   /**
   /**
-   * Validates a path object; and make it qualified if it's not.
+   * Validates a path object; it must be absolute, and contain a host
+   * (bucket) component.
    */
    */
   private Path checkPath(Path path) {
   private Path checkPath(Path path) {
     Preconditions.checkNotNull(path);
     Preconditions.checkNotNull(path);
-    Preconditions.checkArgument(path.isAbsolute(),
-        "Path '" + path + "' is not absolute!");
-    return s3Uri == null ? path : path.makeQualified(s3Uri, null);
+    Preconditions.checkArgument(path.isAbsolute(), "Path %s is not absolute",
+        path);
+    URI uri = path.toUri();
+    Preconditions.checkNotNull(uri.getScheme(), "Path %s missing scheme", path);
+    Preconditions.checkArgument(uri.getScheme().equals(Constants.FS_S3A),
+        "Path %s scheme must be %s", path, Constants.FS_S3A);
+    Preconditions.checkArgument(!StringUtils.isEmpty(uri.getHost()), "Path %s" +
+        " is missing bucket.", path);
+    return path;
   }
   }
 
 
   /**
   /**

+ 38 - 20
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java

@@ -33,10 +33,12 @@ import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Preconditions;
 
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 
 
 /**
 /**
@@ -101,24 +103,26 @@ final class PathMetadataDynamoDBTranslation {
    * @param item DynamoDB item to convert
    * @param item DynamoDB item to convert
    * @return {@code item} converted to a {@link PathMetadata}
    * @return {@code item} converted to a {@link PathMetadata}
    */
    */
-  static PathMetadata itemToPathMetadata(URI s3aUri, Item item, String username)
+  static PathMetadata itemToPathMetadata(Item item, String username)
       throws IOException {
       throws IOException {
     if (item == null) {
     if (item == null) {
       return null;
       return null;
     }
     }
 
 
-    String parent = item.getString(PARENT);
-    Preconditions.checkNotNull(parent, "No parent entry in item %s", item);
-    String child = item.getString(CHILD);
-    Preconditions.checkNotNull(child, "No child entry in item %s", item);
-    Path path = new Path(parent, child);
-    if (!path.isAbsoluteAndSchemeAuthorityNull()) {
+    String parentStr = item.getString(PARENT);
+    Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item);
+    String childStr = item.getString(CHILD);
+    Preconditions.checkNotNull(childStr, "No child entry in item %s", item);
+
+    // Skip table version markers, which are only non-absolute paths stored.
+    Path rawPath = new Path(parentStr, childStr);
+    if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) {
       return null;
       return null;
     }
     }
 
 
-    if (s3aUri != null) {
-      path = path.makeQualified(s3aUri, null);
-    }
+    Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/");
+    Path path = new Path(parent, childStr);
+
     boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
     boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
     final FileStatus fileStatus;
     final FileStatus fileStatus;
     if (isDir) {
     if (isDir) {
@@ -231,8 +235,28 @@ final class PathMetadataDynamoDBTranslation {
    * @return DynamoDB equality condition on {@code path} as parent
    * @return DynamoDB equality condition on {@code path} as parent
    */
    */
   static KeyAttribute pathToParentKeyAttribute(Path path) {
   static KeyAttribute pathToParentKeyAttribute(Path path) {
-    removeSchemeAndAuthority(path);
-    return new KeyAttribute(PARENT, path.toUri().getPath());
+    return new KeyAttribute(PARENT, pathToParentKey(path));
+  }
+
+  /**
+   * e.g. pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a
+   * @param path
+   * @return string for parent key
+   */
+  static String pathToParentKey(Path path) {
+    Preconditions.checkNotNull(path);
+    Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute");
+    URI uri = path.toUri();
+    String bucket = uri.getHost();
+    Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
+        "Path missing bucket");
+    String pKey = "/" + bucket + uri.getPath();
+
+    // Strip trailing slash
+    if (pKey.endsWith("/")) {
+      pKey = pKey.substring(0, pKey.length() - 1);
+    }
+    return pKey;
   }
   }
 
 
   /**
   /**
@@ -243,11 +267,10 @@ final class PathMetadataDynamoDBTranslation {
    * @return DynamoDB key for item matching {@code path}
    * @return DynamoDB key for item matching {@code path}
    */
    */
   static PrimaryKey pathToKey(Path path) {
   static PrimaryKey pathToKey(Path path) {
-    path = removeSchemeAndAuthority(path);
     Preconditions.checkArgument(!path.isRoot(),
     Preconditions.checkArgument(!path.isRoot(),
         "Root path is not mapped to any PrimaryKey");
         "Root path is not mapped to any PrimaryKey");
-    return new PrimaryKey(PARENT, path.getParent().toUri().getPath(),
-        CHILD, path.getName());
+    return new PrimaryKey(PARENT, pathToParentKey(path.getParent()), CHILD,
+        path.getName());
   }
   }
 
 
   /**
   /**
@@ -268,11 +291,6 @@ final class PathMetadataDynamoDBTranslation {
     return keys;
     return keys;
   }
   }
 
 
-  private static Path removeSchemeAndAuthority(Path path) {
-    Preconditions.checkNotNull(path);
-    return Path.getPathWithoutSchemeAndAuthority(path);
-  }
-
   /**
   /**
    * There is no need to instantiate this class.
    * There is no need to instantiate this class.
    */
    */

+ 11 - 0
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md

@@ -166,6 +166,17 @@ following parameter to true.
 </property>
 </property>
 ```
 ```
 
 
+We can also explicitly set the DynamoDB service endpoint we will connect to.
+This makes sure we always access the region our table lives in, even if we
+access S3 buckets in other regions.  In this example we use the endpoint for
+US West (Oregon):
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.endpoint</name>
+  <value>dynamodb.us-west-2.amazonaws.com</value>
+</property>
+```
 Next, you need to set the DynamoDB read and write throughput requirements you
 Next, you need to set the DynamoDB read and write throughput requirements you
 expect to need for your cluster.  Setting higher values will cost you more
 expect to need for your cluster.  Setting higher values will cost you more
 money.
 money.

+ 29 - 10
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java

@@ -454,21 +454,40 @@ public abstract class MetadataStoreTestBase extends Assert {
     }
     }
   }
   }
 
 
+  /**
+   * Test that the MetadataStore differentiates between the same path in two
+   * different buckets.
+   * @throws Exception
+   */
+  @Test
+  public void testMultiBucketPaths() throws Exception {
+    String p1 = "s3a://bucket-a/path1";
+    String p2 = "s3a://bucket-b/path2";
+
+    // Make sure we start out empty
+    PathMetadata meta = ms.get(new Path(p1));
+    assertNull("Path should not be present yet.", meta);
+    meta = ms.get(new Path(p2));
+    assertNull("Path2 should not be present yet.", meta);
+
+    // Put p1, assert p2 doesn't match
+    ms.put(new PathMetadata(makeFileStatus(p1, 100)));
+    meta = ms.get(new Path(p2));
+    assertNull("Path 2 should not match path 1.", meta);
+
+    // Make sure delete is correct as well
+    if (!allowMissing()) {
+      ms.delete(new Path(p2));
+      meta = ms.get(new Path(p1));
+      assertNotNull("Path should not have been deleted");
+    }
+    ms.delete(new Path(p1));
+  }
 
 
   /*
   /*
    * Helper functions.
    * Helper functions.
    */
    */
 
 
-  /** Builds array of Path objects based on parent dir and filenames. */
-  private Path[] buildPaths(String parent, String... paths) {
-
-    Path[] output = new Path[paths.length];
-    for (int i = 0; i < paths.length; i++) {
-      output[i] = new Path(strToPath(parent), paths[i]);
-    }
-    return output;
-  }
-
   /** Modifies paths input array and returns it. */
   /** Modifies paths input array and returns it. */
   private String[] buildPathStrings(String parent, String... paths) {
   private String[] buildPathStrings(String parent, String... paths) {
     for (int i = 0; i < paths.length; i++) {
     for (int i = 0; i < paths.length; i++) {

+ 8 - 7
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java

@@ -83,10 +83,10 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
       LoggerFactory.getLogger(TestDynamoDBMetadataStore.class);
       LoggerFactory.getLogger(TestDynamoDBMetadataStore.class);
   private static final String BUCKET = "TestDynamoDBMetadataStore";
   private static final String BUCKET = "TestDynamoDBMetadataStore";
   private static final String S3URI =
   private static final String S3URI =
-      URI.create(Constants.FS_S3A + "://" + BUCKET).toString();
+      URI.create(Constants.FS_S3A + "://" + BUCKET + "/").toString();
   public static final PrimaryKey
   public static final PrimaryKey
       VERSION_MARKER_PRIMARY_KEY = createVersionMarkerPrimaryKey(
       VERSION_MARKER_PRIMARY_KEY = createVersionMarkerPrimaryKey(
-          DynamoDBMetadataStore.VERSION_MARKER);
+      DynamoDBMetadataStore.VERSION_MARKER);
 
 
   /** The DynamoDBLocal dynamoDBLocalServer instance for testing. */
   /** The DynamoDBLocal dynamoDBLocalServer instance for testing. */
   private static DynamoDBProxyServer dynamoDBLocalServer;
   private static DynamoDBProxyServer dynamoDBLocalServer;
@@ -415,12 +415,13 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
   @Test
   @Test
   public void testRootDirectory() throws IOException {
   public void testRootDirectory() throws IOException {
     final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
     final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
-    verifyRootDirectory(ddbms.get(new Path("/")), true);
+    Path rootPath = new Path(S3URI);
+    verifyRootDirectory(ddbms.get(rootPath), true);
 
 
     ddbms.put(new PathMetadata(new S3AFileStatus(true,
     ddbms.put(new PathMetadata(new S3AFileStatus(true,
-        new Path("/foo"),
+        new Path(rootPath, "foo"),
         UserGroupInformation.getCurrentUser().getShortUserName())));
         UserGroupInformation.getCurrentUser().getShortUserName())));
-    verifyRootDirectory(ddbms.get(new Path("/")), false);
+    verifyRootDirectory(ddbms.get(new Path(S3URI)), false);
   }
   }
 
 
   private void verifyRootDirectory(PathMetadata rootMeta, boolean isEmpty) {
   private void verifyRootDirectory(PathMetadata rootMeta, boolean isEmpty) {
@@ -458,7 +459,7 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
     try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
     try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
       ddbms.initialize(s3afs);
       ddbms.initialize(s3afs);
       // we can list the empty table
       // we can list the empty table
-      ddbms.listChildren(new Path("/"));
+      ddbms.listChildren(new Path(S3URI));
 
 
       ddbms.destroy();
       ddbms.destroy();
       verifyTableNotExist(tableName);
       verifyTableNotExist(tableName);
@@ -469,7 +470,7 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
 
 
       try {
       try {
         // we can no longer list the destroyed table
         // we can no longer list the destroyed table
-        ddbms.listChildren(new Path("/"));
+        ddbms.listChildren(new Path(S3URI));
         fail("Should have failed after the table is destroyed!");
         fail("Should have failed after the table is destroyed!");
       } catch (IOException ignored) {
       } catch (IOException ignored) {
       }
       }

+ 27 - 15
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java

@@ -27,6 +27,7 @@ import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
 import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
 import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
 import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
 import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
 import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
 import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
+import com.google.common.base.Preconditions;
 import org.junit.BeforeClass;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.Test;
@@ -56,10 +57,8 @@ import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION;
  * model objects and DynamoDB items.
  * model objects and DynamoDB items.
  */
  */
 public class TestPathMetadataDynamoDBTranslation {
 public class TestPathMetadataDynamoDBTranslation {
-  private static final String DEFAULT_URI = "s3a://test-bucket/";
-  private static final URI S3AURI = URI.create(DEFAULT_URI);
 
 
-  private static final Path TEST_DIR_PATH = new Path("/myDir");
+  private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir");
   private static final Item TEST_DIR_ITEM = new Item();
   private static final Item TEST_DIR_ITEM = new Item();
   private static PathMetadata testDirPathMetadata;
   private static PathMetadata testDirPathMetadata;
 
 
@@ -77,14 +76,14 @@ public class TestPathMetadataDynamoDBTranslation {
     testDirPathMetadata =
     testDirPathMetadata =
         new PathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username));
         new PathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username));
     TEST_DIR_ITEM
     TEST_DIR_ITEM
-        .withPrimaryKey(PARENT, "/", CHILD, TEST_DIR_PATH.getName())
+        .withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName())
         .withBoolean(IS_DIR, true);
         .withBoolean(IS_DIR, true);
 
 
     testFilePathMetadata = new PathMetadata(
     testFilePathMetadata = new PathMetadata(
         new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH,
         new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH,
             TEST_BLOCK_SIZE, username));
             TEST_BLOCK_SIZE, username));
     TEST_FILE_ITEM
     TEST_FILE_ITEM
-        .withPrimaryKey(PARENT, TEST_DIR_PATH.toString(),
+        .withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()),
             CHILD, TEST_FILE_PATH.getName())
             CHILD, TEST_FILE_PATH.getName())
         .withBoolean(IS_DIR, false)
         .withBoolean(IS_DIR, false)
         .withLong(FILE_LENGTH, TEST_FILE_LENGTH)
         .withLong(FILE_LENGTH, TEST_FILE_LENGTH)
@@ -128,10 +127,10 @@ public class TestPathMetadataDynamoDBTranslation {
   public void testItemToPathMetadata() throws IOException {
   public void testItemToPathMetadata() throws IOException {
     final String user =
     final String user =
         UserGroupInformation.getCurrentUser().getShortUserName();
         UserGroupInformation.getCurrentUser().getShortUserName();
-    assertNull(itemToPathMetadata(S3AURI, null, user));
+    assertNull(itemToPathMetadata(null, user));
 
 
-    verify(TEST_DIR_ITEM, itemToPathMetadata(S3AURI, TEST_DIR_ITEM, user));
-    verify(TEST_FILE_ITEM, itemToPathMetadata(S3AURI, TEST_FILE_ITEM, user));
+    verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user));
+    verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user));
   }
   }
 
 
   /**
   /**
@@ -141,8 +140,8 @@ public class TestPathMetadataDynamoDBTranslation {
     assertNotNull(meta);
     assertNotNull(meta);
     assert meta.getFileStatus() instanceof S3AFileStatus;
     assert meta.getFileStatus() instanceof S3AFileStatus;
     final S3AFileStatus status = (S3AFileStatus) meta.getFileStatus();
     final S3AFileStatus status = (S3AFileStatus) meta.getFileStatus();
-    final Path path = Path.getPathWithoutSchemeAndAuthority(status.getPath());
-    assertEquals(item.get(PARENT), path.getParent().toString());
+    final Path path = status.getPath();
+    assertEquals(item.get(PARENT), pathToParentKey(path.getParent()));
     assertEquals(item.get(CHILD), path.getName());
     assertEquals(item.get(CHILD), path.getName());
     boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
     boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
     assertEquals(isDir, status.isDirectory());
     assertEquals(isDir, status.isDirectory());
@@ -178,17 +177,29 @@ public class TestPathMetadataDynamoDBTranslation {
     assertNotNull(attr);
     assertNotNull(attr);
     assertEquals(PARENT, attr.getName());
     assertEquals(PARENT, attr.getName());
     // this path is expected as parent filed
     // this path is expected as parent filed
-    assertEquals(path.toString(), attr.getValue());
+    assertEquals(pathToParentKey(path), attr.getValue());
+  }
+
+  private static String pathToParentKey(Path p) {
+    Preconditions.checkArgument(p.isUriPathAbsolute());
+    URI parentUri = p.toUri();
+    String bucket = parentUri.getHost();
+    Preconditions.checkNotNull(bucket);
+    String s =  "/" + bucket + parentUri.getPath();
+    // strip trailing slash
+    if (s.endsWith("/")) {
+      s = s.substring(0, s.length()-1);
+    }
+    return s;
   }
   }
 
 
   @Test
   @Test
-  public void testPathToKey() {
+  public void testPathToKey() throws Exception {
     try {
     try {
       pathToKey(new Path("/"));
       pathToKey(new Path("/"));
       fail("Root path should have not been mapped to any PrimaryKey");
       fail("Root path should have not been mapped to any PrimaryKey");
     } catch (IllegalArgumentException ignored) {
     } catch (IllegalArgumentException ignored) {
     }
     }
-
     doTestPathToKey(TEST_DIR_PATH);
     doTestPathToKey(TEST_DIR_PATH);
     doTestPathToKey(TEST_FILE_PATH);
     doTestPathToKey(TEST_FILE_PATH);
   }
   }
@@ -202,7 +213,8 @@ public class TestPathMetadataDynamoDBTranslation {
     for (KeyAttribute keyAttribute : key.getComponents()) {
     for (KeyAttribute keyAttribute : key.getComponents()) {
       assertThat(keyAttribute.getName(), anyOf(is(PARENT), is(CHILD)));
       assertThat(keyAttribute.getName(), anyOf(is(PARENT), is(CHILD)));
       if (PARENT.equals(keyAttribute.getName())) {
       if (PARENT.equals(keyAttribute.getName())) {
-        assertEquals(path.getParent().toString(), keyAttribute.getValue());
+        assertEquals(pathToParentKey(path.getParent()),
+            keyAttribute.getValue());
       } else {
       } else {
         assertEquals(path.getName(), keyAttribute.getValue());
         assertEquals(path.getName(), keyAttribute.getValue());
       }
       }
@@ -220,7 +232,7 @@ public class TestPathMetadataDynamoDBTranslation {
   public void testVersionMarkerNotStatusIllegalPath() throws Throwable {
   public void testVersionMarkerNotStatusIllegalPath() throws Throwable {
     final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
     final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
     assertNull("Path metadata fromfrom " + marker,
     assertNull("Path metadata fromfrom " + marker,
-        itemToPathMetadata(null, marker, "alice"));
+        itemToPathMetadata(marker, "alice"));
   }
   }
 
 
 }
 }