Browse Source

HADOOP-14020 Optimize dirListingUnion (Sean Mackrory)

Instead of always pushing full directory listings into the MetadataStore at the
end of listStatus(), only do it if authoritative mode is enabled, and something
in the Directory Listing Metadata has actually changed from what we retrieved
from the MetadataStore.
Aaron Fabbri 8 years ago
parent
commit
ab213fbd0c

+ 2 - 1
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -1457,7 +1457,8 @@ public class S3AFileSystem extends FileSystem {
       while (files.hasNext()) {
       while (files.hasNext()) {
         result.add(files.next());
         result.add(files.next());
       }
       }
-      return S3Guard.dirListingUnion(metadataStore, path, result, dirMeta);
+      return S3Guard.dirListingUnion(metadataStore, path, result, dirMeta,
+          allowAuthoritative);
     } else {
     } else {
       LOG.debug("Adding: rd (not a dir): {}", path);
       LOG.debug("Adding: rd (not a dir): {}", path);
       FileStatus[] stats = new FileStatus[1];
       FileStatus[] stats = new FileStatus[1];

+ 10 - 2
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java

@@ -149,12 +149,20 @@ public class DirListingMetadata {
    * {@code FileStatus} with the same path, it will be replaced.
    * {@code FileStatus} with the same path, it will be replaced.
    *
    *
    * @param childFileStatus entry to add to this directory listing.
    * @param childFileStatus entry to add to this directory listing.
+   * @return true if the status was added or replaced with a new value. False
+   * if the same FileStatus value was already present.
    */
    */
-  public void put(FileStatus childFileStatus) {
+  public boolean put(FileStatus childFileStatus) {
     Preconditions.checkNotNull(childFileStatus,
     Preconditions.checkNotNull(childFileStatus,
         "childFileStatus must be non-null");
         "childFileStatus must be non-null");
     Path childPath = childStatusToPathKey(childFileStatus);
     Path childPath = childStatusToPathKey(childFileStatus);
-    listMap.put(childPath, new PathMetadata(childFileStatus));
+    PathMetadata newValue = new PathMetadata(childFileStatus);
+    PathMetadata oldValue = listMap.put(childPath, newValue);
+    if (oldValue == null) {
+      return true;
+    } else {
+      return !oldValue.equals(newValue);
+    }
   }
   }
 
 
   @Override
   @Override

+ 10 - 8
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java

@@ -155,12 +155,13 @@ public final class S3Guard {
    * @param path path to directory
    * @param path path to directory
    * @param backingStatuses Directory listing from the backing store.
    * @param backingStatuses Directory listing from the backing store.
    * @param dirMeta  Directory listing from MetadataStore.  May be null.
    * @param dirMeta  Directory listing from MetadataStore.  May be null.
+   * @param isAuthoritative State of authoritative mode
    * @return Final result of directory listing.
    * @return Final result of directory listing.
    * @throws IOException if metadata store update failed
    * @throws IOException if metadata store update failed
    */
    */
   public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
   public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
-      List<FileStatus> backingStatuses, DirListingMetadata dirMeta)
-      throws IOException {
+      List<FileStatus> backingStatuses, DirListingMetadata dirMeta,
+      boolean isAuthoritative) throws IOException {
 
 
     // Fast-path for NullMetadataStore
     // Fast-path for NullMetadataStore
     if (ms instanceof NullMetadataStore) {
     if (ms instanceof NullMetadataStore) {
@@ -184,6 +185,7 @@ public final class S3Guard {
 
 
     // HADOOP-13760: filter out deleted files via PathMetadata#isDeleted() here
     // HADOOP-13760: filter out deleted files via PathMetadata#isDeleted() here
 
 
+    boolean changed = false;
     for (FileStatus s : backingStatuses) {
     for (FileStatus s : backingStatuses) {
 
 
       // Minor race condition here.  Multiple threads could add to this
       // Minor race condition here.  Multiple threads could add to this
@@ -193,14 +195,14 @@ public final class S3Guard {
       // Any FileSystem has similar race conditions, but we could persist
       // Any FileSystem has similar race conditions, but we could persist
       // a stale entry longer.  We could expose an atomic
       // a stale entry longer.  We could expose an atomic
       // DirListingMetadata#putIfNotPresent()
       // DirListingMetadata#putIfNotPresent()
-      if (dirMeta.get(s.getPath()) == null) {
-        dirMeta.put(s);
-      }
+      changed = changed || dirMeta.put(s);
+    }
+
+    if (changed && isAuthoritative) {
+      dirMeta.setAuthoritative(true); // This is the full directory contents
+      ms.put(dirMeta);
     }
     }
 
 
-    // TODO optimize for when allowAuthoritative = false
-    dirMeta.setAuthoritative(true); // This is the full directory contents
-    ms.put(dirMeta);
     return dirMetaToStatuses(dirMeta);
     return dirMetaToStatuses(dirMeta);
   }
   }
 
 

+ 64 - 0
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java

@@ -20,9 +20,12 @@ package org.apache.hadoop.fs.s3a;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.contract.AbstractFSContract;
 import org.apache.hadoop.fs.contract.AbstractFSContract;
 import org.apache.hadoop.fs.contract.s3a.S3AContract;
 import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
+import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
 import org.junit.Assume;
 import org.junit.Assume;
 import org.junit.Test;
 import org.junit.Test;
 
 
@@ -76,4 +79,65 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
     // This should fail without S3Guard, and succeed with it.
     // This should fail without S3Guard, and succeed with it.
     assertTrue(list.contains(inconsistentPath));
     assertTrue(list.contains(inconsistentPath));
   }
   }
+
+  @Test
+  public void testListStatusWriteBack() throws Exception {
+    Assume.assumeTrue(getFileSystem().hasMetadataStore());
+
+    Configuration conf;
+    Path directory = path("ListStatusWriteBack");
+
+    // Create a FileSystem that is S3-backed only
+    conf = createConfiguration();
+    conf.setBoolean("fs.s3a.impl.disable.cache", true);
+    conf.set(Constants.S3_METADATA_STORE_IMPL,
+        Constants.S3GUARD_METASTORE_NULL);
+    FileSystem noS3Guard = FileSystem.get(directory.toUri(), conf);
+
+    // Create a FileSystem with S3Guard and write-back disabled
+    conf = createConfiguration();
+    S3ATestUtils.maybeEnableS3Guard(conf);
+    conf.setBoolean("fs.s3a.impl.disable.cache", true);
+    conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, false);
+    FileSystem noWriteBack = FileSystem.get(directory.toUri(), conf);
+
+    // Create a FileSystem with S3Guard and write-back enabled
+    conf = createConfiguration();
+    S3ATestUtils.maybeEnableS3Guard(conf);
+    conf.setBoolean("fs.s3a.impl.disable.cache", true);
+    conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, true);
+    FileSystem yesWriteBack = FileSystem.get(directory.toUri(), conf);
+
+    // Create a directory on S3 only
+    noS3Guard.mkdirs(new Path(directory, "123"));
+    // Create a directory on metastore only
+    noWriteBack.mkdirs(new Path(directory, "XYZ"));
+
+    FileStatus[] fsResults;
+    DirListingMetadata mdResults;
+
+    // FS should return both
+    fsResults = noWriteBack.listStatus(directory);
+    assertTrue("Unexpected number of results from filesystem. " +
+            "Should have /XYZ and /123: " + fsResults.toString(),
+        fsResults.length == 2);
+
+    // Metastore without write-back should still only contain 1
+    mdResults = S3Guard.getMetadataStore(noWriteBack).listChildren(directory);
+    assertTrue("Unexpected number of results from metastore. " +
+            "Metastore should only know about /XYZ: " + mdResults.toString(),
+        mdResults.numEntries() == 1);
+
+    // FS should return both (and will write it back)
+    fsResults = yesWriteBack.listStatus(directory);
+    assertTrue("Unexpected number of results from filesystem. " +
+            "Should have /XYZ and /123: " + fsResults.toString(),
+        fsResults.length == 2);
+
+    // Metastore should not contain both
+    mdResults = S3Guard.getMetadataStore(yesWriteBack).listChildren(directory);
+    assertTrue("Unexpected number of results from metastore. " +
+            "Should have /XYZ and /123: " + mdResults.toString(),
+        mdResults.numEntries() == 2);
+  }
 }
 }