Browse Source

HADOOP-17323. S3A getFileStatus("/") to skip IO (#2479)

Contributed by Mukund Thakur.
Mukund Thakur 4 năm trước cách đây
mục cha
commit
5fee95076b

+ 4 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -3144,6 +3144,10 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
         "s3GetFileStatus(%s) wants to know if a directory is empty but"
             + " does not request a list probe", path);
 
+    if (key.isEmpty() && !needEmptyDirectoryFlag) {
+      return new S3AFileStatus(Tristate.UNKNOWN, path, username);
+    }
+
     if (!key.isEmpty() && !key.endsWith("/")
         && probes.contains(StatusProbeEnum.Head)) {
       try {

+ 31 - 0
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.s3a.performance.AbstractS3ACostTest;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
+import org.assertj.core.api.Assertions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -258,6 +259,36 @@ public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
         GET_FILE_STATUS_FNFE);
   }
 
+  /**
+   * Probe the root path twice through the raw inner getFileStatus call:
+   * without, then with, the flag requesting the empty directory state.
+   */
+  @Test
+  public void testCostOfRootFileStatus() throws Throwable {
+    describe("cost of getFileStatus on the root path");
+    Path root = path("/");
+    // empty directory flag not needed: expect ROOT_FILE_STATUS_PROBE cost
+    S3AFileStatus rootStatus = verifyRawInnerGetFileStatus(
+        root, false, StatusProbeEnum.ALL, ROOT_FILE_STATUS_PROBE);
+    Assertions.assertThat(rootStatus.isDirectory())
+        .describedAs("Status returned should be a directory %s", rootStatus)
+        .isTrue();
+    Assertions.assertThat(rootStatus.isEmptyDirectory())
+        .describedAs("Empty directory flag of %s", rootStatus)
+        .isEqualTo(Tristate.UNKNOWN);
+
+    // empty directory flag needed: expect FILE_STATUS_DIR_PROBE cost
+    rootStatus = verifyRawInnerGetFileStatus(
+        root, true, StatusProbeEnum.ALL, FILE_STATUS_DIR_PROBE);
+    // describe with the status of this second call, not the first one
+    Assertions.assertThat(rootStatus.isDirectory())
+        .describedAs("Status returned should be a directory %s", rootStatus)
+        .isTrue();
+    Assertions.assertThat(rootStatus.isEmptyDirectory())
+        .describedAs("Empty directory flag of %s", rootStatus)
+        .isNotEqualByComparingTo(Tristate.UNKNOWN);
+  }
+
   @Test
   public void testIsDirIsFileMissingPath() throws Throwable {
     describe("performing isDir and isFile on a missing file");

+ 5 - 0
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java

@@ -76,6 +76,11 @@ public final class OperationCost {
    */
   public static final OperationCost FILE_STATUS_FILE_PROBE = HEAD_OPERATION;
 
+  /**
+   * Cost of getFileStatus("/") when no empty directory flag is needed: no IO.
+   */
+  public static final OperationCost ROOT_FILE_STATUS_PROBE = NO_IO;
+
   /**
    * Cost of {@link org.apache.hadoop.fs.s3a.impl.StatusProbeEnum#ALL}.
    */