Browse Source

HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.

Steve Loughran 8 years ago
parent
commit
896df3f55a

+ 1 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java

@@ -359,7 +359,7 @@ public abstract class AbstractFSContractTestBase extends Assert
     assertEquals(text + " wrong read result " + result, -1, result);
     assertEquals(text + " wrong read result " + result, -1, result);
   }
   }
 
 
-  boolean rename(Path src, Path dst) throws IOException {
+  protected boolean rename(Path src, Path dst) throws IOException {
     return getFileSystem().rename(src, dst);
     return getFileSystem().rename(src, dst);
   }
   }
 
 

+ 33 - 32
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -676,7 +676,7 @@ public class S3AFileSystem extends FileSystem {
           copyFile(summary.getKey(), newDstKey, summary.getSize());
           copyFile(summary.getKey(), newDstKey, summary.getSize());
 
 
           if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
           if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-            removeKeys(keysToDelete, true);
+            removeKeys(keysToDelete, true, false);
           }
           }
         }
         }
 
 
@@ -684,7 +684,7 @@ public class S3AFileSystem extends FileSystem {
           objects = continueListObjects(objects);
           objects = continueListObjects(objects);
         } else {
         } else {
           if (!keysToDelete.isEmpty()) {
           if (!keysToDelete.isEmpty()) {
-            removeKeys(keysToDelete, false);
+            removeKeys(keysToDelete, false, false);
           }
           }
           break;
           break;
         }
         }
@@ -932,17 +932,25 @@ public class S3AFileSystem extends FileSystem {
    * @param keysToDelete collection of keys to delete on the s3-backend
    * @param keysToDelete collection of keys to delete on the s3-backend
    * @param clearKeys clears the keysToDelete-list after processing the list
    * @param clearKeys clears the keysToDelete-list after processing the list
    *            when set to true
    *            when set to true
+   * @param deleteFakeDir indicates whether this is for deleting fake dirs
    */
    */
   private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
   private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
-          boolean clearKeys) throws AmazonClientException {
+      boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
+    if (keysToDelete.isEmpty()) {
+      // no keys
+      return;
+    }
     if (enableMultiObjectsDelete) {
     if (enableMultiObjectsDelete) {
       deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
       deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
-      instrumentation.fileDeleted(keysToDelete.size());
     } else {
     } else {
       for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
       for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
         deleteObject(keyVersion.getKey());
         deleteObject(keyVersion.getKey());
       }
       }
+    }
+    if (!deleteFakeDir) {
       instrumentation.fileDeleted(keysToDelete.size());
       instrumentation.fileDeleted(keysToDelete.size());
+    } else {
+      instrumentation.fakeDirsDeleted(keysToDelete.size());
     }
     }
     if (clearKeys) {
     if (clearKeys) {
       keysToDelete.clear();
       keysToDelete.clear();
@@ -1025,7 +1033,7 @@ public class S3AFileSystem extends FileSystem {
             LOG.debug("Got object to delete {}", summary.getKey());
             LOG.debug("Got object to delete {}", summary.getKey());
 
 
             if (keys.size() == MAX_ENTRIES_TO_DELETE) {
             if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-              removeKeys(keys, true);
+              removeKeys(keys, true, false);
             }
             }
           }
           }
 
 
@@ -1033,7 +1041,7 @@ public class S3AFileSystem extends FileSystem {
             objects = continueListObjects(objects);
             objects = continueListObjects(objects);
           } else {
           } else {
             if (!keys.isEmpty()) {
             if (!keys.isEmpty()) {
-              removeKeys(keys, false);
+              removeKeys(keys, false, false);
             }
             }
             break;
             break;
           }
           }
@@ -1512,37 +1520,30 @@ public class S3AFileSystem extends FileSystem {
   /**
   /**
    * Delete mock parent directories which are no longer needed.
    * Delete mock parent directories which are no longer needed.
    * This code swallows IO exceptions encountered
    * This code swallows IO exceptions encountered
-   * @param f path
-   */
-  private void deleteUnnecessaryFakeDirectories(Path f) {
-    while (true) {
-      String key = "";
-      try {
-        key = pathToKey(f);
-        if (key.isEmpty()) {
-          break;
-        }
-
-        S3AFileStatus status = getFileStatus(f);
-
-        if (status.isDirectory() && status.isEmptyDirectory()) {
-          LOG.debug("Deleting fake directory {}/", key);
-          deleteObject(key + "/");
+   * @param path path
+   */
+  private void deleteUnnecessaryFakeDirectories(Path path) {
+    List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
+    while (!path.isRoot()) {
+      String key = pathToKey(path);
+      key = (key.endsWith("/")) ? key : (key + "/");
+      keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
+      path = path.getParent();
+    }
+    try {
+      removeKeys(keysToRemove, false, true);
+    } catch(AmazonClientException e) {
+      instrumentation.errorIgnored();
+      if (LOG.isDebugEnabled()) {
+        StringBuilder sb = new StringBuilder();
+        for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
+          sb.append(kv.getKey()).append(",");
         }
         }
-      } catch (IOException | AmazonClientException e) {
-        LOG.debug("While deleting key {} ", key, e);
-        instrumentation.errorIgnored();
+        LOG.debug("While deleting keys {} ", sb.toString(), e);
       }
       }
-
-      if (f.isRoot()) {
-        break;
-      }
-
-      f = f.getParent();
     }
     }
   }
   }
 
 
-
   private void createFakeDirectory(final String objectName)
   private void createFakeDirectory(final String objectName)
       throws AmazonClientException, AmazonServiceException,
       throws AmazonClientException, AmazonServiceException,
       InterruptedIOException {
       InterruptedIOException {

+ 10 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java

@@ -75,6 +75,7 @@ public class S3AInstrumentation {
   private final MutableCounterLong numberOfFilesCopied;
   private final MutableCounterLong numberOfFilesCopied;
   private final MutableCounterLong bytesOfFilesCopied;
   private final MutableCounterLong bytesOfFilesCopied;
   private final MutableCounterLong numberOfFilesDeleted;
   private final MutableCounterLong numberOfFilesDeleted;
+  private final MutableCounterLong numberOfFakeDirectoryDeletes;
   private final MutableCounterLong numberOfDirectoriesCreated;
   private final MutableCounterLong numberOfDirectoriesCreated;
   private final MutableCounterLong numberOfDirectoriesDeleted;
   private final MutableCounterLong numberOfDirectoriesDeleted;
   private final Map<String, MutableCounterLong> streamMetrics =
   private final Map<String, MutableCounterLong> streamMetrics =
@@ -135,6 +136,7 @@ public class S3AInstrumentation {
     numberOfFilesCopied = counter(FILES_COPIED);
     numberOfFilesCopied = counter(FILES_COPIED);
     bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
     bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
     numberOfFilesDeleted = counter(FILES_DELETED);
     numberOfFilesDeleted = counter(FILES_DELETED);
+    numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
     numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
     numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
     numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
     numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
     ignoredErrors = counter(IGNORED_ERRORS);
     ignoredErrors = counter(IGNORED_ERRORS);
@@ -295,6 +297,14 @@ public class S3AInstrumentation {
     numberOfFilesDeleted.incr(count);
     numberOfFilesDeleted.incr(count);
   }
   }
 
 
+  /**
+   * Indicate that fake directory request was made.
+   * @param count number of directory entries included in the delete request.
+   */
+  public void fakeDirsDeleted(int count) {
+    numberOfFakeDirectoryDeletes.incr(count);
+  }
+
   /**
   /**
    * Indicate that S3A created a directory.
    * Indicate that S3A created a directory.
    */
    */

+ 4 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java

@@ -42,6 +42,10 @@ public enum Statistic {
       "Total number of files created through the object store."),
       "Total number of files created through the object store."),
   FILES_DELETED("files_deleted",
   FILES_DELETED("files_deleted",
       "Total number of files deleted from the object store."),
       "Total number of files deleted from the object store."),
+  FAKE_DIRECTORIES_CREATED("fake_directories_created",
+      "Total number of fake directory entries created in the object store."),
+  FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
+      "Total number of fake directory deletes submitted to object store."),
   IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
   IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
   INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
   INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
       "Calls of copyFromLocalFile()"),
       "Calls of copyFromLocalFile()"),

+ 85 - 0
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java

@@ -188,4 +188,89 @@ public class ITestS3AFileOperationCost extends AbstractFSContractTestBase {
       tmpFile.delete();
       tmpFile.delete();
     }
     }
   }
   }
+
+  private void reset(MetricDiff... diffs) {
+    for (MetricDiff diff : diffs) {
+      diff.reset();
+    }
+  }
+
+  @Test
+  public void testFakeDirectoryDeletion() throws Throwable {
+    describe("Verify whether create file works after renaming a file. "
+        + "In S3, rename deletes any fake directories as a part of "
+        + "clean up activity");
+    S3AFileSystem fs = getFileSystem();
+    Path srcBaseDir = path("src");
+    mkdirs(srcBaseDir);
+    MetricDiff deleteRequests =
+        new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
+    MetricDiff directoriesDeleted =
+        new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
+    MetricDiff fakeDirectoriesDeleted =
+        new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
+    MetricDiff directoriesCreated =
+        new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
+
+    Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
+    Path srcFilePath = new Path(srcDir, "source.txt");
+    int srcDirDepth = directoriesInPath(srcDir);
+    // one dir created, one removed
+    mkdirs(srcDir);
+    String state = "after mkdir(srcDir)";
+    directoriesCreated.assertDiffEquals(state, 1);
+/*  TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+*/
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    // creating a file should trigger demise of the src dir
+    touch(fs, srcFilePath);
+    state = "after touch(fs, srcFilePath)";
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesCreated.assertDiffEquals(state, 0);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    Path destBaseDir = path("dest");
+    Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
+    Path destFilePath = new Path(destDir, "dest.txt");
+    mkdirs(destDir);
+    state = "after mkdir(destDir)";
+
+    int destDirDepth = directoriesInPath(destDir);
+    directoriesCreated.assertDiffEquals(state, 1);
+/*  TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state,1);
+    directoriesDeleted.assertDiffEquals(state,0);
+    fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth);
+*/
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    fs.rename(srcFilePath, destFilePath);
+    state = "after rename(srcFilePath, destFilePath)";
+    directoriesCreated.assertDiffEquals(state, 1);
+    // one for the renamed file, one for the parent
+    deleteRequests.assertDiffEquals(state, 2);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    assertIsFile(destFilePath);
+    assertIsDirectory(srcDir);
+  }
+
+  private int directoriesInPath(Path path) {
+    return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
+  }
+
 }
 }

+ 11 - 2
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java

@@ -298,13 +298,22 @@ public class S3ATestUtils {
 
 
     /**
     /**
      * Assert that the value of {@link #diff()} matches that expected.
      * Assert that the value of {@link #diff()} matches that expected.
+     * @param message message to print; metric name is appended
      * @param expected expected value.
      * @param expected expected value.
      */
      */
-    public void assertDiffEquals(long expected) {
-      Assert.assertEquals("Count of " + this,
+    public void assertDiffEquals(String message, long expected) {
+      Assert.assertEquals(message + ": " + statistic.getSymbol(),
           expected, diff());
           expected, diff());
     }
     }
 
 
+    /**
+     * Assert that the value of {@link #diff()} matches that expected.
+     * @param expected expected value.
+     */
+    public void assertDiffEquals(long expected) {
+      assertDiffEquals("Count of " + this, expected);
+    }
+
     /**
     /**
      * Assert that the value of {@link #diff()} matches that of another
      * Assert that the value of {@link #diff()} matches that of another
      * instance.
      * instance.