浏览代码

HADOOP-19357: [ABFS] Optimizations for Retry Handling and Client Side Throttling (#7216) (#7234)

Defaults for the following configs are changed:

Client-side throttling (CST): Off
Client Backoff - 500ms (reduced from 3sec)
Max Backoff - 25s (reduced from 30sec)
Min Backoff - 500ms (reduced from 3sec)

Contributed by Manika Joshi (@manika137)
manika137 4 月之前
父节点
当前提交
23b0559dab

+ 6 - 4
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java

@@ -32,17 +32,18 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_ST
 public final class FileSystemConfigurations {
 
   public static final String DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED = "";
+  public static final boolean DEFAULT_FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK = false;
   public static final boolean DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = true;
   public static final String USER_HOME_DIRECTORY_PREFIX = "/user";
 
   private static final int SIXTY_SECONDS = 60_000;
 
   // Retry parameter defaults.
-  public static final int DEFAULT_MIN_BACKOFF_INTERVAL = 3_000;  // 3s
-  public static final int DEFAULT_MAX_BACKOFF_INTERVAL = 30_000;  // 30s
+  public static final int DEFAULT_MIN_BACKOFF_INTERVAL = 500;  // 500ms
+  public static final int DEFAULT_MAX_BACKOFF_INTERVAL = 25_000;  // 25s
   public static final boolean DEFAULT_STATIC_RETRY_FOR_CONNECTION_TIMEOUT_ENABLED = true;
   public static final int DEFAULT_STATIC_RETRY_INTERVAL = 1_000; // 1s
-  public static final int DEFAULT_BACKOFF_INTERVAL = 3_000;  // 3s
+  public static final int DEFAULT_BACKOFF_INTERVAL = 500;  // 500ms
   public static final int DEFAULT_MAX_RETRY_ATTEMPTS = 30;
   public static final int DEFAULT_CUSTOM_TOKEN_FETCH_RETRY_COUNT = 3;
 
@@ -107,7 +108,7 @@ public final class FileSystemConfigurations {
 
   public static final boolean DEFAULT_ENABLE_FLUSH = true;
   public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true;
-  public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true;
+  public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = false;
   public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true;
   public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000;
   public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000;
@@ -173,5 +174,6 @@ public final class FileSystemConfigurations {
   public static final long DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME = 5_000L;
 
   public static final int DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS = 5;
+
   private FileSystemConfigurations() {}
 }

+ 18 - 0
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestExponentialRetryPolicy.java

@@ -96,6 +96,24 @@ public class ITestExponentialRetryPolicy extends AbstractAbfsIntegrationTest {
     testMaxIOConfig(abfsConfig);
   }
 
+  @Test
+  public void testClientSideThrottlingConfigs() throws Exception {
+    final Configuration configuration = new Configuration();
+    configuration.setBoolean(FS_AZURE_ENABLE_AUTOTHROTTLING, true);
+    AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration,
+            DUMMY_ACCOUNT_NAME);
+   Assertions.assertThat(abfsConfiguration.isAutoThrottlingEnabled())
+            .describedAs("Client-side throttling enabled by configuration key")
+            .isTrue();
+
+    configuration.unset(FS_AZURE_ENABLE_AUTOTHROTTLING);
+    AbfsConfiguration abfsConfiguration2 = new AbfsConfiguration(configuration,
+            DUMMY_ACCOUNT_NAME);
+    Assertions.assertThat(abfsConfiguration2.isAutoThrottlingEnabled())
+            .describedAs("Client-side throttling should be disabled by default")
+            .isFalse();
+  }
+
   @Test
   public void testThrottlingIntercept() throws Exception {
     AzureBlobFileSystem fs = getFileSystem();