Selaa lähdekoodia

HADOOP-17811: ABFS ExponentialRetryPolicy doesn't pick up configuration values (#3221)

Contributed by Brian Loss.
Brian Loss 3 vuotta sitten
vanhempi
commit
1d03c69963

+ 1 - 1
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java

@@ -1524,7 +1524,7 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport {
   private AbfsClientContext populateAbfsClientContext() {
     return new AbfsClientContextBuilder()
         .withExponentialRetryPolicy(
-            new ExponentialRetryPolicy(abfsConfiguration.getMaxIoRetries()))
+            new ExponentialRetryPolicy(abfsConfiguration))
         .withAbfsCounters(abfsCounters)
         .withAbfsPerfTracker(abfsPerfTracker)
         .build();

+ 11 - 0
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.fs.azurebfs.services;
 import java.util.Random;
 import java.net.HttpURLConnection;
 
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
 import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
 
 /**
@@ -89,6 +90,16 @@ public class ExponentialRetryPolicy {
         DEFAULT_CLIENT_BACKOFF);
   }
 
+  /**
+   * Initializes a new instance of the {@link ExponentialRetryPolicy} class.
+   *
+   * @param conf The {@link AbfsConfiguration} from which to retrieve retry configuration.
+   */
+  public ExponentialRetryPolicy(AbfsConfiguration conf) {
+    this(conf.getMaxIoRetries(), conf.getMinBackoffIntervalMilliseconds(), conf.getMaxBackoffIntervalMilliseconds(),
+        conf.getBackoffIntervalMilliseconds());
+  }
+
   /**
    * Initializes a new instance of the {@link ExponentialRetryPolicy} class.
    *

+ 25 - 3
hadoop-tools/hadoop-azure/src/site/markdown/abfs.md

@@ -338,7 +338,7 @@ with the following configurations.
  retries. Default value is 5.
 * `fs.azure.oauth.token.fetch.retry.min.backoff.interval`: Minimum back-off
   interval. Added to the retry interval computed from delta backoff. By
-   default this si set as 0. Set the interval in milli seconds.
+   default this is set as 0. Set the interval in milli seconds.
 * `fs.azure.oauth.token.fetch.retry.max.backoff.interval`: Maximum back-off
 interval. Default value is 60000 (sixty seconds). Set the interval in milli
 seconds.
@@ -800,9 +800,31 @@ The following configs are related to read and write operations.
 
 `fs.azure.io.retry.max.retries`: Sets the number of retries for IO operations.
 Currently this is used only for the server call retry logic. Used within
-AbfsClient class as part of the ExponentialRetryPolicy. The value should be
+`AbfsClient` class as part of the ExponentialRetryPolicy. The value should be
 greater than or equal to 0.
 
+`fs.azure.io.retry.min.backoff.interval`: Sets the minimum backoff interval for
+retries of IO operations. Currently this is used only for the server call retry
+logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This
+value indicates the smallest interval (in milliseconds) to wait before retrying
+an IO operation. The default value is 3000 (3 seconds).
+
+`fs.azure.io.retry.max.backoff.interval`: Sets the maximum backoff interval for
+retries of IO operations. Currently this is used only for the server call retry
+logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This
+value indicates the largest interval (in milliseconds) to wait before retrying
+an IO operation. The default value is 30000 (30 seconds).
+
+`fs.azure.io.retry.backoff.interval`: Sets the default backoff interval for
+retries of IO operations. Currently this is used only for the server call retry
+logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This
+value is used to compute a random delta between 80% and 120% of the specified
+value. This random delta is then multiplied by an exponent of the current IO
+retry number (i.e., the default is multiplied by `2^(retryNum - 1)`) and then
+contstrained within the range of [`fs.azure.io.retry.min.backoff.interval`,
+`fs.azure.io.retry.max.backoff.interval`] to determine the amount of time to
+wait before the next IO retry attempt. The default value is 3000 (3 seconds).
+
 `fs.azure.write.request.size`: To set the write buffer size. Specify the value
 in bytes. The value should be between 16384 to 104857600 both inclusive (16 KB
 to 100 MB). The default value will be 8388608 (8 MB).
@@ -859,7 +881,7 @@ when there are too many writes from the same process.
 
 ### <a name="securityconfigoptions"></a> Security Options
 `fs.azure.always.use.https`: Enforces to use HTTPS instead of HTTP when the flag
-is made true. Irrespective of the flag, AbfsClient will use HTTPS if the secure
+is made true. Irrespective of the flag, `AbfsClient` will use HTTPS if the secure
 scheme (ABFSS) is used or OAuth is used for authentication. By default this will
 be set to true.
 

+ 2 - 2
hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md

@@ -448,7 +448,7 @@ use requires the presence of secret credentials, where tests may be slow,
 and where finding out why something failed from nothing but the test output
 is critical.
 
-#### Subclasses Existing Shared Base Blasses
+#### Subclasses Existing Shared Base Classes
 
 There are a set of base classes which should be extended for Azure tests and
 integration tests.
@@ -602,7 +602,7 @@ various test combinations, it will:
 2. Run tests for all combinations
 3. Summarize results across all the test combination runs.
 
-As a pre-requiste step, fill config values for test accounts and credentials
+As a pre-requisite step, fill config values for test accounts and credentials
 needed for authentication in `src/test/resources/azure-auth-keys.xml.template`
 and rename as `src/test/resources/azure-auth-keys.xml`.
 

+ 35 - 5
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java

@@ -18,6 +18,11 @@
 
 package org.apache.hadoop.fs.azurebfs.services;
 
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_BACKOFF_INTERVAL;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_BACKOFF_INTERVAL;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_IO_RETRIES;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MIN_BACKOFF_INTERVAL;
+
 import java.util.Random;
 
 import org.junit.Assert;
@@ -32,7 +37,6 @@ import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
  * Unit test TestExponentialRetryPolicy.
  */
 public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest {
-
   private final int maxRetryCount = 30;
   private final int noRetryCount = 0;
   private final int retryCount = new Random().nextInt(maxRetryCount);
@@ -57,12 +61,38 @@ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest {
   @Test
   public void testDefaultMaxIORetryCount() throws Exception {
     AbfsConfiguration abfsConfig = getAbfsConfig();
-    Assert.assertTrue(
+    Assert.assertEquals(
         String.format("default maxIORetry count is %s.", maxRetryCount),
-        abfsConfig.getMaxIoRetries() == maxRetryCount);
+        maxRetryCount, abfsConfig.getMaxIoRetries());
     testMaxIOConfig(abfsConfig);
   }
 
+  @Test
+  public void testAbfsConfigConstructor() throws Exception {
+    // Ensure we choose expected values that are not defaults
+    ExponentialRetryPolicy template = new ExponentialRetryPolicy(
+        getAbfsConfig().getMaxIoRetries());
+    int testModifier = 1;
+    int expectedMaxRetries = template.getRetryCount() + testModifier;
+    int expectedMinBackoff = template.getMinBackoff() + testModifier;
+    int expectedMaxBackoff = template.getMaxBackoff() + testModifier;
+    int expectedDeltaBackoff = template.getDeltaBackoff() + testModifier;
+
+    Configuration config = new Configuration(this.getRawConfiguration());
+    config.setInt(AZURE_MAX_IO_RETRIES, expectedMaxRetries);
+    config.setInt(AZURE_MIN_BACKOFF_INTERVAL, expectedMinBackoff);
+    config.setInt(AZURE_MAX_BACKOFF_INTERVAL, expectedMaxBackoff);
+    config.setInt(AZURE_BACKOFF_INTERVAL, expectedDeltaBackoff);
+
+    ExponentialRetryPolicy policy = new ExponentialRetryPolicy(
+        new AbfsConfiguration(config, "dummyAccountName"));
+
+    Assert.assertEquals("Max retry count was not set as expected.", expectedMaxRetries, policy.getRetryCount());
+    Assert.assertEquals("Min backoff interval was not set as expected.", expectedMinBackoff, policy.getMinBackoff());
+    Assert.assertEquals("Max backoff interval was not set as expected.", expectedMaxBackoff, policy.getMaxBackoff());
+    Assert.assertEquals("Delta backoff interval was not set as expected.", expectedDeltaBackoff, policy.getDeltaBackoff());
+  }
+
   private AbfsConfiguration getAbfsConfig() throws Exception {
     Configuration
         config = new Configuration(this.getRawConfiguration());
@@ -81,8 +111,8 @@ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest {
       localRetryCount++;
     }
 
-    Assert.assertTrue(
+    Assert.assertEquals(
         "When all retries are exhausted, the retryCount will be same as max configured",
-        localRetryCount == abfsConfig.getMaxIoRetries());
+        abfsConfig.getMaxIoRetries(), localRetryCount);
   }
 }