Browse Source

HADOOP-19015. Increase fs.s3a.connection.maximum to 500 to minimize risk of Timeout waiting for connection from pool. (#6372) (#6487)

Contributed By: Mukund Thakur
Mukund Thakur 1 year ago
parent
commit
11d622d4b7

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -1362,7 +1362,7 @@
 
 <property>
   <name>fs.s3a.connection.maximum</name>
-  <value>96</value>
+  <value>500</value>
   <description>Controls the maximum number of simultaneous connections to S3.
     This must be bigger than the value of fs.s3a.threads.max so as to stop
     threads being blocked waiting for new HTTPS connections.

+ 7 - 1
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

@@ -152,7 +152,13 @@ public final class Constants {
 
   // number of simultaneous connections to s3
   public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum";
-  public static final int DEFAULT_MAXIMUM_CONNECTIONS = 96;
+
+  /**
+   * Default value for {@link #MAXIMUM_CONNECTIONS}: {@value}.
+   * Future releases are likely to increase this value.
+   * Keep in sync with the value in {@code core-default.xml}
+   */
+  public static final int DEFAULT_MAXIMUM_CONNECTIONS = 500;
 
   /**
    * Configuration option to configure expiration time of

+ 3 - 3
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md

@@ -211,7 +211,7 @@ for parallel IO (especially uploads) by setting the properties
 | property | meaning | default |
 |----------|---------|---------|
 | `fs.s3a.threads.max`| Threads in the AWS transfer manager| 10 |
-| `fs.s3a.connection.maximum`| Maximum number of HTTP connections | 10|
+| `fs.s3a.connection.maximum`| Maximum number of HTTP connections | 500 |
 
 We recommend using larger values for processes which perform
 a lot of IO: `DistCp`, Spark Workers and similar.
@@ -219,11 +219,11 @@ a lot of IO: `DistCp`, Spark Workers and similar.
 ```xml
 <property>
   <name>fs.s3a.threads.max</name>
-  <value>20</value>
+  <value>10</value>
 </property>
 <property>
   <name>fs.s3a.connection.maximum</name>
-  <value>20</value>
+  <value>500</value>
 </property>
 ```