Browse Source

HDDS-2153. Add a config to tune max pending requests in Ratis leader

Closes #1474
Shashikant Banerjee 5 years ago
parent
commit
a530ac3f50

+ 5 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java

@@ -105,6 +105,11 @@ public final class ScmConfigKeys {
   // TODO: Set to 1024 once RATIS issue around purge is fixed.
   public static final int DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT =
       1000000;
+
+  public static final String DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS =
+      "dfs.container.ratis.leader.num.pending.requests";
+  public static final int
+      DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT = 4096;
   // expiry interval stateMachineData cache entry inside containerStateMachine
   public static final String
       DFS_CONTAINER_RATIS_STATEMACHINEDATA_CACHE_EXPIRY_INTERVAL =

+ 5 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java

@@ -329,6 +329,11 @@ public final class OzoneConfigKeys {
       ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_PURGE_GAP;
   public static final int DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT =
       ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT;
+  public static final String DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS =
+      ScmConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS;
+  public static final int
+      DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT =
+      ScmConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT;
   public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY =
       ScmConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY;
   public static final TimeDuration

+ 8 - 0
hadoop-hdds/common/src/main/resources/ozone-default.xml

@@ -203,6 +203,14 @@
       will use for writing chunks (60 by default).
     </description>
   </property>
+  <property>
+    <name>dfs.container.ratis.leader.num.pending.requests</name>
+    <value>4096</value>
+    <tag>OZONE, RATIS, PERFORMANCE</tag>
+    <description>Maximum number of pending requests on the Ratis leader; once
+      this limit is reached, the leader rejects new requests from clients.
+    </description>
+  </property>
   <property>
     <name>dfs.container.ratis.replication.level</name>
     <value>MAJORITY</value>

+ 6 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.security.x509.SecurityConfig;
 import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
 import org.apache.hadoop.hdds.tracing.TracingUtil;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
+
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
@@ -225,6 +226,11 @@ public final class XceiverServerRatis extends XceiverServer {
       setAutoTriggerEnabled(properties, true);
     RaftServerConfigKeys.Snapshot.
       setAutoTriggerThreshold(properties, snapshotThreshold);
+    int maxPendingRequets = conf.getInt(
+        OzoneConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS,
+        OzoneConfigKeys.DFS_CONTAINER_RATIS_LEADER_NUM_PENDING_REQUESTS_DEFAULT
+    );
+    RaftServerConfigKeys.Write.setElementLimit(properties, maxPendingRequets);
     int logQueueNumElements =
         conf.getInt(OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS,
             OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS_DEFAULT);