Browse Source

HDDS-794. Add configs to set StateMachineData write timeout in ContainerStateMachine. Contributed by Shashikant Banerjee.

(cherry picked from commit 408f59caa9321be8a55afe44b1811c5dacf23206)
Arpit Agarwal 6 years ago
parent
commit
53d4aefae8

+ 6 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java

@@ -74,6 +74,12 @@ public final class ScmConfigKeys {
       "dfs.container.ratis.segment.preallocated.size";
       "dfs.container.ratis.segment.preallocated.size";
   public static final int
   public static final int
       DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT = 128 * 1024 * 1024;
       DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT = 128 * 1024 * 1024;
+  public static final String
+      DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT =
+      "dfs.container.ratis.statemachinedata.sync.timeout";
+  public static final TimeDuration
+      DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT =
+      TimeDuration.valueOf(10, TimeUnit.SECONDS);
   public static final String DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY =
   public static final String DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY =
       "dfs.ratis.client.request.timeout.duration";
       "dfs.ratis.client.request.timeout.duration";
   public static final TimeDuration
   public static final TimeDuration

+ 9 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java

@@ -232,6 +232,15 @@ public final class OzoneConfigKeys {
       = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY;
       = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY;
   public static final int DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT
   public static final int DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT
       = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT;
       = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT;
+
+  // Config key and default value for the stateMachineData sync timeout
+  public static final String
+      DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT =
+      ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT;
+  public static final TimeDuration
+      DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT =
+      ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT;
+
   public static final int DFS_CONTAINER_CHUNK_MAX_SIZE
   public static final int DFS_CONTAINER_CHUNK_MAX_SIZE
       = ScmConfigKeys.OZONE_SCM_CHUNK_MAX_SIZE;
       = ScmConfigKeys.OZONE_SCM_CHUNK_MAX_SIZE;
   public static final String DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR =
   public static final String DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR =

+ 7 - 0
hadoop-hdds/common/src/main/resources/ozone-default.xml

@@ -52,6 +52,13 @@
       running unit tests.
       running unit tests.
     </description>
     </description>
   </property>
   </property>
+  <property>
+    <name>dfs.container.ratis.statemachinedata.sync.timeout</name>
+    <value>10s</value>
+    <tag>OZONE, DEBUG, CONTAINER, RATIS</tag>
+    <description>Timeout for StateMachine data writes by Ratis.
+    </description>
+  </property>
   <property>
   <property>
     <name>dfs.container.ratis.datanode.storage.dir</name>
     <name>dfs.container.ratis.datanode.storage.dir</name>
     <value/>
     <value/>

+ 16 - 2
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java

@@ -307,9 +307,18 @@ public class ContainerStateMachine extends BaseStateMachine {
           () -> runCommand(requestProto), chunkExecutor);
           () -> runCommand(requestProto), chunkExecutor);
     }
     }
     writeChunkFutureMap.put(entryIndex, writeChunkFuture);
     writeChunkFutureMap.put(entryIndex, writeChunkFuture);
+    LOG.debug("writeChunk writeStateMachineData : blockId " + write.getBlockID()
+        + " logIndex " + entryIndex + " chunkName " + write.getChunkData()
+        .getChunkName());
     // Remove the future once it finishes execution from the
     // Remove the future once it finishes execution from the
     // writeChunkFutureMap.
     // writeChunkFutureMap.
-    writeChunkFuture.thenApply(r -> writeChunkFutureMap.remove(entryIndex));
+    writeChunkFuture.thenApply(r -> {
+      writeChunkFutureMap.remove(entryIndex);
+      LOG.debug("writeChunk writeStateMachineData  completed: blockId " + write
+          .getBlockID() + " logIndex " + entryIndex + " chunkName " + write
+          .getChunkData().getChunkName());
+      return r;
+    });
     return writeChunkFuture;
     return writeChunkFuture;
   }
   }
 
 
@@ -519,7 +528,12 @@ public class ContainerStateMachine extends BaseStateMachine {
       if (cmdType == Type.CreateContainer) {
       if (cmdType == Type.CreateContainer) {
         long containerID = requestProto.getContainerID();
         long containerID = requestProto.getContainerID();
         future.thenApply(
         future.thenApply(
-            r -> createContainerFutureMap.remove(containerID).complete(null));
+            r -> {
+              createContainerFutureMap.remove(containerID).complete(null);
+              LOG.info("create Container Transaction completed for container " +
+                  containerID + " log index " + index);
+              return r;
+            });
       }
       }
 
 
       future.thenAccept(m -> {
       future.thenAccept(m -> {

+ 14 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java

@@ -186,6 +186,20 @@ public final class XceiverServerRatis implements XceiverServerSpi {
     RaftClientConfigKeys.Rpc
     RaftClientConfigKeys.Rpc
         .setRequestTimeout(properties, clientRequestTimeout);
         .setRequestTimeout(properties, clientRequestTimeout);
 
 
+    // Enable stateMachineData sync and set the sync timeout
+    RaftServerConfigKeys.Log.StateMachineData.setSync(properties, true);
+    timeUnit = OzoneConfigKeys.
+        DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT.getUnit();
+    duration = conf.getTimeDuration(
+        OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT,
+        OzoneConfigKeys.
+            DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT
+            .getDuration(), timeUnit);
+    final TimeDuration dataSyncTimeout =
+        TimeDuration.valueOf(duration, timeUnit);
+    RaftServerConfigKeys.Log.StateMachineData
+        .setSyncTimeout(properties, dataSyncTimeout);
+
     // Set the server Request timeout
     // Set the server Request timeout
     timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT
     timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT
         .getUnit();
         .getUnit();

+ 2 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java

@@ -138,6 +138,8 @@ public final class ChunkUtils {
         }
         }
       }
       }
     }
     }
+    log.debug("Write Chunk completed for chunkFile: {}, size {}", chunkFile,
+        data.length);
   }
   }
 
 
   /**
   /**

+ 2 - 1
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java

@@ -82,7 +82,8 @@ public class ChunkManagerImpl implements ChunkManager {
           chunkFile, info);
           chunkFile, info);
       File tmpChunkFile = getTmpChunkFile(chunkFile, info);
       File tmpChunkFile = getTmpChunkFile(chunkFile, info);
 
 
-      LOG.debug("writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file",
+      LOG.debug(
+          "writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file:{}",
           info.getChunkName(), stage, chunkFile, tmpChunkFile);
           info.getChunkName(), stage, chunkFile, tmpChunkFile);
 
 
       switch (stage) {
       switch (stage) {