Browse Source

HDDS-1214. Enable tracing for the datanode read/write path. Contributed by Elek, Marton.

Closes #550.
Márton Elek 6 years ago
parent
commit
d17e31e062

+ 14 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/tracing/TracingUtil.java

@@ -22,6 +22,7 @@ import java.lang.reflect.Proxy;
 import io.jaegertracing.Configuration;
 import io.jaegertracing.internal.JaegerTracer;
 import io.opentracing.Scope;
+import io.opentracing.Span;
 import io.opentracing.SpanContext;
 import io.opentracing.Tracer;
 import io.opentracing.util.GlobalTracer;
@@ -64,6 +65,19 @@ public final class TracingUtil {
     return builder.toString();
   }
 
+  /**
+   * Export the given span's context as an encoded tracing string (empty if the span is null).
+   *
+   * @return encoded tracing context.
+   */
+  public static String exportSpan(Span span) {
+    StringBuilder builder = new StringBuilder();
+    if (span != null) {
+      GlobalTracer.get().inject(span.context(), StringCodec.FORMAT, builder);
+    }
+    return builder.toString();
+  }
+
   /**
    * Create a new scope and use the imported span as the parent.
    *

+ 13 - 1
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers
     .InvalidContainerStateException;
 import org.apache.hadoop.hdds.scm.container.common.helpers
     .StorageContainerException;
+import org.apache.hadoop.hdds.tracing.TracingUtil;
 import org.apache.hadoop.ozone.audit.AuditAction;
 import org.apache.hadoop.ozone.audit.AuditEventStatus;
 import org.apache.hadoop.ozone.audit.AuditLogger;
@@ -61,6 +62,8 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.
     ContainerDataProto.State;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result;
 import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
+
+import io.opentracing.Scope;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -137,10 +140,19 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor {
     containerSet.buildMissingContainerSet(createdContainerSet);
   }
 
-  @SuppressWarnings("methodlength")
   @Override
   public ContainerCommandResponseProto dispatch(
       ContainerCommandRequestProto msg, DispatcherContext dispatcherContext) {
+    String spanName = "HddsDispatcher." + msg.getCmdType().name();
+    try (Scope scope = TracingUtil
+        .importAndCreateScope(spanName, msg.getTraceID())) {
+      return dispatchRequest(msg, dispatcherContext);
+    }
+  }
+
+  @SuppressWarnings("methodlength")
+  private ContainerCommandResponseProto dispatchRequest(
+      ContainerCommandRequestProto msg, DispatcherContext dispatcherContext) {
     Preconditions.checkNotNull(msg);
     LOG.trace("Command {}, trace ID: {} ", msg.getCmdType().toString(),
         msg.getTraceID());

+ 2 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.proto
     .StorageContainerDatanodeProtocolProtos.SCMCommandProto;
 import org.apache.hadoop.hdds.protocol.proto
     .StorageContainerDatanodeProtocolProtos.CloseContainerCommandProto;
+import org.apache.hadoop.hdds.tracing.TracingUtil;
 import org.apache.hadoop.ozone.container.common.interfaces.Container;
 import org.apache.hadoop.ozone.container.common.statemachine
     .SCMConnectionManager;
@@ -133,6 +134,7 @@ public class CloseContainerCommandHandler implements CommandHandler {
     final ContainerCommandRequestProto.Builder command =
         ContainerCommandRequestProto.newBuilder();
     command.setCmdType(ContainerProtos.Type.CloseContainer);
+    command.setTraceID(TracingUtil.exportCurrentSpan());
     command.setContainerID(containerId);
     command.setCloseContainer(
         ContainerProtos.CloseContainerRequestProto.getDefaultInstance());

+ 14 - 6
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java

@@ -31,10 +31,12 @@ import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
 import org.apache.hadoop.hdds.scm.container.common.helpers.
     StorageContainerException;
 import org.apache.hadoop.hdds.tracing.GrpcServerInterceptor;
+import org.apache.hadoop.hdds.tracing.TracingUtil;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
 
+import io.opentracing.Scope;
 import org.apache.ratis.thirdparty.io.grpc.BindableService;
 import org.apache.ratis.thirdparty.io.grpc.Server;
 import org.apache.ratis.thirdparty.io.grpc.ServerBuilder;
@@ -168,12 +170,18 @@ public final class XceiverServerGrpc extends XceiverServer {
   @Override
   public void submitRequest(ContainerCommandRequestProto request,
       HddsProtos.PipelineID pipelineID) throws IOException {
-    super.submitRequest(request, pipelineID);
-    ContainerProtos.ContainerCommandResponseProto response =
-        storageContainer.dispatch(request, null);
-    if (response.getResult() != ContainerProtos.Result.SUCCESS) {
-      throw new StorageContainerException(response.getMessage(),
-          response.getResult());
+    try (Scope scope = TracingUtil
+        .importAndCreateScope(
+            "XceiverServerGrpc." + request.getCmdType().name(),
+            request.getTraceID())) {
+
+      super.submitRequest(request, pipelineID);
+      ContainerProtos.ContainerCommandResponseProto response =
+          storageContainer.dispatch(request, null);
+      if (response.getResult() != ContainerProtos.Result.SUCCESS) {
+        throw new StorageContainerException(response.getMessage(),
+            response.getResult());
+      }
     }
   }
 

+ 43 - 46
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java

@@ -25,7 +25,6 @@ import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 
-import io.opentracing.Scope;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
 import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
@@ -51,7 +50,6 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
     .ReadChunkRequestProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
     .ReadChunkResponseProto;
-import org.apache.hadoop.hdds.tracing.TracingUtil;
 import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
 import org.apache.hadoop.hdds.security.token.TokenVerifier;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -271,51 +269,50 @@ public class ContainerStateMachine extends BaseStateMachine {
     final ContainerCommandRequestProto proto =
         getContainerCommandRequestProto(request.getMessage().getContent());
     Preconditions.checkArgument(request.getRaftGroupId().equals(gid));
-    try (Scope scope = TracingUtil
-        .importAndCreateScope(proto.getCmdType().name(), proto.getTraceID())) {
-      try {
-        dispatcher.validateContainerCommand(proto);
-      } catch (IOException ioe) {
-        TransactionContext ctxt = TransactionContext.newBuilder()
-            .setClientRequest(request)
-            .setStateMachine(this)
-            .setServerRole(RaftPeerRole.LEADER)
-            .build();
-        ctxt.setException(ioe);
-        return ctxt;
-      }
-      if (proto.getCmdType() == Type.WriteChunk) {
-        final WriteChunkRequestProto write = proto.getWriteChunk();
-        // create the log entry proto
-        final WriteChunkRequestProto commitWriteChunkProto =
-            WriteChunkRequestProto.newBuilder()
-                .setBlockID(write.getBlockID())
-                .setChunkData(write.getChunkData())
-                // skipping the data field as it is
-                // already set in statemachine data proto
-                .build();
-        ContainerCommandRequestProto commitContainerCommandProto =
-            ContainerCommandRequestProto
-                .newBuilder(proto)
-                .setWriteChunk(commitWriteChunkProto)
-                .build();
-
-        return TransactionContext.newBuilder()
-            .setClientRequest(request)
-            .setStateMachine(this)
-            .setServerRole(RaftPeerRole.LEADER)
-            .setStateMachineData(write.getData())
-            .setLogData(commitContainerCommandProto.toByteString())
-            .build();
-      } else {
-        return TransactionContext.newBuilder()
-            .setClientRequest(request)
-            .setStateMachine(this)
-            .setServerRole(RaftPeerRole.LEADER)
-            .setLogData(request.getMessage().getContent())
-            .build();
-      }
+    try {
+      dispatcher.validateContainerCommand(proto);
+    } catch (IOException ioe) {
+      TransactionContext ctxt = TransactionContext.newBuilder()
+          .setClientRequest(request)
+          .setStateMachine(this)
+          .setServerRole(RaftPeerRole.LEADER)
+          .build();
+      ctxt.setException(ioe);
+      return ctxt;
     }
+    if (proto.getCmdType() == Type.WriteChunk) {
+      final WriteChunkRequestProto write = proto.getWriteChunk();
+      // create the log entry proto
+      final WriteChunkRequestProto commitWriteChunkProto =
+          WriteChunkRequestProto.newBuilder()
+              .setBlockID(write.getBlockID())
+              .setChunkData(write.getChunkData())
+              // skipping the data field as it is
+              // already set in statemachine data proto
+              .build();
+      ContainerCommandRequestProto commitContainerCommandProto =
+          ContainerCommandRequestProto
+              .newBuilder(proto)
+              .setWriteChunk(commitWriteChunkProto)
+              .setTraceID(proto.getTraceID())
+              .build();
+
+      return TransactionContext.newBuilder()
+          .setClientRequest(request)
+          .setStateMachine(this)
+          .setServerRole(RaftPeerRole.LEADER)
+          .setStateMachineData(write.getData())
+          .setLogData(commitContainerCommandProto.toByteString())
+          .build();
+    } else {
+      return TransactionContext.newBuilder()
+          .setClientRequest(request)
+          .setStateMachine(this)
+          .setServerRole(RaftPeerRole.LEADER)
+          .setLogData(request.getMessage().getContent())
+          .build();
+    }
+
   }
 
   private ByteString getStateMachineData(StateMachineLogEntryProto entryProto) {

+ 2 - 1
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java

@@ -486,7 +486,8 @@ public final class XceiverServerRatis extends XceiverServer {
     super.submitRequest(request, pipelineID);
     RaftClientReply reply;
     try (Scope scope = TracingUtil
-        .importAndCreateScope(request.getCmdType().name(),
+        .importAndCreateScope(
+            "XceiverServerRatis." + request.getCmdType().name(),
             request.getTraceID())) {
 
       RaftClientRequest raftClientRequest =