
HDFS-7189. Add trace spans for DFSClient metadata operations. (Colin P. McCabe via yliu)

yliu committed 10 years ago (commit bdbf13ac46)

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -247,6 +247,9 @@ Release 2.7.0 - UNRELEASED
 
     HDFS-7457. DatanodeID generates excessive garbage. (daryn via kihwal)
 
+    HDFS-7189. Add trace spans for DFSClient metadata operations. (Colin P.
+    McCabe via yliu)
+
   OPTIMIZATIONS
 
     HDFS-7454. Reduce memory footprint for AclEntries in NameNode.

+ 14 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java

@@ -51,6 +51,10 @@ import org.apache.hadoop.security.token.Token;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import org.htrace.Sampler;
+import org.htrace.Span;
+import org.htrace.Trace;
+import org.htrace.TraceScope;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -71,7 +75,7 @@ class BlockStorageLocationUtil {
    */
   private static List<VolumeBlockLocationCallable> createVolumeBlockLocationCallables(
       Configuration conf, Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks,
-      int timeout, boolean connectToDnViaHostname) {
+      int timeout, boolean connectToDnViaHostname, Span parent) {
     
     if (datanodeBlocks.isEmpty()) {
       return Lists.newArrayList();
@@ -111,7 +115,7 @@ class BlockStorageLocationUtil {
       }
       VolumeBlockLocationCallable callable = new VolumeBlockLocationCallable(
           conf, datanode, poolId, blockIds, dnTokens, timeout, 
-          connectToDnViaHostname);
+          connectToDnViaHostname, parent);
       callables.add(callable);
     }
     return callables;
@@ -131,11 +135,11 @@ class BlockStorageLocationUtil {
   static Map<DatanodeInfo, HdfsBlocksMetadata> queryDatanodesForHdfsBlocksMetadata(
       Configuration conf, Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks,
       int poolsize, int timeoutMs, boolean connectToDnViaHostname)
-      throws InvalidBlockTokenException {
+        throws InvalidBlockTokenException {
 
     List<VolumeBlockLocationCallable> callables = 
         createVolumeBlockLocationCallables(conf, datanodeBlocks, timeoutMs, 
-            connectToDnViaHostname);
+            connectToDnViaHostname, Trace.currentSpan());
     
     // Use a thread pool to execute the Callables in parallel
     List<Future<HdfsBlocksMetadata>> futures = 
@@ -319,11 +323,12 @@ class BlockStorageLocationUtil {
     private final long[] blockIds;
     private final List<Token<BlockTokenIdentifier>> dnTokens;
     private final boolean connectToDnViaHostname;
+    private final Span parentSpan;
     
     VolumeBlockLocationCallable(Configuration configuration,
         DatanodeInfo datanode, String poolId, long []blockIds,
         List<Token<BlockTokenIdentifier>> dnTokens, int timeout, 
-        boolean connectToDnViaHostname) {
+        boolean connectToDnViaHostname, Span parentSpan) {
       this.configuration = configuration;
       this.timeout = timeout;
       this.datanode = datanode;
@@ -331,6 +336,7 @@ class BlockStorageLocationUtil {
       this.blockIds = blockIds;
       this.dnTokens = dnTokens;
       this.connectToDnViaHostname = connectToDnViaHostname;
+      this.parentSpan = parentSpan;
     }
     
     public DatanodeInfo getDatanodeInfo() {
@@ -342,6 +348,8 @@ class BlockStorageLocationUtil {
       HdfsBlocksMetadata metadata = null;
       // Create the RPC proxy and make the RPC
       ClientDatanodeProtocol cdp = null;
+      TraceScope scope =
+          Trace.startSpan("getHdfsBlocksMetadata", parentSpan);
       try {
         cdp = DFSUtil.createClientDatanodeProtocolProxy(datanode, configuration,
             timeout, connectToDnViaHostname);
@@ -350,6 +358,7 @@ class BlockStorageLocationUtil {
         // Bubble this up to the caller, handle with the Future
         throw e;
       } finally {
+        scope.close();
         if (cdp != null) {
           RPC.stopProxy(cdp);
         }

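The hunk above captures the caller's Span with Trace.currentSpan() and hands it to each VolumeBlockLocationCallable, so the per-datanode getHdfsBlocksMetadata RPCs executed on pool threads are linked back to the originating operation. Below is a minimal standalone sketch of that cross-thread pattern, assuming only the org.htrace (HTrace 3.x) artifact this change depends on; the class and span names are illustrative, not part of the commit, and Sampler.ALWAYS is assumed to be available alongside the Sampler.NEVER used elsewhere in this diff.

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.htrace.Sampler;
import org.htrace.Span;
import org.htrace.Trace;
import org.htrace.TraceScope;

public class ParentSpanExample {
  // Illustrative callable: it carries the caller's span so the span started
  // on the worker thread is linked to it, mirroring VolumeBlockLocationCallable.
  static class TracedTask implements Callable<String> {
    private final Span parentSpan;

    TracedTask(Span parentSpan) {
      this.parentSpan = parentSpan;
    }

    @Override
    public String call() throws Exception {
      // Start a child span on the worker thread. The hunk above passes the
      // parent unconditionally, relying on a null parent being tolerated.
      TraceScope scope = Trace.startSpan("workerOperation", parentSpan);
      try {
        return "done";            // the traced work would go here
      } finally {
        scope.close();            // always close, even if the work throws
      }
    }
  }

  public static void main(String[] args) throws Exception {
    // Open a span on the submitting thread (ALWAYS sampler for the demo).
    TraceScope outer = Trace.startSpan("submitOperation", Sampler.ALWAYS);
    ExecutorService pool = Executors.newSingleThreadExecutor();
    try {
      // Capture the current span and hand it to the task, as
      // queryDatanodesForHdfsBlocksMetadata does with Trace.currentSpan().
      Future<String> f = pool.submit(new TracedTask(Trace.currentSpan()));
      System.out.println(f.get());
    } finally {
      outer.close();
      pool.shutdown();
    }
  }
}
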
+ 285 - 64
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

File diff suppressed because it is too large.

+ 85 - 59
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java

@@ -26,6 +26,9 @@ import org.apache.hadoop.hdfs.inotify.EventBatchList;
 import org.apache.hadoop.hdfs.inotify.MissingEventsException;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.util.Time;
+import org.htrace.Sampler;
+import org.htrace.Trace;
+import org.htrace.TraceScope;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,6 +47,11 @@ public class DFSInotifyEventInputStream {
   public static Logger LOG = LoggerFactory.getLogger(DFSInotifyEventInputStream
       .class);
 
+  /**
+   * The trace sampler to use when making RPCs to the NameNode.
+   */
+  private final Sampler<?> traceSampler;
+
   private final ClientProtocol namenode;
   private Iterator<EventBatch> it;
   private long lastReadTxid;
@@ -59,12 +67,15 @@ public class DFSInotifyEventInputStream {
 
   private static final int INITIAL_WAIT_MS = 10;
 
-  DFSInotifyEventInputStream(ClientProtocol namenode) throws IOException {
-    this(namenode, namenode.getCurrentEditLogTxid()); // only consider new txn's
+  DFSInotifyEventInputStream(Sampler<?> traceSampler, ClientProtocol namenode)
+        throws IOException {
+    // Only consider new transaction IDs.
+    this(traceSampler, namenode, namenode.getCurrentEditLogTxid());
   }
 
-  DFSInotifyEventInputStream(ClientProtocol namenode, long lastReadTxid)
-      throws IOException {
+  DFSInotifyEventInputStream(Sampler traceSampler, ClientProtocol namenode,
+        long lastReadTxid) throws IOException {
+    this.traceSampler = traceSampler;
     this.namenode = namenode;
     this.it = Iterators.emptyIterator();
     this.lastReadTxid = lastReadTxid;
@@ -87,39 +98,45 @@ public class DFSInotifyEventInputStream {
    * The next available batch of events will be returned.
    */
   public EventBatch poll() throws IOException, MissingEventsException {
-    // need to keep retrying until the NN sends us the latest committed txid
-    if (lastReadTxid == -1) {
-      LOG.debug("poll(): lastReadTxid is -1, reading current txid from NN");
-      lastReadTxid = namenode.getCurrentEditLogTxid();
-      return null;
-    }
-    if (!it.hasNext()) {
-      EventBatchList el = namenode.getEditsFromTxid(lastReadTxid + 1);
-      if (el.getLastTxid() != -1) {
-        // we only want to set syncTxid when we were actually able to read some
-        // edits on the NN -- otherwise it will seem like edits are being
-        // generated faster than we can read them when the problem is really
-        // that we are temporarily unable to read edits
-        syncTxid = el.getSyncTxid();
-        it = el.getBatches().iterator();
-        long formerLastReadTxid = lastReadTxid;
-        lastReadTxid = el.getLastTxid();
-        if (el.getFirstTxid() != formerLastReadTxid + 1) {
-          throw new MissingEventsException(formerLastReadTxid + 1,
-              el.getFirstTxid());
+    TraceScope scope =
+        Trace.startSpan("inotifyPoll", traceSampler);
+    try {
+      // need to keep retrying until the NN sends us the latest committed txid
+      if (lastReadTxid == -1) {
+        LOG.debug("poll(): lastReadTxid is -1, reading current txid from NN");
+        lastReadTxid = namenode.getCurrentEditLogTxid();
+        return null;
+      }
+      if (!it.hasNext()) {
+        EventBatchList el = namenode.getEditsFromTxid(lastReadTxid + 1);
+        if (el.getLastTxid() != -1) {
+          // we only want to set syncTxid when we were actually able to read some
+          // edits on the NN -- otherwise it will seem like edits are being
+          // generated faster than we can read them when the problem is really
+          // that we are temporarily unable to read edits
+          syncTxid = el.getSyncTxid();
+          it = el.getBatches().iterator();
+          long formerLastReadTxid = lastReadTxid;
+          lastReadTxid = el.getLastTxid();
+          if (el.getFirstTxid() != formerLastReadTxid + 1) {
+            throw new MissingEventsException(formerLastReadTxid + 1,
+                el.getFirstTxid());
+          }
+        } else {
+          LOG.debug("poll(): read no edits from the NN when requesting edits " +
+            "after txid {}", lastReadTxid);
+          return null;
         }
+      }
+
+      if (it.hasNext()) { // can be empty if el.getLastTxid != -1 but none of the
+        // newly seen edit log ops actually got converted to events
+        return it.next();
       } else {
-        LOG.debug("poll(): read no edits from the NN when requesting edits " +
-          "after txid {}", lastReadTxid);
         return null;
       }
-    }
-
-    if (it.hasNext()) { // can be empty if el.getLastTxid != -1 but none of the
-      // newly seen edit log ops actually got converted to events
-      return it.next();
-    } else {
-      return null;
+    } finally {
+      scope.close();
     }
   }
 
@@ -163,25 +180,29 @@ public class DFSInotifyEventInputStream {
    */
   public EventBatch poll(long time, TimeUnit tu) throws IOException,
       InterruptedException, MissingEventsException {
-    long initialTime = Time.monotonicNow();
-    long totalWait = TimeUnit.MILLISECONDS.convert(time, tu);
-    long nextWait = INITIAL_WAIT_MS;
+    TraceScope scope = Trace.startSpan("inotifyPollWithTimeout", traceSampler);
     EventBatch next = null;
-    while ((next = poll()) == null) {
-      long timeLeft = totalWait - (Time.monotonicNow() - initialTime);
-      if (timeLeft <= 0) {
-        LOG.debug("timed poll(): timed out");
-        break;
-      } else if (timeLeft < nextWait * 2) {
-        nextWait = timeLeft;
-      } else {
-        nextWait *= 2;
+    try {
+      long initialTime = Time.monotonicNow();
+      long totalWait = TimeUnit.MILLISECONDS.convert(time, tu);
+      long nextWait = INITIAL_WAIT_MS;
+      while ((next = poll()) == null) {
+        long timeLeft = totalWait - (Time.monotonicNow() - initialTime);
+        if (timeLeft <= 0) {
+          LOG.debug("timed poll(): timed out");
+          break;
+        } else if (timeLeft < nextWait * 2) {
+          nextWait = timeLeft;
+        } else {
+          nextWait *= 2;
+        }
+        LOG.debug("timed poll(): poll() returned null, sleeping for {} ms",
+            nextWait);
+        Thread.sleep(nextWait);
       }
-      LOG.debug("timed poll(): poll() returned null, sleeping for {} ms",
-          nextWait);
-      Thread.sleep(nextWait);
+    } finally {
+      scope.close();
     }
-
     return next;
   }
 
@@ -196,18 +217,23 @@ public class DFSInotifyEventInputStream {
    */
   public EventBatch take() throws IOException, InterruptedException,
       MissingEventsException {
+    TraceScope scope = Trace.startSpan("inotifyTake", traceSampler);
     EventBatch next = null;
-    int nextWaitMin = INITIAL_WAIT_MS;
-    while ((next = poll()) == null) {
-      // sleep for a random period between nextWaitMin and nextWaitMin * 2
-      // to avoid stampedes at the NN if there are multiple clients
-      int sleepTime = nextWaitMin + rng.nextInt(nextWaitMin);
-      LOG.debug("take(): poll() returned null, sleeping for {} ms", sleepTime);
-      Thread.sleep(sleepTime);
-      // the maximum sleep is 2 minutes
-      nextWaitMin = Math.min(60000, nextWaitMin * 2);
+    try {
+      int nextWaitMin = INITIAL_WAIT_MS;
+      while ((next = poll()) == null) {
+        // sleep for a random period between nextWaitMin and nextWaitMin * 2
+        // to avoid stampedes at the NN if there are multiple clients
+        int sleepTime = nextWaitMin + rng.nextInt(nextWaitMin);
+        LOG.debug("take(): poll() returned null, sleeping for {} ms", sleepTime);
+        Thread.sleep(sleepTime);
+        // the maximum sleep is 2 minutes
+        nextWaitMin = Math.min(60000, nextWaitMin * 2);
+      }
+    } finally {
+      scope.close();
     }
 
     return next;
   }
-}
+}

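In DFSInotifyEventInputStream, each public call (poll, timed poll, take) is now wrapped in a single span driven by a Sampler supplied at construction time, so the whole retry loop, including its sleeps, is attributed to one traced operation and the scope is closed whether the loop exits normally, times out, or throws. The following is a minimal sketch of that pattern under the same org.htrace dependency; the class, method, and span names are hypothetical stand-ins, not the commit's code.

import java.util.concurrent.TimeUnit;

import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;

public class PollingExample {
  private final Sampler<?> traceSampler;
  private int attempts = 0;

  public PollingExample(Sampler<?> traceSampler) {
    this.traceSampler = traceSampler;
  }

  // Stand-in for an RPC that may return null until data is available.
  private String pollOnce() {
    return ++attempts < 3 ? null : "event";
  }

  public String pollWithTimeout(long time, TimeUnit tu)
      throws InterruptedException {
    // One span covers the entire retry loop, as inotifyPollWithTimeout does.
    TraceScope scope = Trace.startSpan("pollWithTimeout", traceSampler);
    try {
      long deadline = System.nanoTime() + tu.toNanos(time);
      String next;
      while ((next = pollOnce()) == null) {
        if (System.nanoTime() >= deadline) {
          return null;             // timed out; the span still closes below
        }
        Thread.sleep(10);          // simple fixed backoff for the sketch
      }
      return next;
    } finally {
      scope.close();               // close on success, timeout, or throw
    }
  }

  public static void main(String[] args) throws InterruptedException {
    System.out.println(new PollingExample(Sampler.NEVER)
        .pollWithTimeout(1, TimeUnit.SECONDS));
  }
}
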
+ 9 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java

@@ -27,6 +27,9 @@ import org.apache.hadoop.fs.InvalidRequestException;
 import org.apache.hadoop.ipc.RemoteException;
 
 import com.google.common.base.Preconditions;
+import org.htrace.Sampler;
+import org.htrace.Trace;
+import org.htrace.TraceScope;
 
 /**
  * CacheDirectiveIterator is a remote iterator that iterates cache directives.
@@ -39,12 +42,14 @@ public class CacheDirectiveIterator
 
   private CacheDirectiveInfo filter;
   private final ClientProtocol namenode;
+  private final Sampler<?> traceSampler;
 
   public CacheDirectiveIterator(ClientProtocol namenode,
-      CacheDirectiveInfo filter) {
+      CacheDirectiveInfo filter, Sampler<?> traceSampler) {
     super(0L);
     this.namenode = namenode;
     this.filter = filter;
+    this.traceSampler = traceSampler;
   }
 
   private static CacheDirectiveInfo removeIdFromFilter(CacheDirectiveInfo filter) {
@@ -89,6 +94,7 @@ public class CacheDirectiveIterator
   public BatchedEntries<CacheDirectiveEntry> makeRequest(Long prevKey)
       throws IOException {
     BatchedEntries<CacheDirectiveEntry> entries = null;
+    TraceScope scope = Trace.startSpan("listCacheDirectives", traceSampler);
     try {
       entries = namenode.listCacheDirectives(prevKey, filter);
     } catch (IOException e) {
@@ -110,6 +116,8 @@ public class CacheDirectiveIterator
             "Did not find requested id " + id);
       }
       throw e;
+    } finally {
+      scope.close();
     }
     Preconditions.checkNotNull(entries);
     return entries;

+ 12 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java

@@ -23,6 +23,9 @@ import java.io.IOException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.BatchedRemoteIterator;
+import org.htrace.Sampler;
+import org.htrace.Trace;
+import org.htrace.TraceScope;
 
 /**
  * CachePoolIterator is a remote iterator that iterates cache pools.
@@ -34,16 +37,23 @@ public class CachePoolIterator
     extends BatchedRemoteIterator<String, CachePoolEntry> {
 
   private final ClientProtocol namenode;
+  private final Sampler traceSampler;
 
-  public CachePoolIterator(ClientProtocol namenode) {
+  public CachePoolIterator(ClientProtocol namenode, Sampler traceSampler) {
     super("");
     this.namenode = namenode;
+    this.traceSampler = traceSampler;
   }
 
   @Override
   public BatchedEntries<CachePoolEntry> makeRequest(String prevKey)
       throws IOException {
-    return namenode.listCachePools(prevKey);
+    TraceScope scope = Trace.startSpan("listCachePools", traceSampler);
+    try {
+      return namenode.listCachePools(prevKey);
+    } finally {
+      scope.close();
+    }
   }
 
   @Override

+ 13 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java

@@ -23,6 +23,9 @@ import java.io.IOException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.BatchedRemoteIterator;
+import org.htrace.Sampler;
+import org.htrace.Trace;
+import org.htrace.TraceScope;
 
 /**
  * EncryptionZoneIterator is a remote iterator that iterates over encryption
@@ -34,16 +37,24 @@ public class EncryptionZoneIterator
     extends BatchedRemoteIterator<Long, EncryptionZone> {
 
   private final ClientProtocol namenode;
+  private final Sampler<?> traceSampler;
 
-  public EncryptionZoneIterator(ClientProtocol namenode) {
+  public EncryptionZoneIterator(ClientProtocol namenode,
+                                Sampler<?> traceSampler) {
     super(Long.valueOf(0));
     this.namenode = namenode;
+    this.traceSampler = traceSampler;
   }
 
   @Override
   public BatchedEntries<EncryptionZone> makeRequest(Long prevId)
       throws IOException {
-    return namenode.listEncryptionZones(prevId);
+    TraceScope scope = Trace.startSpan("listEncryptionZones", traceSampler);
+    try {
+      return namenode.listEncryptionZones(prevId);
+    } finally {
+      scope.close();
+    }
   }
 
   @Override

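The iterator classes (CacheDirectiveIterator, CachePoolIterator, EncryptionZoneIterator) take the Sampler in their constructors and start one span per makeRequest call, so a long listing shows up as a series of per-page spans rather than one unbounded span. Here is a rough sketch of that per-page pattern using a hypothetical paged listing; none of these names come from the commit, and only htrace-core (org.htrace) is assumed on the classpath.

import java.util.Arrays;
import java.util.List;

import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;

public class PagedListingExample {
  private final Sampler<?> traceSampler;

  public PagedListingExample(Sampler<?> traceSampler) {
    this.traceSampler = traceSampler;
  }

  // Stand-in for a NameNode RPC returning one page of results.
  private List<String> fetchPage(int pageIndex) {
    return pageIndex < 3
        ? Arrays.asList("entry-" + pageIndex)
        : Arrays.<String>asList();
  }

  public void listAll() {
    int page = 0;
    while (true) {
      // One span per page, mirroring makeRequest() in the iterators above.
      TraceScope scope = Trace.startSpan("listItems", traceSampler);
      List<String> entries;
      try {
        entries = fetchPage(page++);
      } finally {
        scope.close();
      }
      if (entries.isEmpty()) {
        break;
      }
      for (String e : entries) {
        System.out.println(e);
      }
    }
  }

  public static void main(String[] args) {
    new PagedListingExample(Sampler.NEVER).listAll();
  }
}

Tests can keep tracing disabled by passing Sampler.NEVER to the new constructor parameter, which is exactly what the TestCacheDirectives change below does.
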
+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java

@@ -88,6 +88,7 @@ import org.apache.hadoop.util.GSet;
 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import org.htrace.Sampler;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -908,7 +909,7 @@ public class TestCacheDirectives {
 
     // Uncache and check each path in sequence
     RemoteIterator<CacheDirectiveEntry> entries =
-      new CacheDirectiveIterator(nnRpc, null);
+      new CacheDirectiveIterator(nnRpc, null, Sampler.NEVER);
     for (int i=0; i<numFiles; i++) {
       CacheDirectiveEntry entry = entries.next();
       nnRpc.removeCacheDirective(entry.getInfo().getId());

Some files were not shown because too many files changed in this diff