
HDFS-17683. Add metrics for acquiring dataset read/write lock. (#7211). Contributed by farmmamba.

Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
hfutatzhanghb committed 3 months ago
commit fd4aa2a0d4

+ 4 - 0
hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md

@@ -534,6 +534,10 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
 | `ProcessedCommandsOpNumOps` | Total number of processed commands operations |
 | `ProcessedCommandsOpAvgTime` | Average time of processed commands operations in milliseconds |
 | `NullStorageBlockReports` | Number of blocks in IBRs that failed due to null storage |
+| `AcquireDatasetReadLockNumOps` | Total number of dataset read lock acquisition operations |
+| `AcquireDatasetReadLockAvgTime` | Average time of dataset read lock acquisition operations in nanoseconds |
+| `AcquireDatasetWriteLockNumOps` | Total number of dataset write lock acquisition operations |
+| `AcquireDatasetWriteLockAvgTime` | Average time of dataset write lock acquisition operations in nanoseconds |
 
 FsVolume
 --------
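
The four new rows come from two MutableRate metrics, so every lock acquisition bumps the NumOps counter and folds its latency into the AvgTime average for the current reporting interval. One way to pull the aggregated values is over JMX; the sketch below is illustrative only, and both the remote JMX URL (port 9999) and the DataNodeActivity* object-name pattern are assumptions that depend on how the DataNode JVM is configured.

    import java.util.Set;
    import javax.management.MBeanServerConnection;
    import javax.management.ObjectName;
    import javax.management.remote.JMXConnector;
    import javax.management.remote.JMXConnectorFactory;
    import javax.management.remote.JMXServiceURL;

    public class DatasetLockMetricsProbe {
      public static void main(String[] args) throws Exception {
        // Assumption: the DataNode JVM exposes remote JMX on localhost:9999.
        JMXServiceURL url =
            new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:9999/jmxrmi");
        try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
          MBeanServerConnection conn = connector.getMBeanServerConnection();
          // DataNode activity metrics are registered per host/port, hence the wildcard.
          Set<ObjectName> beans = conn.queryNames(
              new ObjectName("Hadoop:service=DataNode,name=DataNodeActivity*"), null);
          for (ObjectName bean : beans) {
            System.out.println("AcquireDatasetReadLockNumOps   = "
                + conn.getAttribute(bean, "AcquireDatasetReadLockNumOps"));
            System.out.println("AcquireDatasetReadLockAvgTime  = "
                + conn.getAttribute(bean, "AcquireDatasetReadLockAvgTime") + " ns");
            System.out.println("AcquireDatasetWriteLockNumOps  = "
                + conn.getAttribute(bean, "AcquireDatasetWriteLockNumOps"));
            System.out.println("AcquireDatasetWriteLockAvgTime = "
                + conn.getAttribute(bean, "AcquireDatasetWriteLockAvgTime") + " ns");
          }
        }
      }
    }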

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -512,7 +512,7 @@ public class DataNode extends ReconfigurableBase
     this.pipelineSupportSlownode = false;
     this.socketFactory = NetUtils.getDefaultSocketFactory(conf);
     this.dnConf = new DNConf(this);
-    this.dataSetLockManager = new DataSetLockManager(conf);
+    this.dataSetLockManager = new DataSetLockManager(conf, this);
     initOOBTimeout();
     storageLocationChecker = null;
     volumeChecker = new DatasetVolumeChecker(conf, new Timer());
@@ -535,7 +535,7 @@ public class DataNode extends ReconfigurableBase
     super(conf);
     this.tracer = createTracer(conf);
     this.fileIoProvider = new FileIoProvider(conf, this);
-    this.dataSetLockManager = new DataSetLockManager(conf);
+    this.dataSetLockManager = new DataSetLockManager(conf, this);
     this.blockScanner = new BlockScanner(this);
     this.lastDiskErrorCheck = 0;
     this.maxNumberOfBlocksToLog = conf.getLong(DFS_MAX_NUM_BLOCKS_TO_LOG_KEY,
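
The only change here is that the DataNode now hands itself to the lock manager, which is how the manager reaches datanode.metrics. The no-argument constructor is kept for callers that have no DataNode at all, which is why the lock manager guards its metrics calls with a null check. A minimal sketch of that standalone path is below; the import locations and the "bp-example" block pool name are assumptions for illustration.

    import org.apache.hadoop.hdfs.server.common.AutoCloseDataSetLock;
    import org.apache.hadoop.hdfs.server.common.DataNodeLockManager.LockLevel;
    import org.apache.hadoop.hdfs.server.datanode.DataSetLockManager;

    public class BareLockManagerExample {
      public static void main(String[] args) {
        // No DataNode reference: the manager's datanode field stays null,
        // so lock acquisitions are not timed or reported as metrics.
        DataSetLockManager manager = new DataSetLockManager();

        // LockLevel.BLOCK_POOl: block-pool level lock (spelling as in the Hadoop source).
        manager.addLock(LockLevel.BLOCK_POOl, "bp-example");
        try (AutoCloseDataSetLock l = manager.writeLock(LockLevel.BLOCK_POOl, "bp-example")) {
          // critical section guarded by the block-pool write lock
        }
      }
    }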

+ 16 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataSetLockManager.java

@@ -27,6 +27,7 @@ import java.util.HashMap;
 import java.util.Stack;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
+import org.apache.hadoop.util.Time;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -40,6 +41,7 @@ public class DataSetLockManager implements DataNodeLockManager<AutoCloseDataSetL
   private boolean isFair = true;
   private final boolean openLockTrace;
   private Exception lastException;
+  private DataNode datanode;
 
   /**
    * Class for maintain lockMap and is thread safe.
@@ -143,17 +145,18 @@ public class DataSetLockManager implements DataNodeLockManager<AutoCloseDataSetL
     }
   }
 
-  public DataSetLockManager(Configuration conf) {
+  public DataSetLockManager() {
+    this.openLockTrace = true;
+  }
+
+  public DataSetLockManager(Configuration conf, DataNode dn) {
     this.isFair = conf.getBoolean(
         DFSConfigKeys.DFS_DATANODE_LOCK_FAIR_KEY,
         DFSConfigKeys.DFS_DATANODE_LOCK_FAIR_DEFAULT);
     this.openLockTrace = conf.getBoolean(
         DFSConfigKeys.DFS_DATANODE_LOCKMANAGER_TRACE,
         DFSConfigKeys.DFS_DATANODE_LOCKMANAGER_TRACE_DEFAULT);
-  }
-
-  public DataSetLockManager() {
-    this.openLockTrace = true;
+    this.datanode = dn;
   }
 
   @Override
@@ -214,6 +217,7 @@ public class DataSetLockManager implements DataNodeLockManager<AutoCloseDataSetL
    * Return a not null ReadLock.
    */
   private AutoCloseDataSetLock getReadLock(LockLevel level, String... resources) {
+    long startTimeNanos = Time.monotonicNowNanos();
     String lockName = generateLockName(level, resources);
     AutoCloseDataSetLock lock = lockMap.getReadLock(lockName);
     if (lock == null) {
@@ -226,6 +230,9 @@ public class DataSetLockManager implements DataNodeLockManager<AutoCloseDataSetL
     if (openLockTrace) {
       putThreadName(getThreadName());
     }
+    if (datanode != null) {
+      datanode.metrics.addAcquireDataSetReadLock(Time.monotonicNowNanos() - startTimeNanos);
+    }
     return lock;
   }
 
@@ -233,6 +240,7 @@ public class DataSetLockManager implements DataNodeLockManager<AutoCloseDataSetL
    * Return a not null WriteLock.
    */
   private AutoCloseDataSetLock getWriteLock(LockLevel level, String... resources) {
+    long startTimeNanos = Time.monotonicNowNanos();
     String lockName = generateLockName(level, resources);
     AutoCloseDataSetLock lock = lockMap.getWriteLock(lockName);
     if (lock == null) {
@@ -245,6 +253,9 @@ public class DataSetLockManager implements DataNodeLockManager<AutoCloseDataSetL
     if (openLockTrace) {
       putThreadName(getThreadName());
     }
+    if (datanode != null) {
+      datanode.metrics.addAcquireDataSetWriteLock(Time.monotonicNowNanos() - startTimeNanos);
+    }
     return lock;
   }
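
The instrumentation follows the usual pattern: take a monotonic nanosecond timestamp before blocking on the lock, then report the elapsed time to a MutableRate once the lock is held, so only the acquisition wait is measured, not the critical section. A stripped-down sketch of the same pattern outside the DataNode (the registry and rate names are illustrative):

    import java.util.concurrent.locks.ReentrantReadWriteLock;

    import org.apache.hadoop.metrics2.lib.MetricsRegistry;
    import org.apache.hadoop.metrics2.lib.MutableRate;
    import org.apache.hadoop.util.Time;

    public class TimedLockExample {
      private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);
      private final MetricsRegistry registry = new MetricsRegistry("TimedLockExample");
      private final MutableRate acquireReadLock =
          registry.newRate("acquireReadLock", "Nanoseconds spent acquiring the read lock");

      public void readGuarded(Runnable action) {
        // Monotonic clock: unaffected by wall-clock adjustments, safe for latencies.
        long startNanos = Time.monotonicNowNanos();
        lock.readLock().lock();
        // Record only the time spent waiting for the lock.
        acquireReadLock.add(Time.monotonicNowNanos() - startNanos);
        try {
          action.run();
        } finally {
          lock.readLock().unlock();
        }
      }
    }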
 

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java

@@ -218,6 +218,10 @@ public class DataNodeMetrics {
   @Metric("Milliseconds spent on calling NN rpc")
   private MutableRatesWithAggregation
       nnRpcLatency = registry.newRatesWithAggregation("nnRpcLatency");
+  @Metric("Nanoseconds spent on acquire dataset write lock")
+  private MutableRate acquireDatasetWriteLock;
+  @Metric("Nanoseconds spent on acquire dataset read lock")
+  private MutableRate acquireDatasetReadLock;
 
   final String name;
   JvmMetrics jvmMetrics = null;
@@ -817,4 +821,12 @@ public class DataNodeMetrics {
   public void incrNullStorageBlockReports() {
     nullStorageBlockReports.incr();
   }
+
+  public void addAcquireDataSetReadLock(long latency) {
+    acquireDatasetReadLock.add(latency);
+  }
+
+  public void addAcquireDataSetWriteLock(long latency) {
+    acquireDatasetWriteLock.add(latency);
+  }
 }
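
Because both fields are MutableRate instances, a single add() call updates the NumOps counter and the AvgTime average together, and the published names are derived from the field names (acquireDatasetReadLock becomes AcquireDatasetReadLockNumOps / AcquireDatasetReadLockAvgTime). A small sketch exercising the new accessors directly; DataNodeMetrics.create and name() are the existing factory and accessor, while the "example-dn" label and the latency values are made up.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;

    public class DataNodeMetricsExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Registers a DataNodeActivity-* source with the default metrics system.
        DataNodeMetrics metrics = DataNodeMetrics.create(conf, "example-dn");

        // Each call bumps the NumOps counter by one and feeds the nanosecond
        // latency into the corresponding AvgTime average.
        metrics.addAcquireDataSetReadLock(1200L);
        metrics.addAcquireDataSetWriteLock(3400L);

        System.out.println("Registered metrics source: " + metrics.name());
      }
    }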

+ 20 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
 import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
 import static org.apache.hadoop.test.MetricsAsserts.assertInverseQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
@@ -816,4 +817,23 @@ public class TestDataNodeMetrics {
       }, 100, 10000);
     }
   }
+
+  @Test
+  public void testDataNodeDatasetLockMetrics() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build()) {
+      FileSystem fs = cluster.getFileSystem();
+      // Create and read a 1 byte file
+      Path tmpfile = new Path("/tmp.txt");
+      DFSTestUtil.createFile(fs, tmpfile,
+              (long)1, (short)1, 1L);
+      DFSTestUtil.readFile(fs, tmpfile);
+      List<DataNode> datanodes = cluster.getDataNodes();
+      assertEquals(datanodes.size(), 1);
+      DataNode datanode = datanodes.get(0);
+      MetricsRecordBuilder rb = getMetrics(datanode.getMetrics().name());
+      assertCounterGt("AcquireDatasetWriteLockNumOps", (long)1, rb);
+      assertCounterGt("AcquireDatasetReadLockNumOps", (long)1, rb);
+    }
+  }
 }
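
The test asserts only on the NumOps counters. If the latency side is of interest as well, the AvgTime of a MutableRate is exported as a double gauge, so a companion check could look like the sketch below: a hypothetical extra test method for the same class, assuming static imports of MetricsAsserts.getDoubleGauge and Assert.assertTrue in addition to those shown in the diff, and only sanity-checking the values because they are timing-dependent.

      @Test
      public void testDatasetLockAvgTimeGauges() throws IOException {
        Configuration conf = new HdfsConfiguration();
        try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build()) {
          FileSystem fs = cluster.getFileSystem();
          // Trigger some dataset lock activity with a tiny write and read.
          Path tmpfile = new Path("/tmp-avg.txt");
          DFSTestUtil.createFile(fs, tmpfile, 1L, (short) 1, 1L);
          DFSTestUtil.readFile(fs, tmpfile);
          MetricsRecordBuilder rb =
              getMetrics(cluster.getDataNodes().get(0).getMetrics().name());
          // AvgTime gauges report nanoseconds and should never be negative.
          assertTrue(getDoubleGauge("AcquireDatasetReadLockAvgTime", rb) >= 0.0);
          assertTrue(getDoubleGauge("AcquireDatasetWriteLockAvgTime", rb) >= 0.0);
        }
      }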