
HDFS-17208. Add the metrics PendingAsyncDiskOperations in datanode (#6109). Contributed by Haiyang Hu.

Reviewed-by: Tao Li <tomscut@apache.org>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
huhaiyang, 1 year ago
Commit 0ed484ac62

+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java

@@ -161,7 +161,11 @@ class FsDatasetAsyncDiskService {
       executors.remove(storageId);
     }
   }
-  
+
+  /**
+   * The count of pending and running asynchronous disk operations,
+   * including deletion of block files and requesting sync_file_range() operations.
+   */
   synchronized long countPendingDeletions() {
     long count = 0;
     for (ThreadPoolExecutor exec : executors.values()) {
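Note: the hunk above cuts off inside countPendingDeletions(). A minimal sketch of what the aggregation presumably looks like, assuming the count is derived from the standard ThreadPoolExecutor task counters of each per-volume executor (the loop body itself is not shown in this hunk):

  synchronized long countPendingDeletions() {
    long count = 0;
    for (ThreadPoolExecutor exec : executors.values()) {
      // Tasks submitted but not yet finished: queued plus currently running
      // block deletions and sync_file_range() requests. Both counters are
      // approximate per the java.util.concurrent.ThreadPoolExecutor javadoc.
      count += exec.getTaskCount() - exec.getCompletedTaskCount();
    }
    return count;
  }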

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

@@ -3822,5 +3822,10 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
   public void setLastDirScannerFinishTime(long time) {
     this.lastDirScannerFinishTime = time;
   }
+
+  @Override
+  public long getPendingAsyncDeletions() {
+    return asyncDiskService.countPendingDeletions();
+  }
 }
 

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetricHelper.java

@@ -73,7 +73,10 @@ public class DataNodeMetricHelper {
           " blocks failed in cache eviction"),
         beanClass.getNumBlocksFailedToUncache())
         .addGauge(Interns.info("LastDirectoryScannerFinishTime",
-        "Finish time of the last directory scan"), beanClass.getLastDirScannerFinishTime());
+        "Finish time of the last directory scan"), beanClass.getLastDirScannerFinishTime())
+        .addGauge(Interns.info("PendingAsyncDeletions",
+            "The count of pending and running asynchronous disk operations"),
+            beanClass.getPendingAsyncDeletions());
   }
 
 }
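With the gauge registered in DataNodeMetricHelper, it can also be read through the dataset's MetricsSource interface rather than JMX. A hedged sketch of such a check, assuming the MetricsAsserts helpers from hadoop-common test utilities (getMetrics/assertGauge) are on the classpath and an FsDatasetSpi is obtained from a running DataNode; the helper class name here is hypothetical:

  import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
  import static org.apache.hadoop.test.MetricsAsserts.getMetrics;

  import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
  import org.apache.hadoop.metrics2.MetricsRecordBuilder;

  public final class PendingAsyncDeletionsAsserts {
    private PendingAsyncDeletionsAsserts() {
    }

    /** Snapshot the dataset's metrics source and check the new gauge value. */
    public static void assertPendingAsyncDeletions(FsDatasetSpi<?> dataset, long expected) {
      MetricsRecordBuilder rb = getMetrics(dataset);
      assertGauge("PendingAsyncDeletions", expected, rb);
    }
  }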

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/FSDatasetMBean.java

@@ -127,4 +127,9 @@ public interface FSDatasetMBean extends MetricsSource {
    * Returns the last time in milliseconds when the directory scanner successfully ran.
    */
   long getLastDirScannerFinishTime();
+
+  /**
+   * Returns the count of pending and running asynchronous disk operations.
+   */
+  long getPendingAsyncDeletions();
 }
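Because FSDatasetMBean is exposed as the FSDatasetState JMX bean, the new attribute is also reachable from outside the DataNode process. A minimal sketch of a remote probe using only standard JDK JMX APIs; the class name, host, and port are hypothetical, the DataNode UUID is passed as an argument, and the DataNode JVM must have remote JMX enabled:

  import javax.management.MBeanServerConnection;
  import javax.management.ObjectName;
  import javax.management.remote.JMXConnector;
  import javax.management.remote.JMXConnectorFactory;
  import javax.management.remote.JMXServiceURL;

  public class PendingAsyncDeletionsProbe {
    public static void main(String[] args) throws Exception {
      String datanodeUuid = args[0];
      // Hypothetical endpoint; requires com.sun.management.jmxremote.port on the DataNode JVM.
      JMXServiceURL url = new JMXServiceURL(
          "service:jmx:rmi:///jndi/rmi://datanode-host:9999/jmxrmi");
      try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
        MBeanServerConnection conn = connector.getMBeanServerConnection();
        // Same bean name pattern as the in-process check in the test below.
        ObjectName name = new ObjectName(
            "Hadoop:service=DataNode,name=FSDatasetState-" + datanodeUuid);
        long pending = (long) conn.getAttribute(name, "PendingAsyncDeletions");
        System.out.println("PendingAsyncDeletions = " + pending);
      }
    }
  }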

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java

@@ -944,6 +944,11 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
     return 0L;
   }
 
+  @Override
+  public long getPendingAsyncDeletions() {
+    return 0;
+  }
+
   /**
    * Get metrics from the metrics source
    *

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/extdataset/ExternalDatasetImpl.java

@@ -483,4 +483,8 @@ public class ExternalDatasetImpl implements FsDatasetSpi<ExternalVolumeImpl> {
     return 0L;
   }
 
+  @Override
+  public long getPendingAsyncDeletions() {
+    return 0;
+  }
 }

+ 28 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
 
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.lang.management.ManagementFactory;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.Random;
@@ -131,6 +132,14 @@ import static org.mockito.Mockito.when;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.management.AttributeNotFoundException;
+import javax.management.InstanceNotFoundException;
+import javax.management.MBeanException;
+import javax.management.MBeanServer;
+import javax.management.MalformedObjectNameException;
+import javax.management.ObjectName;
+import javax.management.ReflectionException;
+
 public class TestFsDatasetImpl {
 
   private static final Logger LOG = LoggerFactory.getLogger(
@@ -1842,7 +1851,8 @@ public class TestFsDatasetImpl {
    */
   @Test
   public void testAysncDiskServiceDeleteReplica()
-      throws IOException, InterruptedException, TimeoutException {
+      throws IOException, InterruptedException, TimeoutException, MalformedObjectNameException,
+      ReflectionException, AttributeNotFoundException, InstanceNotFoundException, MBeanException {
     HdfsConfiguration config = new HdfsConfiguration();
     // Bump up replication interval.
     config.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 10);
@@ -1896,6 +1906,17 @@ public class TestFsDatasetImpl {
      // If this replica is deleted from memory, the client would get a ReplicaNotFoundException.
       assertNotNull(ds.getStoredBlock(bpid, extendedBlock.getBlockId()));
 
+      assertEquals(1, ds.asyncDiskService.countPendingDeletions());
+      assertEquals(1, ds.getPendingAsyncDeletions());
+
+      // Validate PendingAsyncDeletions metrics.
+      MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+      ObjectName mxbeanName = new ObjectName(
+          "Hadoop:service=DataNode,name=FSDatasetState-" + dn.getDatanodeUuid());
+      long pendingAsyncDeletions = (long) mbs.getAttribute(mxbeanName,
+          "PendingAsyncDeletions");
+      assertEquals(1, pendingAsyncDeletions);
+
       // Make it resume the removeReplicaFromMem method.
       semaphore.release(1);
 
@@ -1903,6 +1924,12 @@ public class TestFsDatasetImpl {
       GenericTestUtils.waitFor(() ->
           ds.asyncDiskService.countPendingDeletions() == 0, 100, 1000);
 
+      assertEquals(0, ds.getPendingAsyncDeletions());
+
+      pendingAsyncDeletions = (long) mbs.getAttribute(mxbeanName,
+          "PendingAsyncDeletions");
+      assertEquals(0, pendingAsyncDeletions);
+
       // Sleep for two heartbeat times.
       Thread.sleep(config.getTimeDuration(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
           DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT,