Browse Source

HDFS-11560. Expose slow disks via NameNode JMX. Contributed by Hanisha Koneru.

Hanisha Koneru 8 years ago
parent
commit
73835c73e2

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -1907,5 +1907,14 @@ public class DatanodeManager {
   public SlowDiskTracker getSlowDiskTracker() {
     return slowDiskTracker;
   }
+  /**
+   * Retrieve information about slow disks as a JSON.
+   * Returns null if we are not tracking slow disks.
+   * @return
+   */
+  public String getSlowDisksReport() {
+    return slowDiskTracker != null ?
+        slowDiskTracker.getSlowDiskReportAsJsonString() : null;
+  }
 }
 

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java

@@ -256,6 +256,9 @@ public class SlowDiskTracker {
   public String getSlowDiskReportAsJsonString() {
     ObjectMapper objectMapper = new ObjectMapper();
     try {
+      if (slowDisksReport.isEmpty()) {
+        return null;
+      }
       return objectMapper.writeValueAsString(slowDisksReport);
     } catch (JsonProcessingException e) {
       // Failed to serialize. Don't log the exception call stack.

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -1826,6 +1826,12 @@ public class NameNode extends ReconfigurableBase implements
         .getSlowPeersReport();
   }
 
+  @Override //NameNodeStatusMXBean
+  public String getSlowDisksReport() {
+    return namesystem.getBlockManager().getDatanodeManager()
+        .getSlowDisksReport();
+  }
+
   /**
    * Shutdown the NN immediately in an ungraceful way. Used when it would be
    * unsafe for the NN to continue operating, e.g. during a failed HA state

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java

@@ -75,4 +75,12 @@ public interface NameNodeStatusMXBean {
    * enabled. The report is in a JSON format.
    */
   String getSlowPeersReport();
+
+
+  /**
+   *  Gets the topN slow disks in the cluster, if the feature is enabled.
+   *
+   *  @return JSON string of list of diskIDs and latencies
+   */
+  String getSlowDisksReport();
 }

+ 2 - 11
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSlowDiskTracker.java

@@ -393,18 +393,9 @@ public class TestSlowDiskTracker {
     timer.advance(reportValidityMs);
 
     tracker.updateSlowDiskReportAsync(timer.monotonicNow());
+    Thread.sleep(OUTLIERS_REPORT_INTERVAL*2);
 
-    GenericTestUtils.waitFor(new Supplier<Boolean>() {
-      @Override
-      public Boolean get() {
-        return tracker.getSlowDiskReportAsJsonString() != null;
-      }
-    }, 500, 5000);
-
-    ArrayList<DiskLatency> jsonReport = getAndDeserializeJson(
-        tracker.getSlowDiskReportAsJsonString());
-
-    assertTrue(jsonReport.isEmpty());
+    assertTrue(tracker.getSlowDiskReportAsJsonString() == null);
   }
 
   private boolean isDiskInReports(ArrayList<DiskLatency> reports,

+ 57 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java

@@ -17,17 +17,23 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import com.google.common.base.Supplier;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.server.datanode.TestDataNodeMXBean;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Assert;
 import org.junit.Test;
 
 import javax.management.MBeanServer;
 import javax.management.ObjectName;
 import java.lang.management.ManagementFactory;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
 
 /**
  * Class for testing {@link NameNodeStatusMXBean} implementation.
@@ -38,7 +44,7 @@ public class TestNameNodeStatusMXBean {
       TestNameNodeStatusMXBean.class);
 
   @Test(timeout = 120000L)
-  public void testDataNodeMXBean() throws Exception {
+  public void testNameNodeStatusMXBean() throws Exception {
     Configuration conf = new Configuration();
     MiniDFSCluster cluster = null;
 
@@ -84,6 +90,55 @@ public class TestNameNodeStatusMXBean {
       String slowPeersReport = (String)mbs.getAttribute(mxbeanName,
           "SlowPeersReport");
       Assert.assertEquals(nn.getSlowPeersReport(), slowPeersReport);
+
+      // Get attribute "SlowDisksReport"
+      String slowDisksReport = (String)mbs.getAttribute(mxbeanName,
+          "SlowDisksReport");
+      Assert.assertEquals(nn.getSlowDisksReport(), slowDisksReport);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  @Test
+  public void testNameNodeMXBeanSlowDisksEnabled() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setDouble(
+        DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
+    conf.setTimeDuration(
+        DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
+        1000, TimeUnit.MILLISECONDS);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+
+    try {
+      List<DataNode> datanodes = cluster.getDataNodes();
+      Assert.assertEquals(datanodes.size(), 1);
+      DataNode datanode = datanodes.get(0);
+      String slowDiskPath = "test/data1/slowVolume";
+      datanode.getDiskMetrics().addSlowDiskForTesting(slowDiskPath, null);
+
+      NameNode nn = cluster.getNameNode();
+      DatanodeManager datanodeManager = nn.getNamesystem().getBlockManager()
+          .getDatanodeManager();
+
+      MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+      ObjectName mxbeanName = new ObjectName(
+          "Hadoop:service=NameNode,name=NameNodeStatus");
+
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          return (datanodeManager.getSlowDisksReport() != null);
+        }
+      }, 1000, 100000);
+
+      String slowDisksReport = (String)mbs.getAttribute(
+          mxbeanName, "SlowDisksReport");
+      Assert.assertEquals(datanodeManager.getSlowDisksReport(),
+          slowDisksReport);
+      Assert.assertTrue(slowDisksReport.contains(slowDiskPath));
     } finally {
       if (cluster != null) {
         cluster.shutdown();