浏览代码

HDFS-10534. NameNode WebUI should display DataNode usage rate with a certain percentile. Contributed by Kai Sasaki.

Zhe Zhang 9 年之前
父节点
当前提交
0424056a77

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -451,6 +451,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       "dfs.namenode.metrics.logger.period.seconds";
   public static final int     DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT =
       600;
+  public static final String DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE =
+      "dfs.namenode.metrics.node-usage.percentile";
+  public static final double DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE_DEFAULT
+      = 0.95;
+
   public static final String DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_KEY =
       "dfs.datanode.metrics.logger.period.seconds";
   public static final int DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT =

+ 20 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -66,6 +66,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CAC
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
@@ -528,6 +530,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
 
   private INodeAttributeProvider inodeAttributeProvider;
 
+  private final double percentileFactor;
+
   /**
    * If the NN is in safemode, and not due to manual / low resources, we
    * assume it must be because of startup. If the NN had low resources during
@@ -824,7 +828,15 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       alwaysUseDelegationTokensForTests = conf.getBoolean(
           DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
           DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
-      
+
+      this.percentileFactor = conf.getDouble(
+          DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE,
+          DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE_DEFAULT);
+
+      Preconditions.checkArgument(0.0 < this.percentileFactor
+          && this.percentileFactor <= 1.0, "Node usage percentile " +
+          "factor must be between 0 and 1.");
+
       this.dtSecretManager = createDelegationTokenSecretManager(conf);
       this.dir = new FSDirectory(this, conf);
       this.snapshotManager = new SnapshotManager(dir);
@@ -5614,6 +5626,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     float max = 0;
     float min = 0;
     float dev = 0;
+    float percentile = 0;
 
     final Map<String, Map<String,Object>> info =
         new HashMap<String, Map<String,Object>>();
@@ -5639,6 +5652,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       median = usages[usages.length / 2];
       max = usages[usages.length - 1];
       min = usages[0];
+      percentile = usages[(int)((usages.length - 1) * percentileFactor)];
 
       for (i = 0; i < usages.length; i++) {
         dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
@@ -5651,6 +5665,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     innerInfo.put("median", StringUtils.format("%.2f%%", median));
     innerInfo.put("max", StringUtils.format("%.2f%%", max));
     innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
+    final Map<String, Object> percentileInfo = new HashMap<String, Object>();
+    percentileInfo.put("name", StringUtils.format("%dth percentile",
+        (int)(percentileFactor * 100)));
+    percentileInfo.put("value", StringUtils.format("%.2f%%", percentile));
+    innerInfo.put("percentile", percentileInfo);
     info.put("nodeUsage", innerInfo);
 
     return JSON.toString(info);

+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -1896,6 +1896,17 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.metrics.node-usage.percentile</name>
+  <value>0.95</value>
+  <description>
+    This setting specifies the percentile level to report node usage metrics.
+    For example, 0.95 means reporting the node usage for the 95th percentile
+    of all DataNodes. If this setting is 0.95 and the reported node usage
+    is 70%, it means 95% of DataNodes have a usage at or below 70%.
+  </description>
+</property>
+
 <property>
   <name>dfs.datanode.metrics.logger.period.seconds</name>
   <value>600</value>

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html

@@ -166,8 +166,8 @@
   <tr><th> Non DFS Used:</th><td>{NonDfsUsedSpace|fmt_bytes}</td></tr>
   <tr><th> DFS Remaining:</th><td>{Free|fmt_bytes} ({PercentRemaining|fmt_percentage})</td></tr>
   <tr><th> Block Pool Used:</th><td>{BlockPoolUsedSpace|fmt_bytes} ({PercentBlockPoolUsed|fmt_percentage})</td></tr>
-  <tr><th> DataNodes usages% (Min/Median/Max/stdDev): </th>
-	<td>{#NodeUsage.nodeUsage}{min} / {median} / {max} / {stdDev}{/NodeUsage.nodeUsage}</td></tr>
+  <tr><th> {#NodeUsage.nodeUsage}DataNodes usages% (Min/Median/Max/stdDev/{percentile.name}):{/NodeUsage.nodeUsage} </th>
+    <td>{#NodeUsage.nodeUsage}{min} / {median} / {max} / {stdDev} / {percentile.value}{/NodeUsage.nodeUsage}</td></tr>
 {/nn}
 
 {#fs}

+ 10 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java

@@ -173,6 +173,16 @@ public class TestNameNodeMXBean {
       String nodeUsage = (String) (mbs.getAttribute(mxbeanName,
           "NodeUsage"));
       assertEquals("Bad value for NodeUsage", fsn.getNodeUsage(), nodeUsage);
+      Map<String, Map<String, Object>> usage
+          = (Map<String, Map<String, Object>>)JSON.parse(nodeUsage);
+      assertTrue(usage.get("nodeUsage").containsKey("min"));
+      assertTrue(usage.get("nodeUsage").containsKey("median"));
+      assertTrue(usage.get("nodeUsage").containsKey("max"));
+      assertTrue(usage.get("nodeUsage").containsKey("percentile"));
+      Map<String, Object> percentileInfo
+          = (Map<String, Object>)usage.get("nodeUsage").get("percentile");
+      assertTrue(percentileInfo.containsKey("name"));
+      assertTrue(percentileInfo.containsKey("value"));
       // get attribute NameJournalStatus
       String nameJournalStatus = (String) (mbs.getAttribute(mxbeanName,
           "NameJournalStatus"));