Parcourir la source

HDFS-16678. RBF should supports disable getNodeUsage() in RBFMetrics (#4606)

xuzq il y a 2 ans
Parent
commit
e0c8c6eed4

+ 25 - 16
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java

@@ -50,6 +50,7 @@ import javax.management.NotCompliantMBeanException;
 import javax.management.ObjectName;
 import javax.management.StandardMBean;
 
+import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
@@ -113,6 +114,8 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
   /** Prevent holding the page from load too long. */
   private final long timeOut;
 
+  /** Whether getNodeUsage() fetches the live datanode report; when false the fetch is skipped. */
+  private boolean enableGetDNUsage;
 
   /** Router interface. */
   private final Router router;
@@ -175,6 +178,8 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
     Configuration conf = router.getConfig();
     this.timeOut = conf.getTimeDuration(RBFConfigKeys.DN_REPORT_TIME_OUT,
         RBFConfigKeys.DN_REPORT_TIME_OUT_MS_DEFAULT, TimeUnit.MILLISECONDS);
+    this.enableGetDNUsage = conf.getBoolean(RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY,
+        RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT);
     this.topTokenRealOwners = conf.getInt(
         RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY,
         RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY_DEFAULT);
@@ -184,6 +189,11 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
     ms.register(RBFMetrics.class.getName(), "RBFActivity Metrics", this);
   }
 
+  @VisibleForTesting
+  public void setEnableGetDNUsage(boolean enableGetDNUsage) {
+    this.enableGetDNUsage = enableGetDNUsage;
+  }
+
   /**
    * Unregister the JMX beans.
    */
@@ -537,35 +547,34 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
 
   @Override // NameNodeMXBean
   public String getNodeUsage() {
-    float median = 0;
-    float max = 0;
-    float min = 0;
-    float dev = 0;
+    double median = 0;
+    double max = 0;
+    double min = 0;
+    double dev = 0;
 
     final Map<String, Map<String, Object>> info = new HashMap<>();
     try {
-      RouterRpcServer rpcServer = this.router.getRpcServer();
-      DatanodeInfo[] live = rpcServer.getDatanodeReport(
-          DatanodeReportType.LIVE, false, timeOut);
+      DatanodeInfo[] live = null;
+      if (this.enableGetDNUsage) {
+        RouterRpcServer rpcServer = this.router.getRpcServer();
+        live = rpcServer.getDatanodeReport(DatanodeReportType.LIVE, false, timeOut);
+      } else {
+        LOG.debug("Getting node usage is disabled.");
+      }
 
-      if (live.length > 0) {
-        float totalDfsUsed = 0;
-        float[] usages = new float[live.length];
+      if (live != null && live.length > 0) {
+        double[] usages = new double[live.length];
         int i = 0;
         for (DatanodeInfo dn : live) {
           usages[i++] = dn.getDfsUsedPercent();
-          totalDfsUsed += dn.getDfsUsedPercent();
         }
-        totalDfsUsed /= live.length;
         Arrays.sort(usages);
         median = usages[usages.length / 2];
         max = usages[usages.length - 1];
         min = usages[0];
 
-        for (i = 0; i < usages.length; i++) {
-          dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
-        }
-        dev = (float) Math.sqrt(dev / usages.length);
+        StandardDeviation deviation = new StandardDeviation();
+        dev = deviation.evaluate(usages);
       }
     } catch (IOException e) {
       LOG.error("Cannot get the live nodes: {}", e.getMessage());

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java

@@ -321,6 +321,9 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
       FEDERATION_ROUTER_PREFIX + "dn-report.cache-expire";
   public static final long DN_REPORT_CACHE_EXPIRE_MS_DEFAULT =
       TimeUnit.SECONDS.toMillis(10);
+  public static final String DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY =
+      FEDERATION_ROUTER_PREFIX + "enable.get.dn.usage";
+  public static final boolean DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT = true;
 
   // HDFS Router-based federation quota
   public static final String DFS_ROUTER_QUOTA_ENABLE =

+ 10 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml

@@ -195,6 +195,16 @@
     </description>
   </property>
 
+  <property>
+    <name>dfs.federation.router.enable.get.dn.usage</name>
+    <value>true</value>
+    <description>
+      If true, the getNodeUsage method in RBFMetrics returns an up-to-date
+      result collected from the downstream nameservices, which can take a long
+      time and tie up thread resources. If false, it returns a placeholder result with all values set to 0.
+    </description>
+  </property>
+
   <property>
     <name>dfs.federation.router.metrics.class</name>
     <value>org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor</value>

+ 31 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java

@@ -131,6 +131,7 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.service.Service.STATE;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.LambdaTestUtils;
+import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -2181,4 +2182,34 @@ public class TestRouterRpc {
       routerDFS.delete(dirPath, true);
     }
   }
+
+  @Test
+  public void testDisableNodeUsageInRBFMetrics() throws JSONException {
+    RBFMetrics rbfMetrics = router.getRouter().getMetrics();
+    FederationRPCMetrics federationRPCMetrics = router.getRouter().getRpcServer().getRPCMetrics();
+
+    long proxyOpBefore = federationRPCMetrics.getProxyOps();
+    String nodeUsageEnable = router.getRouter().getMetrics().getNodeUsage();
+    assertNotNull(nodeUsageEnable);
+    long proxyOpAfterWithEnable = federationRPCMetrics.getProxyOps();
+    assertEquals(proxyOpBefore + 2, proxyOpAfterWithEnable);
+
+    rbfMetrics.setEnableGetDNUsage(false);
+    String nodeUsageDisable = rbfMetrics.getNodeUsage();
+    assertNotNull(nodeUsageDisable);
+    long proxyOpAfterWithDisable = federationRPCMetrics.getProxyOps();
+    assertEquals(proxyOpAfterWithEnable, proxyOpAfterWithDisable);
+    JSONObject jsonObject = new JSONObject(nodeUsageDisable);
+    JSONObject json = jsonObject.getJSONObject("nodeUsage");
+    assertEquals("0.00%", json.get("min"));
+    assertEquals("0.00%", json.get("median"));
+    assertEquals("0.00%", json.get("max"));
+    assertEquals("0.00%", json.get("stdDev"));
+
+    rbfMetrics.setEnableGetDNUsage(true);
+    String nodeUsageWithReEnable = rbfMetrics.getNodeUsage();
+    assertNotNull(nodeUsageWithReEnable);
+    long proxyOpAfterWithReEnable = federationRPCMetrics.getProxyOps();
+    assertEquals(proxyOpAfterWithDisable + 2, proxyOpAfterWithReEnable);
+  }
 }