瀏覽代碼

HDFS-16707. RBF: Expose RouterRpcFairnessPolicyController related request record metrics for each nameservice to Prometheus (#4665). Contributed by Jiale Qi.

Reviewed-by: Inigo Goiri <inigoiri@apache.org>
Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
SevenAddSix 2 年之前
父節點
當前提交
1079890ae3

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java

@@ -186,6 +186,25 @@ public class FederationRPCPerformanceMonitor implements RouterRpcMonitor {
     }
   }
 
+  @Override
+  public void proxyOpPermitRejected(String nsId) { // record one rejected permit for nsId
+    if (metrics != null) {
+      metrics.incrProxyOpPermitRejected(); // counter that is not keyed by nsId
+    }
+    if (nameserviceRPCMetricsMap != null &&
+        nameserviceRPCMetricsMap.containsKey(nsId)) { // an nsId with no entry is skipped
+      nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitRejected(); // per-nameservice counter
+    }
+  }
+
+  @Override
+  public void proxyOpPermitAccepted(String nsId) { // record one accepted permit for nsId
+    if (nameserviceRPCMetricsMap != null &&
+        nameserviceRPCMetricsMap.containsKey(nsId)) { // an nsId with no entry is skipped
+      // Only the per-nameservice counter is updated; `metrics` is not touched here.
+      nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitAccepted();
+    }
+  }
+
   @Override
   public void proxyOpFailureClientOverloaded() {
     if (metrics != null) {

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NameserviceRPCMBean.java

@@ -37,4 +37,7 @@ public interface NameserviceRPCMBean {
 
   long getProxyOpNoNamenodes();
 
+  long getProxyOpPermitRejected(); // current value of the rejected-permit counter
+
+  long getProxyOpPermitAccepted(); // current value of the accepted-permit counter
 }

+ 30 - 6
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NameserviceRPCMetrics.java

@@ -22,6 +22,7 @@ import org.apache.hadoop.metrics2.MetricsSystem;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 
@@ -37,6 +38,7 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
   public final static String NAMESERVICE_RPC_METRICS_PREFIX = "NameserviceActivity-";
 
   private final String nsId;
+  private final MetricsRegistry registry = new MetricsRegistry("NameserviceRPCActivity");
 
   @Metric("Time for the Router to proxy an operation to the Nameservice")
   private MutableRate proxy;
@@ -49,19 +51,24 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
   private MutableCounterLong proxyOpFailureCommunicate;
   @Metric("Number of operations to hit no namenodes available")
   private MutableCounterLong proxyOpNoNamenodes;
+  @Metric("Number of operations to hit permit limits")
+  private MutableCounterLong proxyOpPermitRejected;
+  @Metric("Number of operations accepted to hit a namenode")
+  private MutableCounterLong proxyOpPermitAccepted;
 
   public NameserviceRPCMetrics(Configuration conf, String nsId) { // conf is not read here
-    this.nsId = nsId;
+    this.nsId = NAMESERVICE_RPC_METRICS_PREFIX + nsId; // field keeps the prefixed name
+    registry.tag("ns", "Nameservice", nsId); // tag value is the unprefixed id passed in
   }
 
   public static NameserviceRPCMetrics create(Configuration conf,
       String nameService) {
     MetricsSystem ms = DefaultMetricsSystem.instance();
-    String name = NAMESERVICE_RPC_METRICS_PREFIX + (nameService.isEmpty()
-        ? "UndefinedNameService"+ ThreadLocalRandom.current().nextInt()
-        : nameService);
-    return ms.register(name, "HDFS Federation NameService RPC Metrics",
-        new NameserviceRPCMetrics(conf, name));
+    String nsId = (nameService.isEmpty() ? // empty name gets a random placeholder id
+        "UndefinedNameService" + ThreadLocalRandom.current().nextInt() :
+        nameService);
+    return ms.register(NAMESERVICE_RPC_METRICS_PREFIX + nsId, // source name = prefix + id
+        "HDFS Federation NameService RPC Metrics", new NameserviceRPCMetrics(conf, nsId));
   }
 
   public void incrProxyOpFailureStandby() {
@@ -91,6 +98,23 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
     return proxyOpNoNamenodes.value();
   }
 
+  public void incrProxyOpPermitRejected() { // bump this nameservice's rejected-permit counter
+    proxyOpPermitRejected.incr();
+  }
+
+  @Override
+  public long getProxyOpPermitRejected() { // NameserviceRPCMBean getter
+    return proxyOpPermitRejected.value(); // current rejected-permit count
+  }
+
+  public void incrProxyOpPermitAccepted() { // bump this nameservice's accepted-permit counter
+    proxyOpPermitAccepted.incr();
+  }
+
+  @Override
+  public long getProxyOpPermitAccepted() { // NameserviceRPCMBean getter
+    return proxyOpPermitAccepted.value(); // current accepted-permit count
+  }
 
   /**
    * Add the time to proxy an operation from the moment the Router sends it to

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java

@@ -1629,7 +1629,7 @@ public class RouterRpcClient {
         // Throw StandByException,
         // Clients could fail over and try another router.
         if (rpcMonitor != null) {
-          rpcMonitor.getRPCMetrics().incrProxyOpPermitRejected();
+          rpcMonitor.proxyOpPermitRejected(nsId);
         }
         incrRejectedPermitForNs(nsId);
         LOG.debug("Permit denied for ugi: {} for method: {}",
@@ -1639,6 +1639,9 @@ public class RouterRpcClient {
                 " is overloaded for NS: " + nsId;
         throw new StandbyException(msg);
       }
+      if (rpcMonitor != null) {
+        rpcMonitor.proxyOpPermitAccepted(nsId);
+      }
       incrAcceptedPermitForNs(nsId);
     }
   }

+ 10 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcMonitor.java

@@ -77,6 +77,16 @@ public interface RouterRpcMonitor {
    */
   void proxyOpFailureCommunicate(String nsId);
 
+  /**
+   * An operation was denied a permit and was therefore not proxied to a
+   * Namenode.
+   *
+   * @param nsId Identifier of the nameservice the operation was destined for.
+   */
+  void proxyOpPermitRejected(String nsId);
+
+  /**
+   * An operation was granted a permit to be proxied to a Namenode.
+   *
+   * @param nsId Identifier of the nameservice the operation is destined for.
+   */
+  void proxyOpPermitAccepted(String nsId);
+
   /**
    * Failed to proxy an operation to a Namenode because the client was
    * overloaded.