浏览代码

HDFS-17290: Adds disconnected client rpc backoff metrics (#6359)

Lei Yang 1 年之前
父节点
当前提交
661c784662

+ 7 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java

@@ -3133,6 +3133,13 @@ public abstract class Server {
       // For example, IPC clients using FailoverOnNetworkExceptionRetry handle
       // RetriableException.
       rpcMetrics.incrClientBackoff();
+      // Clients put directly into the lowest priority queue are backed off and disconnected.
+      if (cqe.getCause() instanceof RpcServerException) {
+        RpcServerException ex = (RpcServerException) cqe.getCause();
+        if (ex.getRpcStatusProto() == RpcStatusProto.FATAL) {
+          rpcMetrics.incrClientBackoffDisconnected();
+        }
+      }
       // unwrap retriable exception.
       throw cqe.getCause();
     }

+ 18 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java

@@ -141,6 +141,8 @@ public class RpcMetrics {
   MutableCounterLong rpcAuthorizationSuccesses;
   @Metric("Number of client backoff requests")
   MutableCounterLong rpcClientBackoff;
+  @Metric("Number of disconnected client backoff requests")
+  MutableCounterLong rpcClientBackoffDisconnected;
   @Metric("Number of slow RPC calls")
   MutableCounterLong rpcSlowCalls;
   @Metric("Number of requeue calls")
@@ -342,6 +344,22 @@ public class RpcMetrics {
     rpcClientBackoff.incr();
   }
 
+  /**
+   * Increments the counter of clients disconnected due to backoff.
+   */
+  public void incrClientBackoffDisconnected() {
+    rpcClientBackoffDisconnected.incr();
+  }
+
+  /**
+   * Returns the number of disconnected backoffs.
+   * @return current value of the disconnected client backoff counter
+   */
+  public long getClientBackoffDisconnected() {
+    return rpcClientBackoffDisconnected.value();
+  }
+
+
   /**
    * Increments the Slow RPC counter.
    */

+ 1 - 0
hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md

@@ -87,6 +87,7 @@ The default timeunit used for RPC metrics is milliseconds (as per the below desc
 | `RpcAuthorizationFailures` | Total number of authorization failures |
 | `RpcAuthorizationSuccesses` | Total number of authorization successes |
 | `RpcClientBackoff` | Total number of client backoff requests |
+| `RpcClientBackoffDisconnected` | Total number of client backoff requests that resulted in the client being disconnected. This is a subset of `RpcClientBackoff` |
 | `RpcSlowCalls` | Total number of slow RPC calls |
 | `RpcRequeueCalls` | Total number of requeue RPC calls |
 | `RpcCallsSuccesses` | Total number of RPC calls that are successfully processed |

+ 1 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java

@@ -1528,6 +1528,7 @@ public class TestRPC extends TestRpcBase {
         IOException unwrapExeption = re.unwrapRemoteException();
         if (unwrapExeption instanceof RetriableException) {
           succeeded = true;
+          assertEquals(1L, server.getRpcMetrics().getClientBackoffDisconnected());
         } else {
           lastException = unwrapExeption;
         }