Browse Source

ZOOKEEPER-3858: Add metrics to track server unavailable time

Author: Jie Huang <jiehuang@fb.com>

Reviewers: Michael Han <hanm@apache.org>

Closes #1378 from jhuan31/ZOOKEEPER-3858
Jie Huang 4 years ago
parent
commit
e4c175fba8

+ 6 - 0
zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerMetrics.java

@@ -134,6 +134,9 @@ public final class ServerMetrics {
         SESSIONLESS_CONNECTIONS_EXPIRED = metricsContext.getCounter("sessionless_connections_expired");
         STALE_SESSIONS_EXPIRED = metricsContext.getCounter("stale_sessions_expired");
 
+        UNAVAILABLE_TIME = metricsContext.getSummary("unavailable_time", DetailLevel.BASIC);
+        LEADER_UNAVAILABLE_TIME = metricsContext.getSummary("leader_unavailable_time", DetailLevel.BASIC);
+
         /*
          * Number of requests that are in the session queue.
          */
@@ -310,6 +313,9 @@ public final class ServerMetrics {
     public final Counter SESSIONLESS_CONNECTIONS_EXPIRED;
     public final Counter STALE_SESSIONS_EXPIRED;
 
+    public final Summary UNAVAILABLE_TIME;
+    public final Summary LEADER_UNAVAILABLE_TIME;
+
     // Connection throttling related
     public final Summary CONNECTION_TOKEN_DEFICIT;
     public final Counter CONNECTION_REJECTED;

+ 14 - 0
zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java

@@ -54,6 +54,7 @@ import org.apache.zookeeper.KeeperException.BadArgumentsException;
 import org.apache.zookeeper.common.AtomicFileWritingIdiom;
 import org.apache.zookeeper.common.AtomicFileWritingIdiom.WriterStatement;
 import org.apache.zookeeper.common.QuorumX509Util;
+import org.apache.zookeeper.common.Time;
 import org.apache.zookeeper.common.X509Exception;
 import org.apache.zookeeper.jmx.MBeanRegistry;
 import org.apache.zookeeper.jmx.ZKMBeanInfo;
@@ -530,6 +531,7 @@ public class QuorumPeer extends ZooKeeperThread implements QuorumStats.Provider
      */
     public long start_fle, end_fle; // fle = fast leader election
     public static final String FLE_TIME_UNIT = "MS";
+    private long unavailableStartTime;
 
     /*
      * Default value of peer is participant
@@ -859,6 +861,14 @@ public class QuorumPeer extends ZooKeeperThread implements QuorumStats.Provider
     }
 
     public void setZabState(ZabState zabState) { // records unavailable-time metrics on entering BROADCAST, then stores and logs the new state
+        if ((zabState == ZabState.BROADCAST) && (unavailableStartTime != 0)) { // only when a start timestamp is pending; 0 means none recorded
+            long unavailableTime = Time.currentElapsedTime() - unavailableStartTime; // elapsed time since the start timestamp was recorded
+            ServerMetrics.getMetrics().UNAVAILABLE_TIME.add(unavailableTime); // recorded regardless of this peer's role
+            if (getPeerState() == ServerState.LEADING) { // additionally tracked separately when this peer is LEADING
+                ServerMetrics.getMetrics().LEADER_UNAVAILABLE_TIME.add(unavailableTime);
+            }
+            unavailableStartTime = 0; // reset so the main loop's "== 0" check records a fresh start timestamp
+        }
         this.zabState.set(zabState); // the metrics above are recorded before the new state is stored
         LOG.info("Peer state changed: {}", getDetailedPeerState());
     }
@@ -1369,6 +1379,10 @@ public class QuorumPeer extends ZooKeeperThread implements QuorumStats.Provider
              * Main loop
              */
             while (running) {
+                if (unavailableStartTime == 0) {
+                    unavailableStartTime = Time.currentElapsedTime();
+                }
+
                 switch (getPeerState()) {
                 case LOOKING:
                     LOG.info("LOOKING");