|
@@ -516,16 +516,22 @@ public abstract class Server {
|
|
|
private final long metricsUpdaterInterval;
|
|
|
private final ScheduledExecutorService scheduledExecutorService;
|
|
|
|
|
|
- private boolean logSlowRPC = false;
|
|
|
+ private volatile boolean logSlowRPC = false;
|
|
|
+ /** Threshold time for log slow rpc. */
|
|
|
+ private volatile long logSlowRPCThresholdTime;
|
|
|
|
|
|
/**
|
|
|
* Checks if LogSlowRPC is set true.
|
|
|
* @return true, if LogSlowRPC is set true, false, otherwise.
|
|
|
*/
|
|
|
- protected boolean isLogSlowRPC() {
|
|
|
+ public boolean isLogSlowRPC() {
|
|
|
return logSlowRPC;
|
|
|
}
|
|
|
|
|
|
+ public long getLogSlowRPCThresholdTime() {
|
|
|
+ return logSlowRPCThresholdTime;
|
|
|
+ }
|
|
|
+
|
|
|
public int getNumInProcessHandler() {
|
|
|
return numInProcessHandler.get();
|
|
|
}
|
|
@@ -543,10 +549,16 @@ public abstract class Server {
|
|
|
* @param logSlowRPCFlag input logSlowRPCFlag.
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
- protected void setLogSlowRPC(boolean logSlowRPCFlag) {
|
|
|
+ public void setLogSlowRPC(boolean logSlowRPCFlag) {
|
|
|
this.logSlowRPC = logSlowRPCFlag;
|
|
|
}
|
|
|
|
|
|
+ @VisibleForTesting
|
|
|
+ public void setLogSlowRPCThresholdTime(long logSlowRPCThresholdMs) {
|
|
|
+ this.logSlowRPCThresholdTime = rpcMetrics.getMetricsTimeUnit().
|
|
|
+ convert(logSlowRPCThresholdMs, TimeUnit.MILLISECONDS);
|
|
|
+ }
|
|
|
+
|
|
|
private void setPurgeIntervalNanos(int purgeInterval) {
|
|
|
int tmpPurgeInterval = CommonConfigurationKeysPublic.
|
|
|
IPC_SERVER_PURGE_INTERVAL_MINUTES_DEFAULT;
|
|
@@ -568,12 +580,15 @@ public abstract class Server {
|
|
|
* @param methodName - RPC Request method name
|
|
|
* @param details - Processing Detail.
|
|
|
*
|
|
|
- * if this request took too much time relative to other requests
|
|
|
- * we consider that as a slow RPC. 3 is a magic number that comes
|
|
|
- * from 3 sigma deviation. A very simple explanation can be found
|
|
|
- * by searching for 68-95-99.7 rule. We flag an RPC as slow RPC
|
|
|
- * if and only if it falls above 99.7% of requests. We start this logic
|
|
|
- * only once we have enough sample size.
|
|
|
+ * If a request took significant more time than other requests,
|
|
|
+ * and its processing time is at least `logSlowRPCThresholdMs` we consider that as a slow RPC.
|
|
|
+ *
|
|
|
+ * The definition rules for calculating whether the current request took too much time
|
|
|
+ * compared to other requests are as follows:
|
|
|
+ * 3 is a magic number that comes from 3 sigma deviation.
|
|
|
+ * A very simple explanation can be found by searching for 68-95-99.7 rule.
|
|
|
+ * We flag an RPC as slow RPC if and only if it falls above 99.7% of requests.
|
|
|
+ * We start this logic only once we have enough sample size.
|
|
|
*/
|
|
|
void logSlowRpcCalls(String methodName, Call call,
|
|
|
ProcessingDetails details) {
|
|
@@ -587,15 +602,14 @@ public abstract class Server {
|
|
|
final double threeSigma = rpcMetrics.getProcessingMean() +
|
|
|
(rpcMetrics.getProcessingStdDev() * deviation);
|
|
|
|
|
|
- long processingTime =
|
|
|
- details.get(Timing.PROCESSING, rpcMetrics.getMetricsTimeUnit());
|
|
|
+ final TimeUnit metricsTimeUnit = rpcMetrics.getMetricsTimeUnit();
|
|
|
+ long processingTime = details.get(Timing.PROCESSING, metricsTimeUnit);
|
|
|
if ((rpcMetrics.getProcessingSampleCount() > minSampleSize) &&
|
|
|
- (processingTime > threeSigma)) {
|
|
|
- LOG.warn(
|
|
|
- "Slow RPC : {} took {} {} to process from client {},"
|
|
|
- + " the processing detail is {}",
|
|
|
- methodName, processingTime, rpcMetrics.getMetricsTimeUnit(), call,
|
|
|
- details.toString());
|
|
|
+ (processingTime > threeSigma) &&
|
|
|
+ (processingTime > getLogSlowRPCThresholdTime())) {
|
|
|
+ LOG.warn("Slow RPC : {} took {} {} to process from client {}, the processing detail is {}," +
|
|
|
+ " and the threshold time is {} {}.", methodName, processingTime, metricsTimeUnit,
|
|
|
+ call, details.toString(), getLogSlowRPCThresholdTime(), metricsTimeUnit);
|
|
|
rpcMetrics.incrSlowRpc();
|
|
|
}
|
|
|
}
|
|
@@ -3359,6 +3373,10 @@ public abstract class Server {
|
|
|
CommonConfigurationKeysPublic.IPC_SERVER_LOG_SLOW_RPC,
|
|
|
CommonConfigurationKeysPublic.IPC_SERVER_LOG_SLOW_RPC_DEFAULT));
|
|
|
|
|
|
+ this.setLogSlowRPCThresholdTime(conf.getLong(
|
|
|
+ CommonConfigurationKeysPublic.IPC_SERVER_LOG_SLOW_RPC_THRESHOLD_MS_KEY,
|
|
|
+ CommonConfigurationKeysPublic.IPC_SERVER_LOG_SLOW_RPC_THRESHOLD_MS_DEFAULT));
|
|
|
+
|
|
|
this.setPurgeIntervalNanos(conf.getInt(
|
|
|
CommonConfigurationKeysPublic.IPC_SERVER_PURGE_INTERVAL_MINUTES_KEY,
|
|
|
CommonConfigurationKeysPublic.IPC_SERVER_PURGE_INTERVAL_MINUTES_DEFAULT));
|