Browse Source

HDFS-5220 Expose group resolution time as metric (jxiang via cmccabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1555986 13f79535-47bb-0310-9956-ffa450edef68
Colin McCabe 11 years ago
parent
commit
5745523e98

+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.http.lib.StaticUserWebFilter;
-import org.apache.hadoop.security.authorize.Service;
 
 /** 
  * This class contains constants for configuration keys used
@@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   /** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */
   public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
       10000;
+
+  public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
+    "hadoop.user.group.metrics.percentiles.intervals";
 }

+ 1 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java

@@ -138,6 +138,7 @@ public class Groups {
     List<String> groupList = impl.getGroups(user);
     long endMs = Time.monotonicNow();
     long deltaMs = endMs - startMs ;
+    UserGroupInformation.metrics.addGetGroups(deltaMs);
     if (deltaMs > warningDeltaMs) {
       LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
           "took " + deltaMs + " milliseconds.");

+ 30 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.security;
 
+import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
 
 import java.io.File;
 import java.io.IOException;
@@ -56,6 +57,8 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
@@ -90,14 +93,27 @@ public class UserGroupInformation {
    */
   @Metrics(about="User and group related metrics", context="ugi")
   static class UgiMetrics {
+    final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");
+
     @Metric("Rate of successful kerberos logins and latency (milliseconds)")
     MutableRate loginSuccess;
     @Metric("Rate of failed kerberos logins and latency (milliseconds)")
     MutableRate loginFailure;
+    @Metric("GetGroups") MutableRate getGroups;
+    MutableQuantiles[] getGroupsQuantiles;
 
     static UgiMetrics create() {
       return DefaultMetricsSystem.instance().register(new UgiMetrics());
     }
+
+    void addGetGroups(long latency) {
+      getGroups.add(latency);
+      if (getGroupsQuantiles != null) {
+        for (MutableQuantiles q : getGroupsQuantiles) {
+          q.add(latency);
+        }
+      }
+    }
   }
   
   /**
@@ -239,6 +255,20 @@ public class UserGroupInformation {
       groups = Groups.getUserToGroupsMappingService(conf);
     }
     UserGroupInformation.conf = conf;
+
+    if (metrics.getGroupsQuantiles == null) {
+      int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
+      if (intervals != null && intervals.length > 0) {
+        final int length = intervals.length;
+        MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
+        for (int i = 0; i < length; i++) {
+          getGroupsQuantiles[i] = metrics.registry.newQuantiles(
+            "getGroups" + intervals[i] + "s",
+            "Get groups", "ops", "latency", intervals[i]);
+        }
+        metrics.getGroupsQuantiles = getGroupsQuantiles;
+      }
+    }
   }
 
   /**

+ 25 - 4
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java

@@ -19,7 +19,6 @@ package org.apache.hadoop.security;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.ipc.TestSaslRPC;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authentication.util.KerberosName;
@@ -39,9 +38,9 @@ import java.util.Collection;
 import java.util.LinkedHashSet;
 import java.util.Set;
 
+import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
 import static org.apache.hadoop.ipc.TestSaslRPC.*;
-import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
 import static org.apache.hadoop.test.MetricsAsserts.*;
 import static org.junit.Assert.*;
 import static org.mockito.Mockito.mock;
@@ -53,7 +52,9 @@ public class TestUserGroupInformation {
   final private static String GROUP2_NAME = "group2";
   final private static String GROUP3_NAME = "group3";
   final private static String[] GROUP_NAMES = 
-    new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME}; 
+    new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
+  // Rollover interval of percentile metrics (in seconds)
+  private static final int PERCENTILES_INTERVAL = 1;
   private static Configuration conf;
  
   /**
@@ -79,7 +80,8 @@ public class TestUserGroupInformation {
     // doesn't matter what it is, but getGroups needs it set...
     // use HADOOP_HOME environment variable to prevent interfering with logic
     // that finds winutils.exe
-    System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME"));
+    String home = System.getenv("HADOOP_HOME");
+    System.setProperty("hadoop.home.dir", (home != null ? home : "."));
     // fake the realm is kerberos is enabled
     System.setProperty("java.security.krb5.kdc", "");
     System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
@@ -149,11 +151,15 @@ public class TestUserGroupInformation {
   /** Test login method */
   @Test (timeout = 30000)
   public void testLogin() throws Exception {
+    conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
+      String.valueOf(PERCENTILES_INTERVAL));
+    UserGroupInformation.setConfiguration(conf);
     // login from unix
     UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
     assertEquals(UserGroupInformation.getCurrentUser(),
                  UserGroupInformation.getLoginUser());
     assertTrue(ugi.getGroupNames().length >= 1);
+    verifyGroupMetrics(1);
 
     // ensure that doAs works correctly
     UserGroupInformation userGroupInfo = 
@@ -727,6 +733,21 @@ public class TestUserGroupInformation {
     }
   }
 
+  private static void verifyGroupMetrics(
+      long groups) throws InterruptedException {
+    MetricsRecordBuilder rb = getMetrics("UgiMetrics");
+    if (groups > 0) {
+      assertCounter("GetGroupsNumOps", groups, rb);
+      double avg = getDoubleGauge("GetGroupsAvgTime", rb);
+      assertTrue(avg >= 0.0);
+
+      // Sleep for an interval+slop to let the percentiles rollover
+      Thread.sleep((PERCENTILES_INTERVAL+1)*1000);
+      // Check that the percentiles were updated
+      assertQuantileGauges("GetGroups1s", rb);
+    }
+  }
+
   /**
    * Test for the case that UserGroupInformation.getCurrentUser()
    * is called when the AccessControlContext has a Subject associated

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -187,6 +187,8 @@ Release 2.4.0 - UNRELEASED
     HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
     (Haohui Mai via jing9)
 
+    HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-5239.  Allow FSNamesystem lock fairness to be configurable (daryn)

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -482,6 +482,14 @@ public class NameNode implements NameNodeStatusMXBean {
    * @param conf the configuration
    */
   protected void initialize(Configuration conf) throws IOException {
+    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
+      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
+      if (intervals != null) {
+        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
+          intervals);
+      }
+    }
+
     UserGroupInformation.setConfiguration(conf);
     loginAsNameNodeUser(conf);
 

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java

@@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.test.MetricsAsserts;
 import org.apache.hadoop.util.Time;
 import org.apache.log4j.Level;
@@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
   
   @After
   public void tearDown() throws Exception {
+    MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
+    if (source != null) {
+      // Run only once since the UGI metrics is cleaned up during teardown
+      MetricsRecordBuilder rb = getMetrics(source);
+      assertQuantileGauges("GetGroups1s", rb);
+    }
     cluster.shutdown();
   }