Bläddra i källkod

HADOOP-19447. Add Caching Mechanism to HostResolver to Avoid Redundant Hostname Resolutions (#7527) Contributed by Jiandan Yang.

* HADOOP-19447. Add Caching Mechanism to HostResolver to Avoid Redundant Hostname Resolutions

Reviewed-by: Tao Yang <taoyang@apache.org>
Signed-off-by: Shilun Fan <slfan1989@apache.org>
Yang Jiandan 3 veckor sedan
förälder
incheckning
d19b35a35d

+ 5 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -243,6 +243,11 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
       "hadoop.security.token.service.use_ip";
   public static final boolean HADOOP_SECURITY_TOKEN_SERVICE_USE_IP_DEFAULT =
       true;
+  public static final String
+      HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS =
+      "hadoop.security.hostname.cache.expire-interval.seconds";
+  public static final int
+      HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS_DEFAULT = 0;
 
   /**
    * @see

+ 69 - 9
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java

@@ -51,6 +51,9 @@ import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenInfo;
+import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder;
+import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader;
+import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache;
 import org.apache.hadoop.util.StopWatch;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.ZKUtil;
@@ -90,6 +93,7 @@ public final class SecurityUtil {
 
   private static boolean logSlowLookups;
   private static int slowLookupThresholdMs;
+  private static long cachingInterval = 0;
 
   static {
     setConfigurationInternal(new Configuration());
@@ -106,6 +110,10 @@ public final class SecurityUtil {
     boolean useIp = conf.getBoolean(
         CommonConfigurationKeys.HADOOP_SECURITY_TOKEN_SERVICE_USE_IP,
         CommonConfigurationKeys.HADOOP_SECURITY_TOKEN_SERVICE_USE_IP_DEFAULT);
+    cachingInterval = conf.getTimeDuration(
+        CommonConfigurationKeys.HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS,
+        CommonConfigurationKeys.HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS_DEFAULT,
+        TimeUnit.SECONDS);
     setTokenServiceUseIp(useIp);
 
     logSlowLookups = conf.getBoolean(
@@ -139,8 +147,8 @@ public final class SecurityUtil {
     }
     useIpForTokenService = flag;
     hostResolver = !useIpForTokenService
-        ? new QualifiedHostResolver()
-        : new StandardHostResolver();
+        ? new QualifiedHostResolver(cachingInterval)
+        : new StandardHostResolver(cachingInterval);
   }
   
   /**
@@ -586,17 +594,62 @@ public final class SecurityUtil {
       return hostResolver.getByName(hostname);
     }
   }
-  
+
   interface HostResolver {
-    InetAddress getByName(String host) throws UnknownHostException;    
+    InetAddress getByName(String host) throws UnknownHostException;
+  }
+
+  static abstract class CacheableHostResolver implements HostResolver {
+    private volatile LoadingCache<String, InetAddress> cache;
+
+    CacheableHostResolver(long expiryIntervalSecs) {
+      if (expiryIntervalSecs > 0) {
+        cache = CacheBuilder.newBuilder()
+            .expireAfterWrite(expiryIntervalSecs, TimeUnit.SECONDS)
+            .build(new CacheLoader<String, InetAddress>() {
+              @Override
+              public InetAddress load(String key) throws Exception {
+                return resolve(key);
+              }
+            });
+      }
+    }
+    protected abstract InetAddress resolve(String host) throws UnknownHostException;
+
+    @Override
+    public InetAddress getByName(String host) throws UnknownHostException {
+      if (cache != null) {
+        try {
+          return cache.get(host);
+        } catch (Exception e) {
+          Throwable cause = e.getCause();
+          if (cause instanceof UnknownHostException) {
+            throw (UnknownHostException) cause;
+          }
+          String message = (cause != null ? cause.getMessage() : "Unknown error");
+          throw new UnknownHostException("Error resolving host " + host + ": " + message);
+        }
+      } else {
+        return resolve(host);
+      }
+    }
+
+    @VisibleForTesting
+    public LoadingCache<String, InetAddress> getCache() {
+      return cache;
+    }
   }
-  
   /**
    * Uses standard java host resolution
    */
-  static class StandardHostResolver implements HostResolver {
+  static class StandardHostResolver extends CacheableHostResolver {
+
+    StandardHostResolver(long expiryIntervalSecs) {
+      super(expiryIntervalSecs);
+    }
+
     @Override
-    public InetAddress getByName(String host) throws UnknownHostException {
+    public InetAddress resolve(String host) throws UnknownHostException {
       return InetAddress.getByName(host);
     }
   }
@@ -623,7 +676,7 @@ public final class SecurityUtil {
    * NOTE: this resolver is only used if:
    *       hadoop.security.token.service.use_ip=false 
    */
-  protected static class QualifiedHostResolver implements HostResolver {
+  protected static class QualifiedHostResolver extends CacheableHostResolver {
     private List<String> searchDomains = new ArrayList<>();
     {
       ResolverConfig resolverConfig = ResolverConfig.getCurrentConfig();
@@ -632,6 +685,13 @@ public final class SecurityUtil {
       }
     }
 
+    QualifiedHostResolver() {
+      this(0);
+    }
+
+    QualifiedHostResolver(long expiryIntervalSecs) {
+      super(expiryIntervalSecs);
+    }
     /**
      * Create an InetAddress with a fully qualified hostname of the given
      * hostname.  InetAddress does not qualify an incomplete hostname that
@@ -645,7 +705,7 @@ public final class SecurityUtil {
      * @throws UnknownHostException if host does not exist
      */
     @Override
-    public InetAddress getByName(String host) throws UnknownHostException {
+    public InetAddress resolve(String host) throws UnknownHostException {
       InetAddress addr = null;
 
       if (InetAddresses.isInetAddress(host)) {

+ 10 - 0
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -4480,4 +4480,14 @@ The switch to turn S3A auditing on or off.
       </description>
   </property>
 
+  <property>
+    <name>hadoop.security.hostname.cache.expire-interval.seconds</name>
+    <value>0</value>
+    <description>The expiration time in seconds for cached hostname resolutions.
+      This cache is used to avoid repeated resolution for hostname.
+      A shorter interval may provide more up-to-date resolutions,
+      while a longer interval reduces lookup overhead.
+      If the value is less than or equal to 0, the cache is disabled entirely.
+    </description>
+  </property>
 </configuration>

+ 122 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java

@@ -24,14 +24,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 
 import java.io.File;
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
+import java.net.UnknownHostException;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 
 import javax.security.auth.kerberos.KerberosPrincipal;
 
@@ -500,4 +506,120 @@ public class TestSecurityUtil {
         ZK_AUTH_VALUE.toCharArray());
     provider.flush();
   }
+
+  @Test
+  public void testInitiateHostResolver() throws Exception {
+    // 1. useIP is false and cache interval is 0
+    Configuration conf = new Configuration();
+    conf.setBoolean(CommonConfigurationKeys.HADOOP_SECURITY_TOKEN_SERVICE_USE_IP, false);
+    conf.setTimeDuration(
+        CommonConfigurationKeys.HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS,
+        0, TimeUnit.SECONDS);
+    SecurityUtil.setConfiguration(conf);
+    SecurityUtil.HostResolver hostResolver = SecurityUtil.hostResolver;
+    assertTrue(hostResolver instanceof SecurityUtil.QualifiedHostResolver,
+        "Resolver should be a QualifiedHostResolver");
+    SecurityUtil.CacheableHostResolver cacheableHostResolver =
+        (SecurityUtil.QualifiedHostResolver) hostResolver;
+    assertNull(cacheableHostResolver.getCache(),
+        "Cache should be null when caching interval is less than or equal 0");
+
+
+    // 2. useIP is false and cache interval is 10
+    conf.setBoolean(CommonConfigurationKeys.HADOOP_SECURITY_TOKEN_SERVICE_USE_IP, false);
+    conf.setTimeDuration(
+        CommonConfigurationKeys.HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS,
+        10, TimeUnit.SECONDS);
+    SecurityUtil.setConfiguration(conf);
+    hostResolver = SecurityUtil.hostResolver;
+    assertTrue(hostResolver instanceof SecurityUtil.QualifiedHostResolver,
+        "Resolver should be a QualifiedHostResolver");
+    cacheableHostResolver = (SecurityUtil.QualifiedHostResolver) hostResolver;
+    assertNotNull(cacheableHostResolver.getCache(),
+        "Cache should be set when caching interval is enabled");
+
+    // 3. useIP is true and cache interval is 0
+    conf.setBoolean(CommonConfigurationKeys.HADOOP_SECURITY_TOKEN_SERVICE_USE_IP, true);
+    conf.setTimeDuration(
+        CommonConfigurationKeys.HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS,
+        0, TimeUnit.SECONDS);
+    SecurityUtil.setConfiguration(conf);
+    hostResolver = SecurityUtil.hostResolver;
+    assertTrue(hostResolver instanceof SecurityUtil.StandardHostResolver,
+        "Resolver should be a StandardHostResolver");
+    cacheableHostResolver = (SecurityUtil.StandardHostResolver) hostResolver;
+    assertNull(cacheableHostResolver.getCache(),
+        "Cache should be null when caching interval is less than or equal 0");
+
+    // 4. useIP is true and cache interval is 10
+    conf.setBoolean(CommonConfigurationKeys.HADOOP_SECURITY_TOKEN_SERVICE_USE_IP, true);
+    conf.setTimeDuration(
+        CommonConfigurationKeys.HADOOP_SECURITY_HOSTNAME_CACHE_EXPIRE_INTERVAL_SECONDS,
+        10, TimeUnit.SECONDS);
+    SecurityUtil.setConfiguration(conf);
+    hostResolver = SecurityUtil.hostResolver;
+    assertTrue(hostResolver instanceof SecurityUtil.StandardHostResolver,
+        "Resolver should be a StandardHostResolver");
+    cacheableHostResolver = (SecurityUtil.StandardHostResolver) hostResolver;
+    assertNotNull(cacheableHostResolver.getCache(),
+        "Cache should be set when caching interval is enabled");
+  }
+
+  /**
+   * Test caching behavior in QualifiedHostResolver when caching is enabled.
+   */
+  @Test
+  public void testQualifiedHostResolverCachingEnabled() throws Exception {
+    // Create a QualifiedHostResolver with expiry interval > 0
+    SecurityUtil.QualifiedHostResolver
+        resolver = new SecurityUtil.QualifiedHostResolver(1);
+    testCacheableResolve(resolver);
+  }
+
+  /**
+   * Test caching behavior in StandardHostResolver when caching is enabled.
+   */
+  @Test
+  public void testStandardHostResolverCachingEnabled() throws Exception {
+    // Create a StandardHostResolver with expiry interval > 0
+    SecurityUtil.StandardHostResolver
+        resolver = new SecurityUtil.StandardHostResolver(1);
+    testCacheableResolve(resolver);
+  }
+
+  private void  testCacheableResolve(SecurityUtil.CacheableHostResolver resolver)
+      throws Exception {
+    // Call getByName twice with the same host
+    InetAddress addr1 = resolver.getByName("127.0.0.1");
+    InetAddress addr2 = resolver.getByName("127.0.0.1");
+    assertNotNull(addr1);
+    assertNotNull(addr2);
+    // Both addresses should be the same instance (cached value)
+    assertSame(addr1, addr2);
+
+    // wait for timeout of cache item
+    Thread.sleep(1500);
+    InetAddress addr3 = resolver.getByName("127.0.0.1");
+    assertNotNull(addr3);
+    assertNotSame(addr1, addr3);
+  }
+
+  /**
+   * Test resolving non-existent hostname, show throw UnknownHostException.
+   */
+  @Test
+  public void testInvalidHostThrowsException() {
+    SecurityUtil.StandardHostResolver
+        standardHostResolver = new SecurityUtil.StandardHostResolver(10);
+    String invalidHost = "invalid_host_name_which_does_not_exist";
+    assertThrows(UnknownHostException.class, () -> {
+      standardHostResolver.getByName(invalidHost);
+    }, "Resolving an invalid host should throw UnknownHostException");
+
+    SecurityUtil.QualifiedHostResolver
+        qualifiedHostResolver = new SecurityUtil.QualifiedHostResolver(10);
+    assertThrows(UnknownHostException.class, () -> {
+      qualifiedHostResolver.getByName(invalidHost);
+    }, "Resolving an invalid host should throw UnknownHostException");
+  }
 }