瀏覽代碼

YARN-11210. Fix YARN RMAdminCLI retry logic for non-retryable kerbero… (#4563)

Co-authored-by: Kevin Wikant <wikak@amazon.com>
KevinWikant 2 年之前
父節點
當前提交
213ea03758

+ 12 - 9
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java

@@ -181,15 +181,20 @@ public class RetryPolicies {
   }
 
   /**
-   * A retry policy for exceptions other than RemoteException.
+   * <p>
+   * A retry policy where RemoteException and SaslException are not retried, other individual
+   * exception types can have RetryPolicy overrides, and any other exception type without an
+   * override is not retried.
+   * </p>
+   *
    * @param defaultPolicy defaultPolicy.
    * @param exceptionToPolicyMap exceptionToPolicyMap.
    * @return RetryPolicy.
    */
-  public static final RetryPolicy retryOtherThanRemoteException(
+  public static final RetryPolicy retryOtherThanRemoteAndSaslException(
       RetryPolicy defaultPolicy,
       Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap) {
-    return new OtherThanRemoteExceptionDependentRetry(defaultPolicy,
+    return new OtherThanRemoteAndSaslExceptionDependentRetry(defaultPolicy,
         exceptionToPolicyMap);
   }
 
@@ -589,12 +594,12 @@ public class RetryPolicies {
     }
   }
 
-  static class OtherThanRemoteExceptionDependentRetry implements RetryPolicy {
+  static class OtherThanRemoteAndSaslExceptionDependentRetry implements RetryPolicy {
 
     private RetryPolicy defaultPolicy;
     private Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap;
 
-    public OtherThanRemoteExceptionDependentRetry(RetryPolicy defaultPolicy,
+    OtherThanRemoteAndSaslExceptionDependentRetry(RetryPolicy defaultPolicy,
         Map<Class<? extends Exception>,
         RetryPolicy> exceptionToPolicyMap) {
       this.defaultPolicy = defaultPolicy;
@@ -605,10 +610,8 @@ public class RetryPolicies {
     public RetryAction shouldRetry(Exception e, int retries, int failovers,
         boolean isIdempotentOrAtMostOnce) throws Exception {
       RetryPolicy policy = null;
-      // ignore Remote Exception
-      if (e instanceof RemoteException) {
-        // do nothing
-      } else {
+      // ignore RemoteException and SaslException
+      if (!(e instanceof RemoteException || isSaslFailure(e))) {
         policy = exceptionToPolicyMap.get(e.getClass());
       }
       if (policy == null) {

+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java

@@ -61,6 +61,7 @@ import org.slf4j.LoggerFactory;
 
 import javax.net.SocketFactory;
 import javax.security.sasl.Sasl;
+import javax.security.sasl.SaslException;
 import java.io.*;
 import java.net.*;
 import java.nio.ByteBuffer;
@@ -1620,7 +1621,8 @@ public class Client implements AutoCloseable {
       }
 
       if (call.error != null) {
-        if (call.error instanceof RemoteException) {
+        if (call.error instanceof RemoteException ||
+            call.error instanceof SaslException) {
           call.error.fillInStackTrace();
           throw call.error;
         } else { // local exception

+ 8 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java

@@ -237,7 +237,14 @@ public class SaslRpcClient {
           LOG.debug("client isn't using kerberos");
           return null;
         }
-        String serverPrincipal = getServerPrincipal(authType);
+        final String serverPrincipal;
+        try {
+          serverPrincipal = getServerPrincipal(authType);
+        } catch (IllegalArgumentException ex) {
+          // YARN-11210: getServerPrincipal can throw IllegalArgumentException if Kerberos
+          // configuration is bad, this is surfaced as a non-retryable SaslException
+          throw new SaslException("Bad Kerberos server principal configuration", ex);
+        }
         if (serverPrincipal == null) {
           LOG.debug("protocol doesn't use kerberos");
           return null;

+ 1 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java

@@ -291,7 +291,7 @@ public class TestRetryProxy {
 
     UnreliableInterface unreliable = (UnreliableInterface)
         RetryProxy.create(UnreliableInterface.class, unreliableImpl,
-            retryOtherThanRemoteException(TRY_ONCE_THEN_FAIL,
+            retryOtherThanRemoteAndSaslException(TRY_ONCE_THEN_FAIL,
                 exceptionToPolicyMap));
     // should retry with local IOException.
     unreliable.failsOnceWithIOException();

+ 6 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java

@@ -64,8 +64,12 @@ public class HAUtil {
    *         configuration; else false.
    */
   public static boolean isFederationFailoverEnabled(Configuration conf) {
-    return conf.getBoolean(YarnConfiguration.FEDERATION_FAILOVER_ENABLED,
-        YarnConfiguration.DEFAULT_FEDERATION_FAILOVER_ENABLED);
+    // Federation failover is not enabled unless federation is enabled. This previously caused
+    // YARN RMProxy to use the HA Retry policy in a non-HA & non-federation environments because
+    // the default federation failover enabled value is true.
+    return isFederationEnabled(conf) &&
+        conf.getBoolean(YarnConfiguration.FEDERATION_FAILOVER_ENABLED,
+            YarnConfiguration.DEFAULT_FEDERATION_FAILOVER_ENABLED);
   }
 
   /**

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java

@@ -300,7 +300,7 @@ public class RMProxy<T> {
     // YARN-4288: local IOException is also possible.
     exceptionToPolicyMap.put(IOException.class, retryPolicy);
     // Not retry on remote IO exception.
-    return RetryPolicies.retryOtherThanRemoteException(
+    return RetryPolicies.retryOtherThanRemoteAndSaslException(
         RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
   }
 }