
HADOOP-16579. Upgrade to Curator 4.2.0 and ZooKeeper 3.5.5 (#1656). Contributed by Norbert Kalmár, Mate Szalay-Beko

* HADOOP-16579 - Upgrade to Apache Curator 4.2.0 and ZooKeeper 3.5.5

- Add a static initializer to the unit tests that use ZooKeeper, to enable
the four-letter-words diagnostic telnet commands. (This interface is
disabled by default since ZooKeeper 3.5, so we enable it in the tests to
keep the ZooKeeper 3.4.x behavior; see the sketch after this list.)
- Also fix ZKFailoverController to look only for the relevant fail-over
ActiveAttempt records. The new ZooKeeper seems to respond quicker during the
fail-over tests than ZooKeeper 3.4.x did, so we make sure to catch all the
relevant records by adding a new parameter to
ZKFailoverController.waitForActiveAttempt().
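
A minimal sketch of that initializer, assuming a JUnit base class for tests
that embed a ZooKeeper 3.5 server (the class name below is hypothetical; the
property name is ZooKeeper's own):

  public abstract class ZooKeeperTestBase {
    static {
      // ZooKeeper 3.5 disables the 4lw telnet commands (ruok, stat, ...) by
      // default (ZOOKEEPER-2693); whitelist them so existing tests keep working.
      System.setProperty("zookeeper.4lw.commands.whitelist", "*");
    }
  }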

Co-authored-by: Norbert Kalmár <nkalmar@cloudera.com>
Mate Szalay-Beko 5 years ago
parent commit
6d92aa7c30

+ 2 - 3
hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java

@@ -15,7 +15,6 @@ package org.apache.hadoop.security.authentication.util;
 
 import com.google.common.annotations.VisibleForTesting;
 import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
 import java.security.SecureRandom;
 import java.util.Collections;
 import java.util.HashMap;
@@ -36,7 +35,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZooDefs.Perms;
-import org.apache.zookeeper.client.ZooKeeperSaslClient;
+import org.apache.zookeeper.client.ZKClientConfig;
 import org.apache.zookeeper.data.ACL;
 import org.apache.zookeeper.data.Id;
 import org.apache.zookeeper.data.Stat;
@@ -368,7 +367,7 @@ public class ZKSignerSecretProvider extends RolloverSignerSecretProvider {
       LOG.info("Connecting to ZooKeeper with SASL/Kerberos"
               + "and using 'sasl' ACLs");
       String principal = setJaasConfiguration(config);
-      System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
+      System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
               JAAS_LOGIN_ENTRY_NAME);
       System.setProperty("zookeeper.authProvider.1",
               "org.apache.zookeeper.server.auth.SASLAuthenticationProvider");

+ 9 - 6
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java

@@ -442,14 +442,16 @@ public abstract class ZKFailoverController {
    * </ul>
    * 
    * @param timeoutMillis number of millis to wait
+   * @param onlyAfterNanoTime accept attempt records only after a given
+   * timestamp. Use this parameter to ignore the old attempt records from a
+   * previous fail-over attempt.
    * @return the published record, or null if the timeout elapses or the
    * service becomes unhealthy 
    * @throws InterruptedException if the thread is interrupted.
    */
-  private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis)
-      throws InterruptedException {
-    long st = System.nanoTime();
-    long waitUntil = st + TimeUnit.NANOSECONDS.convert(
+  private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis,
+      long onlyAfterNanoTime) throws InterruptedException {
+    long waitUntil = onlyAfterNanoTime + TimeUnit.NANOSECONDS.convert(
         timeoutMillis, TimeUnit.MILLISECONDS);
     
     do {
@@ -466,7 +468,7 @@ public abstract class ZKFailoverController {
 
       synchronized (activeAttemptRecordLock) {
         if ((lastActiveAttemptRecord != null &&
-            lastActiveAttemptRecord.nanoTime >= st)) {
+            lastActiveAttemptRecord.nanoTime >= onlyAfterNanoTime)) {
           return lastActiveAttemptRecord;
         }
         // Only wait 1sec so that we periodically recheck the health state
@@ -660,6 +662,7 @@ public abstract class ZKFailoverController {
     List<ZKFCProtocol> otherZkfcs = new ArrayList<ZKFCProtocol>(otherNodes.size());
 
     // Phase 3: ask the other nodes to yield from the election.
+    long st = System.nanoTime();
     HAServiceTarget activeNode = null;
     for (HAServiceTarget remote : otherNodes) {
       // same location, same node - may not always be == equality
@@ -678,7 +681,7 @@ public abstract class ZKFailoverController {
 
     // Phase 4: wait for the normal election to make the local node
     // active.
-    ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000);
+    ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000, st);
     
     if (attempt == null) {
       // We didn't even make an attempt to become active.

+ 3 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java

@@ -59,7 +59,7 @@ import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.apache.zookeeper.ZooDefs.Perms;
-import org.apache.zookeeper.client.ZooKeeperSaslClient;
+import org.apache.zookeeper.client.ZKClientConfig;
 import org.apache.zookeeper.data.ACL;
 import org.apache.zookeeper.data.Id;
 import org.slf4j.Logger;
@@ -173,8 +173,8 @@ public abstract class ZKDelegationTokenSecretManager<TokenIdent extends Abstract
           LOG.info("Connecting to ZooKeeper with SASL/Kerberos"
               + "and using 'sasl' ACLs");
           String principal = setJaasConfiguration(conf);
-          System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
-              JAAS_LOGIN_ENTRY_NAME);
+          System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
+                             JAAS_LOGIN_ENTRY_NAME);
           System.setProperty("zookeeper.authProvider.1",
               "org.apache.zookeeper.server.auth.SASLAuthenticationProvider");
           aclProvider = new SASLOwnerACLProvider(principal);

+ 20 - 18
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java

@@ -20,13 +20,13 @@ package org.apache.hadoop.util.curator;
 import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.LinkedList;
 import java.util.List;
 
 import org.apache.curator.framework.AuthInfo;
 import org.apache.curator.framework.CuratorFramework;
 import org.apache.curator.framework.CuratorFrameworkFactory;
-import org.apache.curator.framework.api.transaction.CuratorTransaction;
-import org.apache.curator.framework.api.transaction.CuratorTransactionFinal;
+import org.apache.curator.framework.api.transaction.CuratorOp;
 import org.apache.curator.retry.RetryNTimes;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
@@ -387,43 +387,45 @@ public final class ZKCuratorManager {
   /**
    * Use curator transactions to ensure zk-operations are performed in an all
    * or nothing fashion. This is equivalent to using ZooKeeper#multi.
-   *
-   * TODO (YARN-3774): Curator 3.0 introduces CuratorOp similar to Op. We ll
-   * have to rewrite this inner class when we adopt that.
    */
   public class SafeTransaction {
-    private CuratorTransactionFinal transactionFinal;
     private String fencingNodePath;
+    private List<CuratorOp> curatorOperations = new LinkedList<>();
 
     SafeTransaction(List<ACL> fencingACL, String fencingNodePath)
         throws Exception {
       this.fencingNodePath = fencingNodePath;
-      CuratorTransaction transaction = curator.inTransaction();
-      transactionFinal = transaction.create()
-          .withMode(CreateMode.PERSISTENT).withACL(fencingACL)
-          .forPath(fencingNodePath, new byte[0]).and();
+      curatorOperations.add(curator.transactionOp().create()
+                              .withMode(CreateMode.PERSISTENT)
+                              .withACL(fencingACL)
+                              .forPath(fencingNodePath, new byte[0]));
     }
 
     public void commit() throws Exception {
-      transactionFinal = transactionFinal.delete()
-          .forPath(fencingNodePath).and();
-      transactionFinal.commit();
+      curatorOperations.add(curator.transactionOp().delete()
+                              .forPath(fencingNodePath));
+      curator.transaction().forOperations(curatorOperations);
+      curatorOperations.clear();
     }
 
     public void create(String path, byte[] data, List<ACL> acl, CreateMode mode)
         throws Exception {
-      transactionFinal = transactionFinal.create()
-          .withMode(mode).withACL(acl).forPath(path, data).and();
+      curatorOperations.add(curator.transactionOp().create()
+                              .withMode(mode)
+                              .withACL(acl)
+                              .forPath(path, data));
     }
 
     public void delete(String path) throws Exception {
-      transactionFinal = transactionFinal.delete().forPath(path).and();
+      curatorOperations.add(curator.transactionOp().delete()
+                              .forPath(path));
     }
 
     public void setData(String path, byte[] data, int version)
         throws Exception {
-      transactionFinal = transactionFinal.setData()
-          .withVersion(version).forPath(path, data).and();
+      curatorOperations.add(curator.transactionOp().setData()
+                              .withVersion(version)
+                              .forPath(path, data));
     }
   }
 }
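
A minimal usage sketch of the Curator 4.x transaction API that the reworked
SafeTransaction builds on: operations are collected as CuratorOp instances and
committed atomically in one call (paths and payloads below are made up for
illustration):

  import java.util.ArrayList;
  import java.util.List;

  import org.apache.curator.framework.CuratorFramework;
  import org.apache.curator.framework.api.transaction.CuratorOp;
  import org.apache.curator.framework.api.transaction.CuratorTransactionResult;
  import org.apache.zookeeper.CreateMode;

  public class CuratorTxnExample {
    /** Create one node and update another atomically (ZooKeeper multi). */
    static List<CuratorTransactionResult> commitBoth(CuratorFramework curator)
        throws Exception {
      List<CuratorOp> ops = new ArrayList<>();
      ops.add(curator.transactionOp().create()
          .withMode(CreateMode.PERSISTENT)
          .forPath("/example/new-node", "payload".getBytes()));
      ops.add(curator.transactionOp().setData()
          .withVersion(-1)  // -1 accepts any current version
          .forPath("/example/existing-node", "updated".getBytes()));
      // Either both operations succeed or neither is applied.
      return curator.transaction().forOperations(ops);
    }
  }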

+ 9 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java

@@ -65,6 +65,15 @@ public abstract class ClientBaseWithFixes extends ZKTestCase {
     public static int CONNECTION_TIMEOUT = 30000;
     static final File BASETEST = GenericTestUtils.getTestDir();
 
+  static {
+    // The 4-letter-words commands are simple diagnostics telnet commands in
+    // ZooKeeper. Since ZooKeeper 3.5, these are disabled by default due to
+    // security concerns: https://issues.apache.org/jira/browse/ZOOKEEPER-2693
+    // We are enabling them for the tests here, as some tests in hadoop or in
+    // other projects might still use them
+    System.setProperty("zookeeper.4lw.commands.whitelist", "*");
+  }
+
     protected final String hostPort = initHostPort();
     protected int maxCnxns = 0;
     protected ServerCnxnFactory serverFactory = null;

+ 11 - 10
hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.service.ServiceStateException;
 import org.apache.hadoop.util.ZKUtil;
 import org.apache.zookeeper.Environment;
 import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.client.ZKClientConfig;
 import org.apache.zookeeper.client.ZooKeeperSaslClient;
 import org.apache.zookeeper.data.ACL;
 import org.apache.zookeeper.data.Id;
@@ -769,19 +770,19 @@ public class RegistrySecurity extends AbstractService {
             JaasConfiguration jconf =
                 new JaasConfiguration(jaasClientEntry, principal, keytab);
             javax.security.auth.login.Configuration.setConfiguration(jconf);
-            setSystemPropertyIfUnset(ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY,
-                "true");
-            setSystemPropertyIfUnset(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
-                jaasClientEntry);
+            setSystemPropertyIfUnset(ZKClientConfig.ENABLE_CLIENT_SASL_KEY,
+                                     "true");
+            setSystemPropertyIfUnset(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
+                                     jaasClientEntry);
           } else {
             // in this case, jaas config is specified so we will not change it
             LOG.info("Using existing ZK sasl configuration: " +
-                "jaasClientEntry = " + System.getProperty(
-                    ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, "Client") +
-                ", sasl client = " + System.getProperty(
-                    ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY,
-                    ZooKeeperSaslClient.ENABLE_CLIENT_SASL_DEFAULT) +
-                ", jaas = " + existingJaasConf);
+              "jaasClientEntry = " + System.getProperty(
+              ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, "Client") +
+              ", sasl client = " + System.getProperty(
+              ZKClientConfig.ENABLE_CLIENT_SASL_KEY,
+              ZKClientConfig.ENABLE_CLIENT_SASL_DEFAULT) +
+              ", jaas = " + existingJaasConf);
           }
           break;
 

+ 3 - 3
hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZookeeperConfigOptions.java

@@ -18,7 +18,7 @@
 
 package org.apache.hadoop.registry.client.impl.zk;
 
-import org.apache.zookeeper.client.ZooKeeperSaslClient;
+import org.apache.zookeeper.client.ZKClientConfig;
 import org.apache.zookeeper.server.ZooKeeperSaslServer;
 
 /**
@@ -62,10 +62,10 @@ public interface ZookeeperConfigOptions {
    *
    * <p>
    *   Default value is derived from
-   *   {@link ZooKeeperSaslClient#LOGIN_CONTEXT_NAME_KEY}
+   *   {@link ZKClientConfig#LOGIN_CONTEXT_NAME_KEY}
    */
   String PROP_ZK_SASL_CLIENT_CONTEXT =
-      ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY;
+    ZKClientConfig.LOGIN_CONTEXT_NAME_KEY;
 
   /**
    * The SASL client username: {@value}.

+ 20 - 5
hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java

@@ -42,6 +42,7 @@ import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.net.InetSocketAddress;
+import java.net.ServerSocket;
 import java.net.UnknownHostException;
 
 /**
@@ -121,7 +122,7 @@ public class MicroZookeeperService
    * @throws UnknownHostException if the server cannot resolve the host
    */
   private InetSocketAddress getAddress(int port) throws UnknownHostException {
-    return new InetSocketAddress(host, port < 0 ? 0 : port);
+    return new InetSocketAddress(host, port <= 0 ? getRandomAvailablePort() : port);
   }
 
   /**
@@ -227,10 +228,8 @@ public class MicroZookeeperService
 
     setupSecurity();
 
-    ZooKeeperServer zkServer = new ZooKeeperServer();
     FileTxnSnapLog ftxn = new FileTxnSnapLog(dataDir, dataDir);
-    zkServer.setTxnLogFactory(ftxn);
-    zkServer.setTickTime(tickTime);
+    ZooKeeperServer zkServer = new ZooKeeperServer(ftxn, tickTime);
 
     LOG.info("Starting Local Zookeeper service");
     factory = ServerCnxnFactory.createFactory();
@@ -245,7 +244,7 @@ public class MicroZookeeperService
       PrintWriter pw = new PrintWriter(sw);
       zkServer.dumpConf(pw);
       pw.flush();
-      LOG.debug(sw.toString());
+      LOG.debug("ZooKeeper config:\n" + sw.toString());
     }
     binding = new BindingInformation();
     binding.ensembleProvider = new FixedEnsembleProvider(connectString);
@@ -279,4 +278,20 @@ public class MicroZookeeperService
         "Service is not started: binding information undefined");
     return binding;
   }
+
+  /**
+   * Returns with a random open port can be used to set as server port for ZooKeeper.
+   * @return a random open port or 0 (in case of error)
+   */
+  private int getRandomAvailablePort() {
+      port = 0;
+      try {
+        final ServerSocket s = new ServerSocket(0);
+        port = s.getLocalPort();
+        s.close();
+      } catch (IOException e) {
+        LOG.warn("ERROR during selecting random port for ZooKeeper server to bind." , e);
+      }
+      return port;
+  }
 }

+ 66 - 2
hadoop-project/pom.xml

@@ -87,8 +87,8 @@
     <protobuf.version>3.7.1</protobuf.version>
     <protoc.path>${env.HADOOP_PROTOC_PATH}</protoc.path>
 
-    <zookeeper.version>3.4.13</zookeeper.version>
-    <curator.version>2.13.0</curator.version>
+    <zookeeper.version>3.5.6</zookeeper.version>
+    <curator.version>4.2.0</curator.version>
     <findbugs.version>3.0.0</findbugs.version>
     <spotbugs.version>3.1.0-RC1</spotbugs.version>
     <dnsjava.version>2.1.7</dnsjava.version>
@@ -1204,6 +1204,46 @@
             <groupId>jline</groupId>
             <artifactId>jline</artifactId>
           </exclusion>
+          <exclusion>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty-all</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty-handler</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty-transport-native-epoll</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-collections</groupId>
+            <artifactId>commons-collections</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.kerby</groupId>
+            <artifactId>kerb-core</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.kerby</groupId>
+            <artifactId>kerb-simplekdc</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.kerby</groupId>
+            <artifactId>kerby-config</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+          </exclusion>
         </exclusions>
       </dependency>
       <dependency>
@@ -1340,6 +1380,20 @@
        <groupId>org.apache.curator</groupId>
        <artifactId>curator-client</artifactId>
        <version>${curator.version}</version>
+       <exclusions>
+         <exclusion>
+           <groupId>org.apache.zookeeper</groupId>
+           <artifactId>zookeeper</artifactId>
+         </exclusion>
+         <exclusion>
+           <groupId>com.google.guava</groupId>
+           <artifactId>guava</artifactId>
+         </exclusion>
+         <exclusion>
+           <groupId>org.slf4j</groupId>
+           <artifactId>slf4j-api</artifactId>
+         </exclusion>
+       </exclusions>
      </dependency>
      <dependency>
        <groupId>org.apache.curator</groupId>
@@ -1350,6 +1404,16 @@
        <groupId>org.apache.curator</groupId>
        <artifactId>curator-test</artifactId>
        <version>${curator.version}</version>
+       <exclusions>
+         <exclusion>
+           <groupId>org.apache.zookeeper</groupId>
+           <artifactId>zookeeper</artifactId>
+         </exclusion>
+         <exclusion>
+           <groupId>com.google.guava</groupId>
+           <artifactId>guava</artifactId>
+         </exclusion>
+       </exclusions>
      </dependency>
      <dependency>
        <groupId>org.bouncycastle</groupId>

+ 15 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/lib/TestZKClient.java

@@ -26,6 +26,7 @@ import java.io.OutputStream;
 import java.net.InetSocketAddress;
 import java.net.Socket;
 
+import org.apache.hadoop.net.ServerSocketUtil;
 import org.junit.Assert;
 
 import org.apache.hadoop.yarn.lib.ZKClient;
@@ -39,11 +40,14 @@ import org.junit.Test;
 
 public class TestZKClient  {
 
-  public static int CONNECTION_TIMEOUT = 30000;
+  private static int CONNECTION_TIMEOUT = 30000;
+  private static int DEFAULT_PORT = 20384;
   static final File BASETEST =
     new File(System.getProperty("build.test.dir", "target/zookeeper-build"));
 
-  protected String hostPort = "127.0.0.1:2000";
+  protected String hostPort = "127.0.0.1:" + getOpenPort();
+
+
   protected int maxCnxns = 0;
   protected NIOServerCnxnFactory factory = null;
   protected ZooKeeperServer zks;
@@ -140,6 +144,7 @@ public class TestZKClient  {
   @Before
   public void setUp() throws IOException, InterruptedException {
     System.setProperty("zookeeper.preAllocSize", "100");
+    System.setProperty("zookeeper.4lw.commands.whitelist", "*");
     FileTxnLog.setPreallocSize(100 * 1024);
     if (!BASETEST.exists()) {
       BASETEST.mkdirs();
@@ -186,4 +191,12 @@ public class TestZKClient  {
     client.unregisterService("/nodemanager");
   }
 
+  private int getOpenPort() {
+    try {
+      return ServerSocketUtil.getPorts(1)[0];
+    } catch (IOException e) {
+      return DEFAULT_PORT;
+    }
+  }
+
 }

+ 1 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestLeaderElectorService.java

@@ -169,8 +169,7 @@ public class TestLeaderElectorService {
         service.getCuratorClient().getZookeeperClient();
     // this will expire current curator client session. curator will re-establish
     // the session. RM will first relinquish leadership and re-acquire leadership
-    KillSession
-        .kill(client.getZooKeeper(), client.getCurrentConnectionString());
+    KillSession.kill(client.getZooKeeper());
 
     waitFor(rm1, HAServiceState.ACTIVE);
   }