Bladeren bron

HDFS-3835. Long-lived 2NN cannot perform a checkpoint if security is enabled and the NN restarts with outstanding delegation tokens. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1376188 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 12 jaren geleden
bovenliggende
commit
de8b34a70e

+ 11 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java

@@ -113,6 +113,16 @@ extends AbstractDelegationTokenIdentifier>
     }
   }
   
+  /**
+   * Reset all data structures and mutable state.
+   */
+  public synchronized void reset() {
+    currentId = 0;
+    allKeys.clear();
+    delegationTokenSequenceNumber = 0;
+    currentTokens.clear();
+  }
+  
   /** 
    * Add a previously used master key to cache (when NN restarts), 
    * should be called before activate().
@@ -190,7 +200,6 @@ extends AbstractDelegationTokenIdentifier>
   
   @Override
   protected synchronized byte[] createPassword(TokenIdent identifier) {
-    LOG.info("Creating password for identifier: "+identifier);
     int sequenceNum;
     long now = Time.now();
     sequenceNum = ++delegationTokenSequenceNumber;
@@ -198,6 +207,7 @@ extends AbstractDelegationTokenIdentifier>
     identifier.setMaxDate(now + tokenMaxLifetime);
     identifier.setMasterKeyId(currentId);
     identifier.setSequenceNumber(sequenceNum);
+    LOG.info("Creating password for identifier: " + identifier);
     byte[] password = createPassword(identifier.getBytes(), currentKey.getKey());
     currentTokens.put(identifier, new DelegationTokenInformation(now
         + tokenRenewInterval, password));

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -661,6 +661,9 @@ Branch-2 ( Unreleased changes )
 
     HDFS-3837. Fix DataNode.recoverBlock findbugs warning. (eli)
 
+    HDFS-3835. Long-lived 2NN cannot perform a checkpoint if security is
+    enabled and the NN restarts with outstanding delegation tokens. (atm)
+
   BREAKDOWN OF HDFS-3042 SUBTASKS
 
     HDFS-2185. HDFS portion of ZK-based FailoverController (todd)

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -556,6 +556,7 @@ public class FSImage implements Closeable {
    */
   void reloadFromImageFile(File file, FSNamesystem target) throws IOException {
     target.dir.reset();
+    target.dtSecretManager.reset();
 
     LOG.debug("Reloading namespace from " + file);
     loadFSImage(file, target, null);

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -298,7 +298,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   // Scan interval is not configurable.
   private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
     TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
-  private final DelegationTokenSecretManager dtSecretManager;
+  final DelegationTokenSecretManager dtSecretManager;
   private final boolean alwaysUseDelegationTokensForTests;
   
 

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java

@@ -376,6 +376,7 @@ public class SecondaryNameNode implements Runnable {
               downloadImage = false;
               LOG.info("Image has not changed. Will not download image.");
             } else {
+              LOG.info("Image has changed. Downloading updated image from NN.");
               MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
                   nnHostPort, sig.mostRecentCheckpointTxId, dstImage.getStorage(), true);
               dstImage.saveDigestAndRenameCheckpointImage(

+ 44 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -67,6 +67,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
 import org.apache.hadoop.hdfs.tools.DFSAdmin;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
@@ -1835,6 +1836,49 @@ public class TestCheckpoint {
     }
   }
   
+  /**
+   * Regression test for HDFS-3835 - "Long-lived 2NN cannot perform a
+   * checkpoint if security is enabled and the NN restarts without outstanding
+   * delegation tokens"
+   */
+  @Test
+  public void testSecondaryNameNodeWithDelegationTokens() throws IOException {
+    MiniDFSCluster cluster = null;
+    SecondaryNameNode secondary = null;
+    
+    Configuration conf = new HdfsConfiguration();
+    conf.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
+          .format(true).build();
+      
+      assertNotNull(cluster.getNamesystem().getDelegationToken(new Text("atm")));
+  
+      secondary = startSecondaryNameNode(conf);
+
+      // Checkpoint once, so the 2NN loads the DT into its in-memory sate.
+      secondary.doCheckpoint();
+      
+      // Perform a saveNamespace, so that the NN has a new fsimage, and the 2NN
+      // therefore needs to download a new fsimage the next time it performs a
+      // checkpoint.
+      cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      cluster.getNameNodeRpc().saveNamespace();
+      cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+      
+      // Ensure that the 2NN can still perform a checkpoint.
+      secondary.doCheckpoint();
+    } finally {
+      if (secondary != null) {
+        secondary.shutdown();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+  
   @Test
   public void testCommandLineParsing() throws ParseException {
     SecondaryNameNode.CommandLineOpts opts =