瀏覽代碼

HDFS-3921. NN will prematurely consider blocks missing when entering active state while still in safe mode. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1408534 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 12 年之前
父節點
當前提交
067eabb820

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -226,6 +226,9 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4164. fuse_dfs: add -lrt to the compiler command line on Linux.
     (Colin Patrick McCabe via eli)
 
+    HDFS-3921. NN will prematurely consider blocks missing when entering active
+    state while still in safe mode. (atm)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES

+ 7 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -643,13 +643,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         LOG.info("Catching up to latest edits from old active before " +
             "taking over writer role in edits logs");
         editLogTailer.catchupDuringFailover();
-        blockManager.setPostponeBlocksFromFuture(false);
         
-        LOG.info("Reprocessing replication and invalidation queues");
+        blockManager.setPostponeBlocksFromFuture(false);
         blockManager.getDatanodeManager().markAllDatanodesStale();
         blockManager.clearQueues();
         blockManager.processAllPendingDNMessages();
-        blockManager.processMisReplicatedBlocks();
+        
+        if (!isInSafeMode() ||
+            (isInSafeMode() && safeMode.isPopulatingReplQueues())) {
+          LOG.info("Reprocessing replication and invalidation queues");
+          blockManager.processMisReplicatedBlocks();
+        }
         
         if (LOG.isDebugEnabled()) {
           LOG.debug("NameNode metadata after re-processing " +

+ 26 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

@@ -630,6 +630,32 @@ public class TestHASafeMode {
     assertEquals(0L, nn1.getNamesystem().getPendingReplicationBlocks());
   }
   
+  /**
+   * Make sure that when we transition to active in safe mode that we don't
+   * prematurely consider blocks missing just because not all DNs have reported
+   * yet.
+   * 
+   * This is a regression test for HDFS-3921.
+   */
+  @Test
+  public void testNoPopulatingReplQueuesWhenStartingActiveInSafeMode()
+      throws IOException {
+    DFSTestUtil.createFile(fs, new Path("/test"), 15*BLOCK_SIZE, (short)3, 1L);
+    
+    // Stop the DN so that when the NN restarts not all blocks wil be reported
+    // and the NN won't leave safe mode.
+    cluster.stopDataNode(1);
+    // Restart the namenode but don't wait for it to hear from all DNs (since
+    // one DN is deliberately shut down.)
+    cluster.restartNameNode(0, false);
+    cluster.transitionToActive(0);
+    
+    assertTrue(cluster.getNameNode(0).isInSafeMode());
+    // We shouldn't yet consider any blocks "missing" since we're in startup
+    // safemode, i.e. not all DNs may have reported.
+    assertEquals(0, cluster.getNamesystem(0).getMissingBlocksCount());
+  }
+  
   /**
    * Print a big banner in the test log to make debug easier.
    */