Procházet zdrojové kódy

HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232285 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon před 13 roky
rodič
revize
0c1450ca5d

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt

@@ -111,3 +111,5 @@ HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G v
 HDFS-2772. On transition to active, standby should not swallow ELIE. (atm)
 
 HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd)
+
+HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (todd)

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -2502,6 +2502,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
       final int curReplicasDelta, int expectedReplicasDelta) {
     namesystem.writeLock();
     try {
+      if (!namesystem.isPopulatingReplQueues()) {
+        return;
+      }
       NumberReplicas repl = countNodes(block);
       int curExpectedReplicas = getReplication(block);
       if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) {

+ 33 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java

@@ -24,8 +24,11 @@ import java.util.Iterator;
 import java.util.Set;
 
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.util.Daemon;
+import org.junit.Assert;
 
 public class BlockManagerTestUtil {
   public static void setNodeReplicationLimit(final BlockManager blockManager,
@@ -144,4 +147,34 @@ public class BlockManagerTestUtil {
     work += bm.computeReplicationWork(Integer.MAX_VALUE);
     return work;
   }
+
+  /**
+   * Ensure that the given NameNode marks the specified DataNode as entirely dead/expired: under the
+   * namesystem write lock, the matching descriptor's last-update time is set to 0 and a heartbeat check is run.
+   * @param nn the NameNode to manipulate
+   * @param dnName the name of the DataNode, compared against each descriptor's getName(); a JUnit assertion fails if none matches
+   */
+  public static void noticeDeadDatanode(NameNode nn, String dnName) {
+    FSNamesystem namesystem = nn.getNamesystem();
+    namesystem.writeLock();
+    try {
+      DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
+      HeartbeatManager hbm = dnm.getHeartbeatManager();
+      DatanodeDescriptor[] dnds = hbm.getDatanodes();
+      DatanodeDescriptor theDND = null;
+      for (DatanodeDescriptor dnd : dnds) {
+        if (dnd.getName().equals(dnName)) {
+          theDND = dnd;
+        }
+      }
+      Assert.assertNotNull("Could not find DN with name: " + dnName, theDND);
+      // Zero the last-update time and run the heartbeat check while holding the HeartbeatManager monitor.
+      synchronized (hbm) {
+        theDND.setLastUpdate(0);
+        hbm.heartbeatCheck();
+      }
+    } finally {
+      namesystem.writeUnlock();
+    }
+  }
 }

+ 4 - 19
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java

@@ -81,15 +81,8 @@ public class TestNodeCount extends TestCase {
       DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
       
       // make sure that NN detects that the datanode is down
-      try {
-        namesystem.writeLock();
-        synchronized (hm) {
-          datanode.setLastUpdate(0); // mark it dead
-          hm.heartbeatCheck();
-        }
-      } finally {
-        namesystem.writeUnlock();
-      }
+      BlockManagerTestUtil.noticeDeadDatanode(
+          cluster.getNameNode(), datanode.getName());
       
       // the block will be replicated
       DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
@@ -121,16 +114,8 @@ public class TestNodeCount extends TestCase {
       // bring down non excessive datanode
       dnprop = cluster.stopDataNode(nonExcessDN.getName());
       // make sure that NN detects that the datanode is down
-      
-      try {
-        namesystem.writeLock();
-        synchronized(hm) {
-          nonExcessDN.setLastUpdate(0); // mark it dead
-          hm.heartbeatCheck();
-        }
-      } finally {
-        namesystem.writeUnlock();
-      }
+      BlockManagerTestUtil.noticeDeadDatanode(
+          cluster.getNameNode(), nonExcessDN.getName());
 
       // The block should be replicated
       initializeTimeout(TIMEOUT);

+ 78 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java

@@ -33,13 +33,16 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.AppendTestUtil;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@@ -131,6 +134,81 @@ public class TestStandbyIsHot {
       cluster.shutdown();
     }
   }
+  
+  /**
+   * Regression test for HDFS-2795:
+   *  - Start an HA cluster with a single DN and make NN 0 active.
+   *  - Write a 5-block file to the FS with replication 1.
+   *  - Shut down the DN.
+   *  - Force both NNs to declare the DN dead. All blocks will be under-replicated on the active; the standby reports none.
+   *  - Restart the DN; both NNs must report zero under-replicated blocks and the standby must serve a block location again.
+   * In the bug, the standby node would only very slowly notice the blocks returning
+   * to the cluster.
+   */
+  @Test
+  public void testDatanodeRestarts() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
+    // We read from the standby to watch block locations
+    HAUtil.setAllowStandbyReads(conf, true);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+      .nnTopology(MiniDFSNNTopology.simpleHATopology())
+      .numDataNodes(1)
+      .build();
+    try {
+      NameNode nn0 = cluster.getNameNode(0);
+      NameNode nn1 = cluster.getNameNode(1);
+      nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
+      nn1.getNamesystem().getEditLogTailer().interrupt(); // NOTE(review): presumably wakes the tailer so the new sleep time applies — confirm
+
+      cluster.transitionToActive(0);
+      
+      // Create 5 blocks: a 5*1024-byte file with the 1024-byte block size configured above.
+      DFSTestUtil.createFile(cluster.getFileSystem(0), 
+          TEST_FILE_PATH, 5*1024, (short)1, 1L);
+      
+      HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+      
+      // Stop the DN, keeping its properties so it can be restarted later.
+      DataNode dn = cluster.getDataNodes().get(0);
+      String dnName = dn.getDatanodeId().getName(); 
+      DataNodeProperties dnProps = cluster.stopDataNode(0);
+      
+      // Make sure both NNs register it as dead.
+      BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
+      BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);
+      // NOTE(review): updateState presumably refreshes the replication counters read below — confirm
+      BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
+      BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
+      assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());
+      
+      // The SBN will not have any blocks in its neededReplication queue
+      // since the SBN doesn't process replication.
+      assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
+      // With standby reads enabled, ask the standby directly for the first block's locations.
+      LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(
+          TEST_FILE, 0, 1);
+      assertEquals("Standby should have registered that the block has no replicas",
+          0, locs.get(0).getLocations().length);
+      
+      cluster.restartDataNode(dnProps);
+      // Wait for both NNs to re-register the DN.
+      cluster.waitActive(0);
+      cluster.waitActive(1);
+      
+      BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
+      BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
+      assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
+      assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
+      // The standby must again serve exactly one location for the first block.
+      locs = nn1.getRpcServer().getBlockLocations(
+          TEST_FILE, 0, 1);
+      assertEquals("Standby should have registered that the block has replicas again",
+          1, locs.get(0).getLocations().length);
+    } finally {
+      cluster.shutdown();
+    }
+  }
 
   static void waitForBlockLocations(final MiniDFSCluster cluster,
       final NameNode nn,