Browse Source

HDFS-9493. Test o.a.h.hdfs.server.namenode.TestMetaSave fails in trunk. (Tony Wu via lei)

(cherry picked from commit fd8065a763ff68db265ef23a7d4f97558e213ef9)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
Lei Xu 9 years ago
parent
commit
894345e2f0

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -1651,6 +1651,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9584. NPE in distcp when ssl configuration file does not exist in
     class path. (Surendra Singh Lilhore via Xiaoyu Yao)
 
+    HDFS-9493. Test o.a.h.hdfs.server.namenode.TestMetaSave fails in trunk.
+    (Tony Wu via lei)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 13 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java

@@ -306,4 +306,17 @@ public class BlockManagerTestUtil {
       throws ExecutionException, InterruptedException {
     dm.getDecomManager().runMonitorForTest();
   }
+
+  /**
+   * Check if a given Datanode (specified by uuid) is removed. Removed means the
+   * Datanode is no longer present in HeartbeatManager and NetworkTopology.
+   * @param nn Namenode
+   * @param dnUuid Datanode UUID
+   * @return true if datanode is removed.
+   */
+  public static boolean isDatanodeRemoved(NameNode nn, String dnUuid){
+      final DatanodeManager dnm =
+          nn.getNamesystem().getBlockManager().getDatanodeManager();
+      return !dnm.getNetworkTopology().contains(dnm.getDatanode(dnUuid));
+  }
 }

+ 45 - 14
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java

@@ -28,19 +28,25 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.Random;
+import java.util.concurrent.TimeoutException;
 
+import com.google.common.base.Supplier;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
 import org.junit.Test;
 
 /**
@@ -65,8 +71,8 @@ public class TestMetaSave {
     stm.close();
   }
 
-  @BeforeClass
-  public static void setUp() throws IOException {
+  @Before
+  public void setUp() throws IOException {
     // start a cluster
     Configuration conf = new HdfsConfiguration();
 
@@ -75,6 +81,7 @@ public class TestMetaSave {
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
     conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1L);
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5L);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
@@ -85,15 +92,16 @@ public class TestMetaSave {
    * Tests metasave
    */
   @Test
-  public void testMetaSave() throws IOException, InterruptedException {
+  public void testMetaSave()
+      throws IOException, InterruptedException, TimeoutException {
     for (int i = 0; i < 2; i++) {
       Path file = new Path("/filestatus" + i);
       createFile(fileSys, file);
     }
 
-    cluster.stopDataNode(1);
-    // wait for namenode to discover that a datanode is dead
-    Thread.sleep(15000);
+    // stop datanode and wait for namenode to discover that a datanode is dead
+    stopDatanodeAndWait(1);
+
     nnRpc.setReplication("/filestatus0", (short) 4);
 
     nnRpc.metaSave("metasave.out.txt");
@@ -126,15 +134,15 @@ public class TestMetaSave {
    */
   @Test
   public void testMetasaveAfterDelete()
-      throws IOException, InterruptedException {
+      throws IOException, InterruptedException, TimeoutException {
     for (int i = 0; i < 2; i++) {
       Path file = new Path("/filestatus" + i);
       createFile(fileSys, file);
     }
 
-    cluster.stopDataNode(1);
-    // wait for namenode to discover that a datanode is dead
-    Thread.sleep(15000);
+    // stop datanode and wait for namenode to discover that a datanode is dead
+    stopDatanodeAndWait(1);
+
     nnRpc.setReplication("/filestatus0", (short) 4);
     nnRpc.delete("/filestatus0", true);
     nnRpc.delete("/filestatus1", true);
@@ -211,8 +219,8 @@ public class TestMetaSave {
     }
   }
 
-  @AfterClass
-  public static void tearDown() throws IOException {
+  @After
+  public void tearDown() throws IOException {
     if (fileSys != null)
       fileSys.close();
     if (cluster != null)
@@ -228,4 +236,27 @@ public class TestMetaSave {
   private static File getLogFile(String name) {
     return new File(System.getProperty("hadoop.log.dir"), name);
   }
+
+  /**
+   * Stop a DN, notify NN the death of DN and wait for NN to remove the DN.
+   *
+   * @param dnIdx Index of the Datanode in MiniDFSCluster
+   * @throws TimeoutException
+   * @throws InterruptedException
+   */
+  private void stopDatanodeAndWait(final int dnIdx)
+      throws TimeoutException, InterruptedException {
+    final DataNode dnToStop = cluster.getDataNodes().get(dnIdx);
+    cluster.stopDataNode(dnIdx);
+    BlockManagerTestUtil.noticeDeadDatanode(
+        cluster.getNameNode(), dnToStop.getDatanodeId().getXferAddr());
+    // wait for namenode to discover that a datanode is dead
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return BlockManagerTestUtil.isDatanodeRemoved(
+            cluster.getNameNode(), dnToStop.getDatanodeUuid());
+      }
+    }, 1000, 30000);
+  }
 }