
HDFS-8056. Decommissioned dead nodes should continue to be counted as dead after NN restart. (mingma)

(cherry picked from commit 1c4951a7a09433fbbcfe26f243d6c2d8043c71bb)
Ming Ma, 9 years ago
parent
commit 5a3db21563
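
In short: DatanodeManager#getDatanodeListForReport used to skip any include-listed address that also matched the exclude list, so a datanode that had been decommissioned and then died disappeared from the dead-node report once the NameNode restarted (the restarted NameNode never hears from a dead node, so only the host lists identify it). With this change such a node is still reported as dead, and is additionally marked decommissioned.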

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -798,6 +798,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9252. Change TestFileTruncate to use FsDatasetTestUtils to get block
     file size and genstamp. (Lei (Eddy) Xu via cmccabe)
 
+    HDFS-8056. Decommissioned dead nodes should continue to be counted as dead
+    after NN restart. (mingma)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -1267,7 +1267,7 @@ public class DatanodeManager {
 
     if (listDeadNodes) {
       for (InetSocketAddress addr : includedNodes) {
-        if (foundNodes.matchedBy(addr) || excludedNodes.match(addr)) {
+        if (foundNodes.matchedBy(addr)) {
           continue;
         }
         // The remaining nodes are ones that are referenced by the hosts
@@ -1284,6 +1284,9 @@ public class DatanodeManager {
                 addr.getPort() == 0 ? defaultXferPort : addr.getPort(),
                 defaultInfoPort, defaultInfoSecurePort, defaultIpcPort));
         setDatanodeDead(dn);
+        if (excludedNodes.match(addr)) {
+          dn.setDecommissioned();
+        }
         nodes.add(dn);
       }
     }
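
For context, here is a minimal, self-contained sketch of the rule this hunk establishes (the class and field names below are illustrative stand-ins, not the real Hadoop types): an include-listed host that never registered with the NameNode is reported as dead, and if it also appears in the exclude list it is now kept in the report and flagged decommissioned instead of being skipped.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Illustrative model of the fixed dead-node reporting (not Hadoop code).
    public class DeadNodeReportSketch {
      static final class Node {
        final String host;
        boolean decommissioned;
        Node(String host) { this.host = host; }
        @Override public String toString() {
          return host + " DEAD" + (decommissioned ? " DECOMMISSIONED" : "");
        }
      }

      // included/excluded model dfs.hosts and dfs.hosts.exclude; registered
      // holds the datanodes the restarted NameNode has actually heard from.
      static List<Node> deadReport(Set<String> included, Set<String> excluded,
          Set<String> registered) {
        List<Node> dead = new ArrayList<Node>();
        for (String host : included) {
          if (registered.contains(host)) {
            continue; // registered nodes are reported through the normal path
          }
          Node dn = new Node(host);        // never registered: report as dead
          if (excluded.contains(host)) {
            dn.decommissioned = true;      // HDFS-8056: keep it in the report,
          }                                // marked decommissioned, not skipped
          dead.add(dn);
        }
        return dead;
      }

      public static void main(String[] args) {
        Set<String> included =
            new HashSet<String>(Arrays.asList("dn1:50010", "dn2:50010"));
        Set<String> excluded =
            new HashSet<String>(Arrays.asList("dn2:50010"));
        Set<String> registered =
            new HashSet<String>(Arrays.asList("dn1:50010"));
        // Before the fix dn2 dropped out of the report entirely; now this
        // prints: [dn2:50010 DEAD DECOMMISSIONED]
        System.out.println(deadReport(included, excluded, registered));
      }
    }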

+ 35 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java

@@ -924,6 +924,41 @@ public class TestDecommission {
     cluster.shutdown();
   }
 
+  /**
+   * Tests dead node count after restart of namenode.
+   */
+  @Test(timeout=360000)
+  public void testDeadNodeCountAfterNamenodeRestart() throws Exception {
+    LOG.info("Starting test testDeadNodeCountAfterNamenodeRestart");
+    int numNamenodes = 1;
+    int numDatanodes = 2;
+
+    startCluster(numNamenodes, numDatanodes, conf);
+
+    DFSClient client = getDfsClient(cluster.getNameNode(), conf);
+    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
+    DatanodeInfo excludedDatanode = info[0];
+    String excludedDatanodeName = info[0].getXferAddr();
+
+    writeConfigFile(hostsFile, new ArrayList<String>(Arrays.asList(
+        excludedDatanodeName, info[1].getXferAddr())));
+    decommissionNode(0, excludedDatanode.getDatanodeUuid(), null,
+        AdminStates.DECOMMISSIONED);
+
+    cluster.stopDataNode(excludedDatanodeName);
+    DFSTestUtil.waitForDatanodeState(
+        cluster, excludedDatanode.getDatanodeUuid(), false, 20000);
+
+    // Restart the namenode
+    cluster.restartNameNode();
+
+    assertEquals("There should be one node alive", 1,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+    assertEquals("There should be one node dead", 1,
+        client.datanodeReport(DatanodeReportType.DEAD).length);
+    cluster.shutdown();
+  }
+
   /**
    * Test using a "registration name" in a host include file.
    *

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHostFileManager.java

@@ -151,7 +151,7 @@ public class TestHostFileManager {
     Assert.assertEquals(1, dm.getDatanodeListForReport(HdfsConstants
             .DatanodeReportType.DEAD).size());
     excludedNodes.add(entry("127.0.0.3"));
-    Assert.assertEquals(0, dm.getDatanodeListForReport(HdfsConstants
+    Assert.assertEquals(1, dm.getDatanodeListForReport(HdfsConstants
             .DatanodeReportType.DEAD).size());
   }
 }
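
A note on the flipped assertion above: 127.0.0.3 is an include-listed host that never registered in this test, so under the old code adding it to excludedNodes removed it from the dead-node report (expected count 0); with the fix it stays in the dead list, now marked decommissioned, so the expected count becomes 1.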