
HDFS-8670. Better to exclude decommissioned nodes for namenode NodeUsage JMX (Contributed by J.Andreina)

Vinayakumar B committed 9 years ago
commit 6374ee0db4

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -1093,6 +1093,9 @@ Release 2.8.0 - UNRELEASED
 
     HDFS-8785. TestDistributedFileSystem is failing in trunk. (Xiaoyu Yao)
 
+    HDFS-8670. Better to exclude decommissioned nodes for namenode NodeUsage JMX
+    (J.Andreina via vinayakumarb)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -5999,6 +5999,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         new HashMap<String, Map<String,Object>>();
     final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
     blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
+    for (Iterator<DatanodeDescriptor> it = live.iterator(); it.hasNext();) {
+      DatanodeDescriptor node = it.next();
+      if (node.isDecommissionInProgress() || node.isDecommissioned()) {
+        it.remove();
+      }
+    }
 
     if (live.size() > 0) {
       float totalDfsUsed = 0;

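The loop added above mutates the fetched live list in place, which is why it uses an explicit Iterator rather than removing inside a for-each. As a side note, on a Java 8+ codebase the same filter could be written with Collection#removeIf; a minimal equivalent sketch (not part of this commit, whose target branches predate a Java 8 minimum):

    import java.util.List;
    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;

    class LiveNodeFilter {
      // Same effect as the Iterator loop above: drop nodes that are
      // decommissioned or mid-decommission before usage stats are computed.
      static void excludeDecommissioned(List<DatanodeDescriptor> live) {
        live.removeIf(node ->
            node.isDecommissionInProgress() || node.isDecommissioned());
      }
    }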
+ 98 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java

@@ -28,6 +28,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.ExecutionException;
 
@@ -56,6 +57,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.DecommissionManager;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
+import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
@@ -68,6 +70,7 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Ignore;
 import org.junit.Test;
+import org.mortbay.util.ajax.JSON;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -1127,4 +1130,99 @@ public class TestDecommission {
     assertEquals("Unexpected number of pending nodes", pending,
         decomManager.getNumPendingNodes());
   }
+
+  /**
+   * A decommissioned node should not be considered while calculating node usage
+   * @throws InterruptedException
+   */
+  @Test
+  public void testNodeUsageAfterDecommissioned()
+      throws IOException, InterruptedException {
+    nodeUsageVerification(2, new long[] { 26384L, 26384L },
+        AdminStates.DECOMMISSIONED);
+  }
+
+  /**
+   * A node in DECOMMISSION_INPROGRESS state should not be considered
+   * while calculating node usage
+   * @throws InterruptedException
+   */
+  @Test
+  public void testNodeUsageWhileDecommissioning()
+      throws IOException, InterruptedException {
+    nodeUsageVerification(1, new long[] { 26384L },
+        AdminStates.DECOMMISSION_INPROGRESS);
+  }
+
+  @SuppressWarnings({ "unchecked" })
+  public void nodeUsageVerification(int numDatanodes, long[] nodesCapacity,
+      AdminStates decommissionState) throws IOException, InterruptedException {
+    Map<String, Map<String, String>> usage = null;
+    DatanodeInfo decommissionedNodeInfo = null;
+    String zeroNodeUsage = "0.00%";
+    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1);
+    FileSystem fileSys = null;
+    Path file1 = new Path("testNodeUsage.dat");
+    try {
+      SimulatedFSDataset.setFactory(conf);
+      cluster =
+          new MiniDFSCluster.Builder(conf)
+              .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(1))
+              .numDataNodes(numDatanodes)
+              .simulatedCapacities(nodesCapacity).build();
+      cluster.waitActive();
+      DFSClient client = getDfsClient(cluster.getNameNode(0), conf);
+      validateCluster(client, numDatanodes);
+
+      ArrayList<ArrayList<DatanodeInfo>> namenodeDecomList =
+          new ArrayList<ArrayList<DatanodeInfo>>(1);
+      namenodeDecomList.add(0, new ArrayList<DatanodeInfo>(numDatanodes));
+
+      if (decommissionState == AdminStates.DECOMMISSIONED) {
+        // Move one datanode to the DECOMMISSIONED state
+        ArrayList<DatanodeInfo> decommissionedNode = namenodeDecomList.get(0);
+        decommissionedNodeInfo = decommissionNode(0, null,
+            decommissionedNode, decommissionState);
+      }
+      // Write a file with replication factor 1, so it is written to only one live node.
+      fileSys = cluster.getFileSystem(0);
+      FSNamesystem ns = cluster.getNamesystem(0);
+      writeFile(fileSys, file1, 1);
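+      // Give heartbeats time to report the new block's usage to the NameNode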
+      Thread.sleep(2000);
+
+      // min NodeUsage should not be 0.00%
+      usage = (Map<String, Map<String, String>>) JSON.parse(ns.getNodeUsage());
+      String minUsageBeforeDecom = usage.get("nodeUsage").get("min");
+      assertTrue(!minUsageBeforeDecom.equalsIgnoreCase(zeroNodeUsage));
+
+      if (decommissionState == AdminStates.DECOMMISSION_INPROGRESS) {
+        // Start decommissioning datanode
+        ArrayList<DatanodeInfo> decommissioningNodes = namenodeDecomList.
+            get(0);
+        decommissionedNodeInfo = decommissionNode(0, null,
+            decommissioningNodes, decommissionState);
+        // NodeUsage should now exclude the DECOMMISSION_INPROGRESS node,
+        // so the reported min usage drops to 0.00%
+        usage = (Map<String, Map<String, String>>)
+            JSON.parse(ns.getNodeUsage());
+        assertTrue(usage.get("nodeUsage").get("min").
+            equalsIgnoreCase(zeroNodeUsage));
+      }
+      // Recommission node
+      recommissionNode(0, decommissionedNodeInfo);
+
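+      // After recommission: the DECOMMISSIONED case re-adds an empty node
+      // (min becomes 0.00%), while the INPROGRESS case restores the node
+      // holding the data (min returns to its pre-decommission value)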
+      usage = (Map<String, Map<String, String>>) JSON.parse(ns.getNodeUsage());
+      String nodeUsageAfterRecommission =
+          decommissionState == AdminStates.DECOMMISSION_INPROGRESS
+              ? minUsageBeforeDecom
+              : zeroNodeUsage;
+      assertTrue(usage.get("nodeUsage").get("min").
+          equalsIgnoreCase(nodeUsageAfterRecommission));
+    } finally {
+      cleanupFile(fileSys, file1);
+      cluster.shutdown();
+    }
+  }
 }
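
For reference, the value exercised via ns.getNodeUsage() in these tests is the same JSON exposed through the NameNode's NodeUsage JMX attribute. A minimal in-process sketch of reading it follows; the object name "Hadoop:service=NameNode,name=NameNodeInfo" is the NameNode's usual MXBean registration, but treat it as an assumption for other deployments:

    import java.lang.management.ManagementFactory;
    import javax.management.MBeanServer;
    import javax.management.ObjectName;

    class NodeUsageJmxProbe {
      // Reads the NodeUsage attribute backed by FSNamesystem#getNodeUsage().
      // With this change its statistics (e.g. the "min" asserted above)
      // cover only nodes that are neither decommissioned nor decommissioning.
      static String fetchNodeUsage() throws Exception {
        MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
        ObjectName name =
            new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
        return (String) mbs.getAttribute(name, "NodeUsage");
      }
    }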