
HDFS-17052. Improve BlockPlacementPolicyRackFaultTolerant to avoid node selection failures when there are not enough racks. (#5759). Contributed by Hualong Zhang and Shuyan Zhang.

Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
Author: zhtttylz
Commit: 750c0fc631

+ 14 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java

@@ -170,7 +170,7 @@ public class BlockPlacementPolicyRackFaultTolerant extends BlockPlacementPolicyD
     NotEnoughReplicasException lastException = e;
     int bestEffortMaxNodesPerRack = maxNodesPerRack;
     while (results.size() != totalReplicaExpected &&
-        numResultsOflastChoose != results.size()) {
+        bestEffortMaxNodesPerRack < totalReplicaExpected) {
       // Exclude the chosen nodes
       final Set<Node> newExcludeNodes = new HashSet<>();
       for (DatanodeStorageInfo resultStorage : results) {
@@ -192,11 +192,22 @@ public class BlockPlacementPolicyRackFaultTolerant extends BlockPlacementPolicyD
       } finally {
         excludedNodes.addAll(newExcludeNodes);
       }
+      // To improve performance, the maximum value of 'bestEffortMaxNodesPerRack'
+      // is calculated only when it is not possible to select a node.
+      if (numResultsOflastChoose == results.size()) {
+        Map<String, Integer> nodesPerRack = new HashMap<>();
+        for (DatanodeStorageInfo dsInfo : results) {
+          String rackName = dsInfo.getDatanodeDescriptor().getNetworkLocation();
+          nodesPerRack.merge(rackName, 1, Integer::sum);
+        }
+        bestEffortMaxNodesPerRack =
+            Math.max(bestEffortMaxNodesPerRack, Collections.max(nodesPerRack.values()));
+      }
     }
 
-    if (numResultsOflastChoose != totalReplicaExpected) {
+    if (results.size() != totalReplicaExpected) {
       LOG.debug("Best effort placement failed: expecting {} replicas, only "
-          + "chose {}.", totalReplicaExpected, numResultsOflastChoose);
+          + "chose {}.", totalReplicaExpected, results.size());
       throw lastException;
     }
   }
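
What the hunk above changes: the old loop gave up as soon as a retry pass added no new node (numResultsOflastChoose == results.size()), even when a larger per-rack cap could still have placed the remaining replicas. The rewritten loop instead keeps retrying while bestEffortMaxNodesPerRack < totalReplicaExpected, and when a pass stalls it raises the cap directly to the largest number of replicas already sitting on any single rack; per the in-line comment, that maximum is computed only on a stalled pass to keep the common path cheap. A minimal standalone sketch of the relaxation step follows; the names relaxCap and chosenRacks are illustrative, not the real HDFS identifiers:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class RackCapRelaxationSketch {

      // When a placement pass adds no nodes, raise the per-rack cap to the
      // largest occupancy among the racks that already hold chosen replicas.
      static int relaxCap(int bestEffortMaxNodesPerRack, List<String> chosenRacks) {
        Map<String, Integer> nodesPerRack = new HashMap<>();
        for (String rack : chosenRacks) {
          nodesPerRack.merge(rack, 1, Integer::sum);
        }
        if (nodesPerRack.isEmpty()) {
          return bestEffortMaxNodesPerRack;
        }
        return Math.max(bestEffortMaxNodesPerRack,
            Collections.max(nodesPerRack.values()));
      }

      public static void main(String[] args) {
        // Five replicas placed so far, four of them crowded onto /rack0.
        List<String> chosen = Arrays.asList(
            "/rack0", "/rack0", "/rack0", "/rack0", "/rack1");
        // A cap of 2 made no further progress; jump it straight to 4.
        System.out.println(relaxCap(2, chosen)); // prints 4
      }
    }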

+ 61 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReconstructStripedBlocks.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -52,6 +53,7 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Iterator;
 import java.util.List;
@@ -515,4 +517,63 @@ public class TestReconstructStripedBlocks {
     assertEquals(9, bm.countNodes(blockInfo).liveReplicas());
   }
 
+  @Test
+  public void testReconstructionWithStorageTypeNotEnough() throws Exception {
+    final HdfsConfiguration conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
+
+    // Nine disk nodes and eleven archive nodes.
+    int numDn = groupSize * 2 + 2;
+    StorageType[][] storageTypes = new StorageType[numDn][];
+    Arrays.fill(storageTypes, 0, groupSize,
+        new StorageType[]{StorageType.DISK, StorageType.DISK});
+    Arrays.fill(storageTypes, groupSize, numDn,
+        new StorageType[]{StorageType.ARCHIVE, StorageType.ARCHIVE});
+
+    // Nine disk racks and one archive rack.
+    String[] racks = {
+        "/rack1", "/rack2", "/rack3", "/rack4", "/rack5", "/rack6", "/rack7", "/rack8",
+        "/rack9", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0",
+        "/rack0", "/rack0", "/rack0", "/rack0"};
+
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn)
+        .storageTypes(storageTypes)
+        .racks(racks)
+        .build();
+    cluster.waitActive();
+    DistributedFileSystem fs = cluster.getFileSystem();
+    fs.enableErasureCodingPolicy(
+        StripedFileTestUtil.getDefaultECPolicy().getName());
+
+    try {
+      fs.mkdirs(dirPath);
+      fs.setStoragePolicy(dirPath, "COLD");
+      fs.setErasureCodingPolicy(dirPath,
+          StripedFileTestUtil.getDefaultECPolicy().getName());
+      DFSTestUtil.createFile(fs, filePath,
+          cellSize * dataBlocks * 2, (short) 1, 0L);
+
+      // Stop one dn.
+      LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
+      LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
+      DatanodeInfo dnToStop = block.getLocations()[0];
+      cluster.stopDataNode(dnToStop.getXferAddr());
+      cluster.setDataNodeDead(dnToStop);
+
+      // Wait for reconstruction to happen.
+      StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
+      blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
+      block = (LocatedStripedBlock) blks.getLastLocatedBlock();
+      BitSet bitSet = new BitSet(groupSize);
+      for (byte index : block.getBlockIndices()) {
+        bitSet.set(index);
+      }
+      for (int i = 0; i < groupSize; i++) {
+        Assert.assertTrue(bitSet.get(i));
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
 }
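
What the new test exercises: with the COLD storage policy, all nine blocks of the striped group must land on ARCHIVE storage, and in this topology every ARCHIVE node sits on the single rack /rack0. After one replica's datanode is stopped, reconstruction can only succeed if the placement policy keeps relaxing the per-rack cap past the point where a pass makes no progress; the BitSet assertion at the end verifies that all nine block indices are present again. Without the fix above, a setup like this could make the best-effort loop exit early, and the missing replica would never be reconstructed.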