
HDFS-15186. Erasure Coding: Decommission may generate the parity block's content with all 0 in some case. Contributed by Yao Guangdong.
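In outline, reading from the diff below: liveBusyBlockIndices tells the erasure-coding reconstruction scheduler which internal block indices still exist on DataNodes that are too busy to act as sources. Before this patch, only LIVE replicas on busy nodes were recorded, so a DECOMMISSIONING replica on a busy node looked missing; reconstruction could then be scheduled for an index that was actually present, and in some cases the regenerated parity block came out as all zeros. The fix records DECOMMISSIONING replicas on busy nodes as well.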

Ayush Saxena 5 years ago
parent
commit
429da635ec

+ 4 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -2445,14 +2445,16 @@ public class BlockManager implements BlockStatsMXBean {
       if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY
           && (!node.isDecommissionInProgress() && !node.isEnteringMaintenance())
           && node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) {
-        if (isStriped && state == StoredReplicaState.LIVE) {
+        if (isStriped && (state == StoredReplicaState.LIVE
+            || state == StoredReplicaState.DECOMMISSIONING)) {
           liveBusyBlockIndices.add(blockIndex);
         }
         continue; // already reached replication limit
       }
 
       if (node.getNumberOfBlocksToBeReplicated() >= replicationStreamsHardLimit) {
-        if (isStriped && state == StoredReplicaState.LIVE) {
+        if (isStriped && (state == StoredReplicaState.LIVE
+            || state == StoredReplicaState.DECOMMISSIONING)) {
           liveBusyBlockIndices.add(blockIndex);
         }
         continue;
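The predicate change is easiest to see in isolation. Below is a minimal, self-contained sketch, not the HDFS source: the enum is a stand-in for HDFS's StoredReplicaState, and BusyIndexPredicate is a hypothetical illustration class.

// Stand-in for HDFS's StoredReplicaState; only the values relevant here.
enum StoredReplicaState { LIVE, DECOMMISSIONING, DECOMMISSIONED, CORRUPT }

final class BusyIndexPredicate {
  // Should a striped replica on a "busy" DataNode still be recorded in
  // liveBusyBlockIndices? Pre-patch: only LIVE. Post-patch: LIVE or
  // DECOMMISSIONING, so the scheduler knows the internal block exists.
  static boolean trackOnBusyNode(boolean isStriped, StoredReplicaState state) {
    return isStriped
        && (state == StoredReplicaState.LIVE
            || state == StoredReplicaState.DECOMMISSIONING);
  }

  public static void main(String[] args) {
    for (StoredReplicaState s : StoredReplicaState.values()) {
      System.out.println(s + " tracked on busy node: "
          + trackOnBusyNode(true, s));
    }
  }
}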

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java

@@ -639,7 +639,8 @@ public class DatanodeDescriptor extends DatanodeInfo {
     pendingReplicationWithoutTargets++;
   }
 
-  void decrementPendingReplicationWithoutTargets() {
+  @VisibleForTesting
+  public void decrementPendingReplicationWithoutTargets() {
     pendingReplicationWithoutTargets--;
   }
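This visibility change exists so the new test can saturate a DataNode's pending-replication counter and later drain it by one. A sketch of that pattern, assuming the HDFS test classpath; BusyNodeTestHelper is hypothetical, but the two DatanodeDescriptor methods are the ones shown in this diff and used by the test below.

import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;

final class BusyNodeTestHelper {
  // Mark a DN "busy": saturate its counter up to the hard limit so the
  // block manager refuses to schedule further replication from it.
  static void makeBusy(DatanodeDescriptor dn, int hardLimit) {
    for (int i = 0; i < hardLimit; i++) {
      dn.incrementPendingReplicationWithoutTargets();
    }
  }

  // Free one stream slot; callable from tests now that the method is public.
  static void release(DatanodeDescriptor dn) {
    dn.decrementPendingReplicationWithoutTargets();
  }
}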
 

+ 63 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java

@@ -350,6 +350,69 @@ public class TestDecommissionWithStriped {
         fileChecksum1.equals(fileChecksum2));
   }
 
+  /**
+   * Regression test for HDFS-15186: decommission could generate a parity
+   * block whose content was all zeros when a source DataNode was busy.
+   * @throws Exception
+   */
+  @Test(timeout = 120000)
+  public void testDecommission2NodeWithBusyNode() throws Exception {
+    byte busyDNIndex = 6;
+    byte decommissionDNIndex = 6;
+    byte decommissionDNIndex2 = 8;
+    //1. create EC file
+    final Path ecFile = new Path(ecDir, "testDecommission2NodeWithBusyNode");
+    int writeBytes = cellSize * dataBlocks;
+    writeStripedFile(dfs, ecFile, writeBytes);
+
+    Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks());
+    FileChecksum fileChecksum1 = dfs.getFileChecksum(ecFile, writeBytes);
+
+    //2. make one DN busy
+    final INodeFile fileNode = cluster.getNamesystem().getFSDirectory()
+        .getINode4Write(ecFile.toString()).asFile();
+    BlockInfo firstBlock = fileNode.getBlocks()[0];
+    DatanodeStorageInfo[] dnStorageInfos = bm.getStorages(firstBlock);
+    DatanodeDescriptor busyNode = dnStorageInfos[busyDNIndex]
+        .getDatanodeDescriptor();
+    for (int j = 0; j < replicationStreamsHardLimit; j++) {
+      busyNode.incrementPendingReplicationWithoutTargets();
+    }
+
+    //3. decommission two nodes, one of which is the busy DN
+    List<DatanodeInfo> decommissionNodes = new ArrayList<>();
+    decommissionNodes.add(dnStorageInfos[decommissionDNIndex]
+        .getDatanodeDescriptor());
+    decommissionNodes.add(dnStorageInfos[decommissionDNIndex2]
+        .getDatanodeDescriptor());
+    decommissionNode(0, decommissionNodes, AdminStates.DECOMMISSION_INPROGRESS);
+
+    //4. wait for the non-busy decommissioning replica to be replicated (9 - 2 + 1 = 8 live)
+    GenericTestUtils.waitFor(
+        () -> bm.countNodes(firstBlock).liveReplicas() >= 8,
+        100, 60000);
+
+    //5. release the busy DN so its decommissioning replica can also be replicated
+    busyNode.decrementPendingReplicationWithoutTargets();
+
+    //6. wait for both nodes to finish decommissioning
+    decommissionNode(0, decommissionNodes, AdminStates.DECOMMISSIONED);
+
+    //7. the replica on the busy DN shouldn't have been reconstructed elsewhere
+    DatanodeStorageInfo[] newDnStorageInfos = bm.getStorages(firstBlock);
+    Assert.assertEquals("Busy DN shouldn't be reconstructed",
+        dnStorageInfos[busyDNIndex].getStorageID(),
+        newDnStorageInfos[busyDNIndex].getStorageID());
+
+    //8. verify the file checksum is unchanged
+    FileChecksum fileChecksum2 = dfs.getFileChecksum(ecFile, writeBytes);
+    Assert.assertEquals("Checksum mismatches!", fileChecksum1, fileChecksum2);
+
+    //9. check the data is correct
+    StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommissionNodes,
+        null, blockGroupSize);
+  }
+
   /**
    * Tests to verify that the file checksum should be able to compute after the
    * decommission operation.