@@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.hdfs.util.StripedBlockUtil;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -50,6 +51,7 @@ import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.BLOCK_SIZ
 import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.CELL_SIZE;
 import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.NUM_DATA_UNITS;
 import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.NUM_PARITY_UNITS;
+import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.findDataNodeAtIndex;
 import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.findFirstDataNode;
 import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.initializeCluster;
 import static org.apache.hadoop.hdfs.ReadStripedFileWithDecodingHelper.tearDownCluster;
@@ -96,7 +98,7 @@ public class TestReadStripedFileWithDecoding {
         .get(0);
     final LocatedBlock[] blks = StripedBlockUtil.parseStripedBlockGroup(slb,
         CELL_SIZE, NUM_DATA_UNITS, NUM_PARITY_UNITS);
-    // find the first block file
+    // Find the first block file.
     File storageDir = cluster.getInstanceStorageDir(dnIndex, 0);
     File blkFile = MiniDFSCluster.getBlockFile(storageDir, blks[0].getBlock());
     Assert.assertTrue("Block file does not exist", blkFile.exists());
@@ -169,6 +171,138 @@ public class TestReadStripedFileWithDecoding {
     }
   }

+  /**
+   * This unit test covers the following situation:
+   * Suppose we have an EC file with an RS(d,p) policy whose block group id
+   * is blk_-9223372036845119810_1920002, and the first and second data
+   * blocks of this EC block group become corrupted while the file is read.
+   * The read triggers a reportBadBlocks RPC, which adds both
+   * blk_-9223372036845119810_1920002 and blk_-9223372036845119809_1920002
+   * to corruptReplicas.
+   * It also reconstructs the two blocks and sends IBRs to the NameNode,
+   * which then executes BlockManager#addStoredBlock and the
+   * invalidateCorruptReplicas method. Suppose the IBR of
+   * blk_-9223372036845119810_1920002 is received first; then the
+   * invalidateCorruptReplicas method only invalidates
+   * blk_-9223372036845119809_1920002 on the two DataNodes that contain
+   * the two corrupt blocks.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testCorruptionECBlockInvalidate() throws Exception {
+
+    final Path file = new Path("/invalidate_corrupted");
+    final int length = BLOCK_SIZE * NUM_DATA_UNITS;
+    final byte[] bytes = StripedFileTestUtil.generateBytes(length);
+    DFSTestUtil.writeFile(dfs, file, bytes);
+
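+    // Locate the DataNodes that store the first and the second data block.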
+    int dnIndex = findFirstDataNode(cluster, dfs, file,
+        CELL_SIZE * NUM_DATA_UNITS);
+    int dnIndex2 = findDataNodeAtIndex(cluster, dfs, file,
+        CELL_SIZE * NUM_DATA_UNITS, 2);
+    Assert.assertNotEquals(-1, dnIndex);
+    Assert.assertNotEquals(-1, dnIndex2);
+
+    LocatedStripedBlock slb = (LocatedStripedBlock) dfs.getClient()
+        .getLocatedBlocks(file.toString(), 0, CELL_SIZE * NUM_DATA_UNITS)
+        .get(0);
+    final LocatedBlock[] blks = StripedBlockUtil.parseStripedBlockGroup(slb,
+        CELL_SIZE, NUM_DATA_UNITS, NUM_PARITY_UNITS);
+
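+    // The local blocks of the first and second data blocks in the group.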
+    final Block b = blks[0].getBlock().getLocalBlock();
+    final Block b2 = blks[1].getBlock().getLocalBlock();
+
+    // Find the first block file.
+    File storageDir = cluster.getInstanceStorageDir(dnIndex, 0);
+    File blkFile = MiniDFSCluster.getBlockFile(storageDir, blks[0].getBlock());
+    Assert.assertTrue("Block file does not exist", blkFile.exists());
+    // Corrupt the block file.
+    LOG.info("Deliberately corrupting file " + blkFile.getName());
+    try (FileOutputStream out = new FileOutputStream(blkFile)) {
+      out.write("corruption".getBytes());
+      out.flush();
+    }
+
+    // Find the second block file.
+    File storageDir2 = cluster.getInstanceStorageDir(dnIndex2, 0);
+    File blkFile2 = MiniDFSCluster.getBlockFile(storageDir2, blks[1].getBlock());
+    Assert.assertTrue("Block file does not exist", blkFile2.exists());
+    // Corrupt the second block file.
+    LOG.info("Deliberately corrupting file " + blkFile2.getName());
+    try (FileOutputStream out = new FileOutputStream(blkFile2)) {
+      out.write("corruption".getBytes());
+      out.flush();
+    }
+
+    // Disable heartbeats from the DataNodes so that the corrupted block
+    // records are kept in the NameNode.
+    for (DataNode dataNode : cluster.getDataNodes()) {
+      DataNodeTestUtils.setHeartbeatsDisabledForTests(dataNode, true);
+    }
+    try {
+      // Do stateful read.
+      StripedFileTestUtil.verifyStatefulRead(dfs, file, length, bytes,
+          ByteBuffer.allocate(1024));
+
+      // Check whether the corruption has been reported to the NameNode.
+      final FSNamesystem ns = cluster.getNamesystem();
+      final BlockManager bm = ns.getBlockManager();
+      BlockInfo blockInfo = (ns.getFSDirectory().getINode4Write(file.toString())
+          .asFile().getBlocks())[0];
+      GenericTestUtils.waitFor(() -> {
+        if (bm.getCorruptReplicas(blockInfo) == null) {
+          return false;
+        }
+        return bm.getCorruptReplicas(blockInfo).size() == 2;
+      }, 250, 60000);
+      // Double check.
+      Assert.assertEquals(2, bm.getCorruptReplicas(blockInfo).size());
+
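+      // Look up the descriptors of the two DataNodes that hold the
+      // corrupt replicas.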
+      DatanodeDescriptor dnd = NameNodeAdapter.getDatanode(ns,
+          cluster.getDataNodes().get(dnIndex).getDatanodeId());
+
+      DatanodeDescriptor dnd2 = NameNodeAdapter.getDatanode(ns,
+          cluster.getDataNodes().get(dnIndex2).getDatanodeId());
+
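+      // Re-enable heartbeats on the DataNodes that hold no corrupt replica;
+      // the two DataNodes with corrupt replicas stay silent.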
+      for (DataNode datanode : cluster.getDataNodes()) {
+        if (!datanode.getDatanodeUuid().equals(dnd.getDatanodeUuid()) &&
+            !datanode.getDatanodeUuid().equals(dnd2.getDatanodeUuid())) {
+          DataNodeTestUtils.setHeartbeatsDisabledForTests(datanode, false);
+        }
+      }
+
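+      // Wait until the first corrupt block has been queued for invalidation
+      // on its DataNode.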
+      GenericTestUtils.waitFor(() -> {
+        return bm.containsInvalidateBlock(
+            blks[0].getLocations()[0], b) || dnd.containsInvalidateBlock(b);
+      }, 250, 60000);
+      Assert.assertTrue(bm.containsInvalidateBlock(
+          blks[0].getLocations()[0], b) || dnd.containsInvalidateBlock(b));
+
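+      // Likewise, wait for the second corrupt block.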
+      GenericTestUtils.waitFor(() -> {
+        return bm.containsInvalidateBlock(
+            blks[1].getLocations()[0], b2) || dnd2.containsInvalidateBlock(b2);
+      }, 250, 60000);
+
+      Assert.assertTrue(bm.containsInvalidateBlock(
+          blks[1].getLocations()[0], b2) || dnd2.containsInvalidateBlock(b2));
+
+    } finally {
+      for (DataNode datanode : cluster.getDataNodes()) {
+        DataNodeTestUtils.setHeartbeatsDisabledForTests(datanode, false);
+      }
+    }
+  }
+
   @Test
   public void testMoreThanOneCorruptedBlock() throws IOException {
     final Path file = new Path("/corrupted");