|
@@ -0,0 +1,68 @@
|
|
|
+package org.apache.hadoop.dfs;
|
|
|
+
|
|
|
+import java.io.File;
|
|
|
+import java.io.IOException;
|
|
|
+
|
|
|
+import org.apache.hadoop.conf.Configuration;
|
|
|
+import org.apache.hadoop.fs.FileSystem;
|
|
|
+import org.apache.hadoop.fs.Path;
|
|
|
+import org.apache.hadoop.dfs.DFSTestUtil;
|
|
|
+import org.apache.hadoop.dfs.MiniDFSCluster;
|
|
|
+import org.apache.hadoop.dfs.TestDatanodeBlockScanner;
|
|
|
+import org.apache.hadoop.dfs.Block;
|
|
|
+import org.apache.hadoop.dfs.DatanodeID;
|
|
|
+
|
|
|
+import junit.framework.TestCase;
|
|
|
+
|
|
|
+public class TestOverReplicatedBlocks extends TestCase {
|
|
|
+ /** Test processOverReplicatedBlock can handle corrupt replicas fine.
|
|
|
+ * It make sure that it won't treat corrupt replicas as valid ones
|
|
|
+ * thus prevents NN deleting valid replicas but keeping
|
|
|
+ * corrupt ones.
|
|
|
+ */
|
|
|
+ public void testProcesOverReplicateBlock() throws IOException {
|
|
|
+ Configuration conf = new Configuration();
|
|
|
+ conf.setLong("dfs.blockreport.intervalMsec", 1000L);
|
|
|
+ conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
|
|
|
+ MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
|
|
|
+ FileSystem fs = cluster.getFileSystem();
|
|
|
+
|
|
|
+ try {
|
|
|
+ final Path fileName = new Path("/foo1");
|
|
|
+ DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
|
|
|
+ DFSTestUtil.waitReplication(fs, fileName, (short)3);
|
|
|
+
|
|
|
+ // corrupt the block on datanode 0
|
|
|
+ Block block = DFSTestUtil.getFirstBlock(fs, fileName);
|
|
|
+ TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
|
|
|
+ File scanLog = new File(System.getProperty("test.build.data"),
|
|
|
+ "dfs/data/data1/current/dncp_block_verification.log.curr");
|
|
|
+ assertTrue(scanLog.delete());
|
|
|
+ // restart the datanode so the corrupt replica will be detected
|
|
|
+ cluster.restartDataNode(0);
|
|
|
+ DFSTestUtil.waitReplication(fs, fileName, (short)2);
|
|
|
+
|
|
|
+ final DatanodeID corruptDataNode =
|
|
|
+ cluster.getDataNodes().get(2).dnRegistration;
|
|
|
+ final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();
|
|
|
+ synchronized (namesystem.heartbeats) {
|
|
|
+ // set live datanode's remaining space to be 0
|
|
|
+ // so they will be chosen to be deleted when over-replication occurs
|
|
|
+ for (DatanodeDescriptor datanode : namesystem.heartbeats) {
|
|
|
+ if (!corruptDataNode.equals(datanode)) {
|
|
|
+ datanode.updateHeartbeat(100L, 100L, 0L, 0);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // decrease the replication factor to 1;
|
|
|
+ namesystem.setReplication(fileName.toString(), (short)1);
|
|
|
+
|
|
|
+ // corrupt one won't be chosen to be excess one
|
|
|
+ // without 4910 the number of live replicas would be 0: block gets lost
|
|
|
+ assertEquals(1, namesystem.countNodes(block).liveReplicas());
|
|
|
+ }
|
|
|
+ } finally {
|
|
|
+ cluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|