浏览代码

HDFS-2290. Block with corrupt replica is not getting replicated. Contributed by Benoy Antony.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.22@1173324 13f79535-47bb-0310-9956-ffa450edef68
Konstantin Shvachko 13 年之前
父节点
当前提交
ac43da7b55

+ 3 - 0
hdfs/CHANGES.txt

@@ -628,6 +628,9 @@ Release 0.22.0 - Unreleased
 
     HDFS-2341. Contribs not building. (Joep Rottinghuis via shv)
 
+    HDFS-2290. Block with corrupt replica is not getting replicated.
+    (Benoy Antony via shv)
+
 Release 0.21.1 - Unreleased
 
   IMPROVEMENTS

+ 2 - 2
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java

@@ -625,7 +625,7 @@ public class BlockManager {
 
     // Add this replica to corruptReplicas Map
     corruptReplicas.addToCorruptReplicasMap(storedBlock, node);
-    if (countNodes(storedBlock).liveReplicas() > inode.getReplication()) {
+    if (countNodes(storedBlock).liveReplicas() >= inode.getReplication()) {
       // the block is over-replicated so invalidate the replicas immediately
       invalidateBlock(storedBlock, node);
     } else {
@@ -651,7 +651,7 @@ public class BlockManager {
     // Check how many copies we have of the block. If we have at least one
     // copy on a live node, then we can delete it.
     int count = countNodes(blk).liveReplicas();
-    if (count > 1) {
+    if (count >= 1) {
       addToInvalidates(blk, dn);
       removeStoredBlock(blk, node);
       if(NameNode.stateChangeLog.isDebugEnabled()) {

+ 310 - 0
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestProcessCorruptBlocks.java

@@ -0,0 +1,310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.junit.Test;
+
+public class TestProcessCorruptBlocks  {
+  /**
+   * The corrupt block has to be removed when the number of valid replicas
+   * matches replication factor for the file. In this the above condition is
+   * tested by reducing the replication factor 
+   * The test strategy : 
+   *   Bring up Cluster with 3 DataNodes
+   *   Create a file of replication factor 3 
+   *   Corrupt one replica of a block of the file 
+   *   Verify that there are still 2 good replicas and 1 corrupt replica
+   *    (corrupt replica should not be removed since number of good
+   *     replicas (2) is less than replication factor (3))
+   *   Set the replication factor to 2 
+   *   Verify that the corrupt replica is removed. 
+   *     (corrupt replica  should not be removed since number of good
+   *      replicas (2) is equal to replication factor (2))
+   */
+  @Test
+  public void testWhenDecreasingReplication() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
+        Integer.toString(2));
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3)
+        .build();
+    FileSystem fs = cluster.getFileSystem();
+    final FSNamesystem namesystem = cluster.getNamesystem();
+
+    try {
+      final Path fileName = new Path("/foo1");
+      DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
+      DFSTestUtil.waitReplication(fs, fileName, (short) 3);
+
+      Block block = DFSTestUtil.getFirstBlock(fs, fileName);
+      corruptBlock(cluster, fs, fileName, 0, block);
+
+      DFSTestUtil.waitReplication(fs, fileName, (short) 2);
+
+      assertEquals(2, namesystem.blockManager.countNodes(block)
+          .liveReplicas());
+      assertEquals(1, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+
+      namesystem.setReplication(fileName.toString(), (short) 2);
+
+      // wait for 3 seconds so that all block reports are processed.
+      try {
+        Thread.sleep(3000);
+      } catch (InterruptedException ignored) {
+      }
+
+      assertEquals(2, namesystem.blockManager.countNodes(block)
+          .liveReplicas());
+      assertEquals(0, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * The corrupt block has to be removed when the number of valid replicas
+   * matches replication factor for the file. In this test, the above 
+   * condition is achieved by increasing the number of good replicas by 
+   * replicating on a new Datanode. 
+   * The test strategy : 
+   *   Bring up Cluster with 3 DataNodes
+   *   Create a file  of replication factor 3
+   *   Corrupt one replica of a block of the file 
+   *   Verify that there are still 2 good replicas and 1 corrupt replica 
+   *     (corrupt replica should not be removed since number of good replicas
+   *      (2) is less  than replication factor (3)) 
+   *   Start a new data node 
+   *   Verify that the a new replica is created and corrupt replica is
+   *   removed.
+   * 
+   */
+  @Test
+  public void testByAddingAnExtraDataNode() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
+        Integer.toString(2));
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4)
+        .build();
+    FileSystem fs = cluster.getFileSystem();
+    final FSNamesystem namesystem = cluster.getNamesystem();
+    DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
+
+    try {
+      final Path fileName = new Path("/foo1");
+      DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
+      DFSTestUtil.waitReplication(fs, fileName, (short) 3);
+
+      Block block = DFSTestUtil.getFirstBlock(fs, fileName);
+      corruptBlock(cluster, fs, fileName, 0, block);
+
+      DFSTestUtil.waitReplication(fs, fileName, (short) 2);
+
+      assertEquals(2, namesystem.blockManager.countNodes(block)
+          .liveReplicas());
+      assertEquals(1, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+
+      cluster.restartDataNode(dnPropsFourth);
+
+      DFSTestUtil.waitReplication(fs, fileName, (short) 3);
+
+      assertEquals(3, namesystem.blockManager.countNodes(block)
+          .liveReplicas());
+      assertEquals(0, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * The corrupt block has to be removed when the number of valid replicas
+   * matches replication factor for the file. The above condition should hold
+   * true as long as there is one good replica. This test verifies that.
+   * 
+   * The test strategy : 
+   *   Bring up Cluster with 2 DataNodes
+   *   Create a file of replication factor 2 
+   *   Corrupt one replica of a block of the file 
+   *   Verify that there is  one good replicas and 1 corrupt replica 
+   *     (corrupt replica should not be removed since number of good 
+   *     replicas (1) is less than replication factor (2)).
+   *   Set the replication factor to 1 
+   *   Verify that the corrupt replica is removed. 
+   *     (corrupt replica should  be removed since number of good
+   *      replicas (1) is equal to replication factor (1))
+   */
+  @Test
+  public void testWithReplicationFactorAsOne() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
+        Integer.toString(2));
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
+        .build();
+    FileSystem fs = cluster.getFileSystem();
+    final FSNamesystem namesystem = cluster.getNamesystem();
+
+    try {
+      final Path fileName = new Path("/foo1");
+      DFSTestUtil.createFile(fs, fileName, 2, (short) 2, 0L);
+      DFSTestUtil.waitReplication(fs, fileName, (short) 2);
+
+      Block block = DFSTestUtil.getFirstBlock(fs, fileName);
+      corruptBlock(cluster, fs, fileName, 0, block);
+
+      DFSTestUtil.waitReplication(fs, fileName, (short) 1);
+
+      assertEquals(1, namesystem.blockManager.countNodes(block)
+          .liveReplicas());
+      assertEquals(1, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+
+      namesystem.setReplication(fileName.toString(), (short) 1);
+
+      // wait for 3 seconds so that all block reports are processed.
+      try {
+        Thread.sleep(3000);
+      } catch (InterruptedException ignored) {
+      }
+
+      assertEquals(1, namesystem.blockManager.countNodes(block)
+          .liveReplicas());
+      assertEquals(0, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * None of the blocks can be removed if all blocks are corrupt.
+   * 
+   * The test strategy : 
+   *    Bring up Cluster with 3 DataNodes
+   *    Create a file of replication factor 3 
+   *    Corrupt all three replicas 
+   *    Verify that all replicas are corrupt and 3 replicas are present.
+   *    Set the replication factor to 1 
+   *    Verify that all replicas are corrupt and 3 replicas are present.
+   */
+  @Test
+  public void testWithAllCorruptReplicas() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
+        Integer.toString(2));
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3)
+        .build();
+    FileSystem fs = cluster.getFileSystem();
+    final FSNamesystem namesystem = cluster.getNamesystem();
+
+    try {
+      final Path fileName = new Path("/foo1");
+      DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
+      DFSTestUtil.waitReplication(fs, fileName, (short) 3);
+
+      Block block = DFSTestUtil.getFirstBlock(fs, fileName);
+      corruptBlock(cluster, fs, fileName, 0, block);
+
+      corruptBlock(cluster, fs, fileName, 1, block);
+
+      corruptBlock(cluster, fs, fileName, 2, block);
+
+      // wait for 3 seconds so that all block reports are processed.
+      try {
+        Thread.sleep(3000);
+      } catch (InterruptedException ignored) {
+      }
+
+      assertEquals(0, namesystem.blockManager
+          .countNodes(block).liveReplicas());
+      assertEquals(3, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+
+      namesystem.setReplication(fileName.toString(), (short) 1);
+
+      // wait for 3 seconds so that all block reports are processed.
+      try {
+        Thread.sleep(3000);
+      } catch (InterruptedException ignored) {
+      }
+
+      assertEquals(0, namesystem.blockManager.countNodes(block)
+          .liveReplicas());
+      assertEquals(3, namesystem.blockManager.countNodes(block)
+          .corruptReplicas());
+
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  private void corruptBlock(MiniDFSCluster cluster, FileSystem fs,
+      final Path fileName, int dnIndex, Block block) throws IOException {
+    // corrupt the block on datanode dnIndex
+    // the indexes change once the nodes are restarted.
+    // But the datadirectory will not change
+    assertTrue(cluster.corruptReplica(block.getBlockName(), dnIndex));
+
+    DataNodeProperties dnProps = cluster.stopDataNode(0);
+
+    // Each datanode has multiple data dirs, check each
+    for (int dn = dnIndex * 2; dn < dnIndex * 2 + 2; dn++) {
+      File dataDir = new File(MiniDFSCluster.getBaseDirectory() + "data");
+      File scanLogFile = new File(dataDir, "data" + (dn + 1)
+          + MiniDFSCluster.FINALIZED_DIR_NAME
+          + "dncp_block_verification.log.curr");
+      if (scanLogFile.exists()) {
+        // wait for one minute for deletion to succeed;
+        for (int i = 0; !scanLogFile.delete(); i++) {
+          assertTrue("Could not delete log file in one minute", i < 60);
+          try {
+            Thread.sleep(1000);
+          } catch (InterruptedException ignored) {
+          }
+        }
+      }
+    }
+
+    // restart the detained so the corrupt replica will be detected
+    cluster.restartDataNode(dnProps);
+  }
+}