
Merge -r 732776:732777 from trunk to move the HADOOP-4910 change into branch 0.20

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20@732784 13f79535-47bb-0310-9956-ffa450edef68
Author: Hairong Kuang
Commit: 82ba0bd2d0

+ 2 - 0
CHANGES.txt

@@ -1658,6 +1658,8 @@ Release 0.18.3 - Unreleased
     HADOOP-4971. A long (unexpected) delay at datanodes could cause block
     reports from many datanodes to arrive at the same time. (Raghu Angadi)
     
+    HADOOP-4910. NameNode should exclude corrupt replicas when choosing
+    excessive replicas to delete to avoid data loss. (hairong)
 
 Release 0.18.2 - 2008-11-03
 

+ 5 - 1
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -2997,13 +2997,17 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean {
       delNodeHint = null;
     }
     Collection<DatanodeDescriptor> nonExcess = new ArrayList<DatanodeDescriptor>();
+    Collection<DatanodeDescriptor> corruptNodes = corruptReplicas.getNodes(block);
     for (Iterator<DatanodeDescriptor> it = blocksMap.nodeIterator(block); 
          it.hasNext();) {
       DatanodeDescriptor cur = it.next();
       Collection<Block> excessBlocks = excessReplicateMap.get(cur.getStorageID());
       if (excessBlocks == null || !excessBlocks.contains(block)) {
         if (!cur.isDecommissionInProgress() && !cur.isDecommissioned()) {
-          nonExcess.add(cur);
+          // exclude corrupt replicas
+          if (corruptNodes == null || !corruptNodes.contains(cur)) {
+            nonExcess.add(cur);
+          }
         }
       }
     }
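
The hunk above is the entire fix: replicas already recorded in corruptReplicas are kept out of the non-excess set, so the excess-replica chooser can only pick among healthy copies. For illustration, a self-contained sketch of the same exclusion rule; the class and method names (ExcessReplicaFilterSketch, nonExcessReplicas) are hypothetical stand-ins, while the real code operates on DatanodeDescriptor via blocksMap, excessReplicateMap, and corruptReplicas as shown above:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

public class ExcessReplicaFilterSketch {
  /** Collect deletion candidates for an over-replicated block: skip nodes
   *  already holding an excess copy, nodes being decommissioned, and
   *  (the HADOOP-4910 fix) nodes whose copy is marked corrupt. */
  static <N> List<N> nonExcessReplicas(Iterable<N> replicaNodes,
                                       Collection<N> excessNodes,
                                       Collection<N> decommissioning,
                                       Collection<N> corruptNodes) {
    List<N> nonExcess = new ArrayList<N>();
    for (N node : replicaNodes) {
      if (excessNodes != null && excessNodes.contains(node)) continue;
      if (decommissioning.contains(node)) continue;
      // without this check a corrupt copy counts as a survivor, and the
      // chooser may delete every valid replica while keeping the corrupt one
      if (corruptNodes != null && corruptNodes.contains(node)) continue;
      nonExcess.add(node);
    }
    return nonExcess;
  }

  public static void main(String[] args) {
    List<String> replicas = Arrays.asList("d1", "d2", "d3");
    Collection<String> none = Collections.emptySet();
    // prints [d1, d2]: the corrupt copy on d3 is never a deletion candidate
    System.out.println(nonExcessReplicas(
        replicas, none, none, Collections.singleton("d3")));
  }
}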

+ 1 - 1
src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java

@@ -638,7 +638,7 @@ public class MiniDFSCluster {
   /*
    * Restart a particular datanode
    */
-  synchronized boolean restartDataNode(int i) throws IOException {
+  public synchronized boolean restartDataNode(int i) throws IOException {
     DataNodeProperties dnprop = stopDataNode(i);
     if (dnprop == null) {
       return false;

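The only change here is visibility: restartDataNode was package-private, so tests outside org.apache.hadoop.hdfs could not call it. Making it public lets the new TestOverReplicatedBlocks below (which lives in org.apache.hadoop.hdfs.server.namenode) bounce a datanode. A minimal sketch of a caller, assuming a JUnit 3 test with a running cluster; the test method name here is hypothetical:

  public void testRestartFromAnotherPackage() throws IOException {
    MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 1, true, null);
    try {
      // stops datanode 0 and brings it back up; returns false if the
      // datanode could not be stopped
      assertTrue(cluster.restartDataNode(0));
    } finally {
      cluster.shutdown();
    }
  }
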
+ 3 - 3
src/test/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java

@@ -144,7 +144,7 @@ public class TestDatanodeBlockScanner extends TestCase {
     cluster.shutdown();
   }
 
-  boolean corruptReplica(String blockName, int replica) throws IOException {
+  public static boolean corruptReplica(String blockName, int replica) throws IOException {
     Random random = new Random();
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     boolean corrupted = false;
@@ -423,7 +423,7 @@ public class TestDatanodeBlockScanner extends TestCase {
     }
   }
   
-  private void truncateReplica(String blockName, int dnIndex) throws IOException {
+  private static void truncateReplica(String blockName, int dnIndex) throws IOException {
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     for (int i=dnIndex*2; i<dnIndex*2+2; i++) {
       File blockFile = new File(baseDir, "data" + (i+1)+ "/current/" + 
@@ -437,7 +437,7 @@ public class TestDatanodeBlockScanner extends TestCase {
     }
   }
   
-  private void waitForBlockDeleted(String blockName, int dnIndex) 
+  private static void waitForBlockDeleted(String blockName, int dnIndex) 
   throws IOException, InterruptedException {
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     File blockFile1 = new File(baseDir, "data" + (2*dnIndex+1)+ "/current/" + 

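Same motivation as the MiniDFSCluster change: corruptReplica becomes public static so the new test in the namenode package can reuse it without instantiating TestDatanodeBlockScanner; truncateReplica and waitForBlockDeleted become static for consistency. The call site in the new test below is simply:

  TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
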
+ 69 - 0
src/test/org/apache/hadoop/hdfs/server/namenode/TestOverReplicatedBlocks.java

@@ -0,0 +1,69 @@
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.TestDatanodeBlockScanner;
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+
+import junit.framework.TestCase;
+
+public class TestOverReplicatedBlocks extends TestCase {
+  /** Test processOverReplicatedBlock can handle corrupt replicas fine.
+   * It make sure that it won't treat corrupt replicas as valid ones 
+   * thus prevents NN deleting valid replicas but keeping
+   * corrupt ones.
+   */
+  public void testProcessOverReplicatedBlock() throws IOException {
+    Configuration conf = new Configuration();
+    conf.setLong("dfs.blockreport.intervalMsec", 1000L);
+    conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
+    MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
+    FileSystem fs = cluster.getFileSystem();
+
+    try {
+      final Path fileName = new Path("/foo1");
+      DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
+      DFSTestUtil.waitReplication(fs, fileName, (short)3);
+      
+      // corrupt the block on datanode 0
+      Block block = DFSTestUtil.getFirstBlock(fs, fileName);
+      TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
+      // remove block scanner log to trigger block scanning
+      File scanLog = new File(System.getProperty("test.build.data"),
+          "dfs/data/data1/current/dncp_block_verification.log.curr");
+      assertTrue(scanLog.delete()); 
+      // restart the datanode so the corrupt replica will be detected
+      cluster.restartDataNode(0);
+      DFSTestUtil.waitReplication(fs, fileName, (short)2);
+      
+      final DatanodeID corruptDataNode = 
+        cluster.getDataNodes().get(2).dnRegistration;
+      final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();
+      synchronized (namesystem.heartbeats) {
+        // set each live datanode's remaining space to 0 so its replica
+        // will be chosen for deletion when the block is over-replicated
+        for (DatanodeDescriptor datanode : namesystem.heartbeats) {
+          if (!corruptDataNode.equals(datanode)) {
+            datanode.updateHeartbeat(100L, 100L, 0L, 0);
+          }
+        }
+        
+        // decrease the replication factor to 1
+        namesystem.setReplication(fileName.toString(), (short)1);
+
+        // the corrupt replica must not be chosen as the excess one; without
+        // HADOOP-4910 the live replica count would drop to 0, losing the block
+        assertEquals(1, namesystem.countNodes(block).liveReplicas());
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}