View source code

Merge -r 732776:732777 from trunk to move the change of HADOOP-4910 into branch 0.18.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.18@732788 13f79535-47bb-0310-9956-ffa450edef68
Hairong Kuang 16 years ago
parent
commit
9d9dd8b9a5

+ 3 - 0
CHANGES.txt

@@ -133,6 +133,9 @@ Release 0.18.3 - Unreleased
    HADOOP-4971. A long (unexpected) delay at datanodes could make many
    datanodes send subsequent block reports at the same time. (Raghu Angadi)
     
+    HADOOP-4910. NameNode should exclude corrupt replicas when choosing
+    excessive replicas to delete to avoid data loss. (hairong)
+
 Release 0.18.2 - 2008-11-03
 
   BUG FIXES

+ 5 - 1
src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java

@@ -2963,13 +2963,17 @@ class FSNamesystem implements FSConstants, FSNamesystemMBean {
       delNodeHint = null;
     }
     Collection<DatanodeDescriptor> nonExcess = new ArrayList<DatanodeDescriptor>();
+    Collection<DatanodeDescriptor> corruptNodes = corruptReplicas.getNodes(block);
     for (Iterator<DatanodeDescriptor> it = blocksMap.nodeIterator(block); 
          it.hasNext();) {
       DatanodeDescriptor cur = it.next();
       Collection<Block> excessBlocks = excessReplicateMap.get(cur.getStorageID());
       if (excessBlocks == null || !excessBlocks.contains(block)) {
         if (!cur.isDecommissionInProgress() && !cur.isDecommissioned()) {
-          nonExcess.add(cur);
+          // exclude corrupt replicas
+          if (corruptNodes == null || !corruptNodes.contains(cur)) {
+            nonExcess.add(cur);
+          }
         }
       }
     }
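
The fix, in short: when collecting candidates for excess-replica deletion, a replica known to be corrupt is never counted as a valid copy. Below is a minimal standalone sketch of the idea with hypothetical, simplified types (the real code above works against FSNamesystem's blocksMap, excessReplicateMap, and corruptReplicas structures):

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;

class ExcessReplicaChooser {
  /**
   * Returns the replica holders that may safely be deleted as excess:
   * nodes not already marked excess, not decommissioning, and, after
   * HADOOP-4910, not holding a corrupt copy of the block.
   */
  static <N> List<N> nonExcessCandidates(Collection<N> replicaHolders,
                                         Set<N> alreadyExcess,
                                         Set<N> decommissioning,
                                         Set<N> corrupt) {
    List<N> nonExcess = new ArrayList<N>();
    for (N node : replicaHolders) {
      if (alreadyExcess.contains(node)) continue;
      if (decommissioning.contains(node)) continue;
      // the HADOOP-4910 change: never count a corrupt replica as a
      // deletable valid replica
      if (corrupt != null && corrupt.contains(node)) continue;
      nonExcess.add(node);
    }
    return nonExcess;
  }
}

Deleting only from this set guarantees that at least one valid replica survives even when the corrupt copy would otherwise look like the spare one.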

+ 1 - 1
src/test/org/apache/hadoop/dfs/MiniDFSCluster.java

@@ -624,7 +624,7 @@ public class MiniDFSCluster {
   /*
    * Restart a particular datanode
    */
-  synchronized boolean restartDataNode(int i) throws IOException {
+  public synchronized boolean restartDataNode(int i) throws IOException {
     DataNodeProperties dnprop = stopDataNode(i);
     if (dnprop == null) {
       return false;

+ 3 - 3
src/test/org/apache/hadoop/dfs/TestDatanodeBlockScanner.java

@@ -141,7 +141,7 @@ public class TestDatanodeBlockScanner extends TestCase {
     cluster.shutdown();
   }
 
-  boolean corruptReplica(String blockName, int replica) throws IOException {
+  public static boolean corruptReplica(String blockName, int replica) throws IOException {
     Random random = new Random();
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     boolean corrupted = false;
@@ -420,7 +420,7 @@ public class TestDatanodeBlockScanner extends TestCase {
     }
   }
   
-  private void truncateReplica(String blockName, int dnIndex) throws IOException {
+  private static void truncateReplica(String blockName, int dnIndex) throws IOException {
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     for (int i=dnIndex*2; i<dnIndex*2+2; i++) {
       File blockFile = new File(baseDir, "data" + (i+1)+ "/current/" + 
@@ -434,7 +434,7 @@ public class TestDatanodeBlockScanner extends TestCase {
     }
   }
   
-  private void waitForBlockDeleted(String blockName, int dnIndex) 
+  private static void waitForBlockDeleted(String blockName, int dnIndex) 
   throws IOException, InterruptedException {
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     File blockFile1 = new File(baseDir, "data" + (2*dnIndex+1)+ "/current/" + 

+ 68 - 0
src/test/org/apache/hadoop/dfs/TestOverReplicatedBlocks.java

@@ -0,0 +1,68 @@
+package org.apache.hadoop.dfs;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.dfs.DFSTestUtil;
+import org.apache.hadoop.dfs.MiniDFSCluster;
+import org.apache.hadoop.dfs.TestDatanodeBlockScanner;
+import org.apache.hadoop.dfs.Block;
+import org.apache.hadoop.dfs.DatanodeID;
+
+import junit.framework.TestCase;
+
+public class TestOverReplicatedBlocks extends TestCase {
+  /** Test that processOverReplicatedBlock() handles corrupt replicas correctly.
+   * It makes sure that corrupt replicas are not treated as valid ones,
+   * which would cause the NN to delete valid replicas while keeping
+   * corrupt ones.
+   */
+  public void testProcessOverReplicatedBlock() throws IOException {
+    Configuration conf = new Configuration();
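+    // report blocks frequently and time out pending replications quickly
+    // so the test does not have to wait for the default intervals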
+    conf.setLong("dfs.blockreport.intervalMsec", 1000L);
+    conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
+    MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
+    FileSystem fs = cluster.getFileSystem();
+
+    try {
+      final Path fileName = new Path("/foo1");
+      DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
+      DFSTestUtil.waitReplication(fs, fileName, (short)3);
+      
+      // corrupt the block on datanode 0
+      Block block = DFSTestUtil.getFirstBlock(fs, fileName);
+      TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
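+      // remove the block scanner's verification log so the restarted
+      // datanode rescans its blocks and notices the corruption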
+      File scanLog = new File(System.getProperty("test.build.data"),
+          "dfs/data/data1/current/dncp_block_verification.log.curr");
+      assertTrue(scanLog.delete()); 
+      // restart the datanode so the corrupt replica will be detected
+      cluster.restartDataNode(0);
+      DFSTestUtil.waitReplication(fs, fileName, (short)2);
+      
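+      // restartDataNode re-adds the restarted node at the end of the list,
+      // so index 2 now refers to the node holding the corrupt replica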
+      final DatanodeID corruptDataNode = 
+        cluster.getDataNodes().get(2).dnRegistration;
+      final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();
+      synchronized (namesystem.heartbeats) {
+        // set the remaining space of the nodes with valid replicas to 0
+        // so they will be preferred for deletion when the block becomes over-replicated
+        for (DatanodeDescriptor datanode : namesystem.heartbeats) {
+          if (!corruptDataNode.equals(datanode)) {
+            datanode.updateHeartbeat(100L, 100L, 0L, 0);
+          }
+        }
+        
+        // decrease the replication factor to 1
+        namesystem.setReplication(fileName.toString(), (short)1);
+
+        // the corrupt replica must not be chosen as the excess one to delete;
+        // without HADOOP-4910 the live replica count would drop to 0 and the block would be lost
+        assertEquals(1, namesystem.countNodes(block).liveReplicas());
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}
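
Presumably the new test can be run through the standard Ant target, e.g. ant test -Dtestcase=TestOverReplicatedBlocks (assuming the usual Hadoop 0.18 build.xml test setup).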