Browse Source

Merge -r 746009:746010 from trunk to move the change of HADOOP-4692 to branch 0.20.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20@746011 13f79535-47bb-0310-9956-ffa450edef68
Hairong Kuang 16 years ago
parent
commit
4d66838b46

+ 3 - 0
CHANGES.txt

@@ -607,6 +607,9 @@ Release 0.20.0 - Unreleased
     HADOOP-5254. The Configuration class should be able to work with XML
     parsers that do not support xmlinclude. (Steve Loughran via dhruba)
 
+    HADOOP-4692. Namenode in infinite loop for replicating/deleting corrupt
+    blocks. (hairong)
+
 Release 0.19.1 - Unreleased
 
   IMPROVEMENTS

+ 15 - 1
src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -925,6 +925,7 @@ public class DataNode extends Configured
                               DatanodeInfo xferTargets[] 
                               ) throws IOException {
     if (!data.isValidBlock(block)) {
+      // block does not exist or is under-construction
       String errStr = "Can't send invalid block " + block;
       LOG.info(errStr);
       namenode.errorReport(dnRegistration, 
@@ -933,6 +934,19 @@ public class DataNode extends Configured
       return;
     }
 
+    // Check if NN recorded length matches on-disk length 
+    long onDiskLength = data.getLength(block);
+    if (block.getNumBytes() > onDiskLength) {
+      // A shorter on-disk length indicates corruption, so report the corrupt block to the NN
+      namenode.reportBadBlocks(new LocatedBlock[]{
+          new LocatedBlock(block, new DatanodeInfo[] {
+              new DatanodeInfo(dnRegistration)})});
+      LOG.info("Can't replicate block " + block
+          + " because on-disk length " + onDiskLength 
+          + " is shorter than NameNode recorded length " + block.getNumBytes());
+      return;
+    }
+    
     int numTargets = xferTargets.length;
     if (numTargets > 0) {
       if (LOG.isInfoEnabled()) {
@@ -1113,7 +1127,7 @@ public class DataNode extends Configured
         out = new DataOutputStream(new BufferedOutputStream(baseStream, 
                                                             SMALL_BUFFER_SIZE));
 
-        blockSender = new BlockSender(b, 0, -1, false, false, false, 
+        blockSender = new BlockSender(b, 0, b.getNumBytes(), false, false, false, 
             datanode);
         DatanodeInfo srcNode = new DatanodeInfo(dnRegistration);
 

+ 8 - 4
src/test/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java

@@ -402,7 +402,7 @@ public class TestDatanodeBlockScanner extends TestCase {
       String block = DFSTestUtil.getFirstBlock(fs, fileName).getBlockName();
 
       // Truncate replica of block
-      truncateReplica(block, 0);
+      changeReplicaLength(block, 0, -1);
 
       cluster.shutdown();
 
@@ -423,18 +423,22 @@ public class TestDatanodeBlockScanner extends TestCase {
     }
   }
   
-  private static void truncateReplica(String blockName, int dnIndex) throws IOException {
+  /**
+   * Change the length of a block at datanode dnIndex
+   */
+  static boolean changeReplicaLength(String blockName, int dnIndex, int lenDelta) throws IOException {
     File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
     for (int i=dnIndex*2; i<dnIndex*2+2; i++) {
       File blockFile = new File(baseDir, "data" + (i+1)+ "/current/" + 
                                blockName);
       if (blockFile.exists()) {
         RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
-        raFile.setLength(raFile.length()-1);
+        raFile.setLength(raFile.length()+lenDelta);
         raFile.close();
-        break;
+        return true;
       }
     }
+    return false;
   }
   
   private static void waitForBlockDeleted(String blockName, int dnIndex) 

+ 60 - 1
src/test/org/apache/hadoop/hdfs/TestReplication.java

@@ -388,7 +388,66 @@ public class TestReplication extends TestCase {
       if (cluster != null) {
         cluster.shutdown();
       }
+    }  
+  }
+  
+  /**
+   * Test whether replication can detect on-disk blocks whose length mismatches the NameNode's record
+   * @throws Exception
+   */
+  public void testReplicateLenMismatchedBlock() throws Exception {
+    MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 2, true, null);
+    try {
+      cluster.waitActive();
+      // test truncated block
+      changeBlockLen(cluster, -1);
+      // test extended block
+      changeBlockLen(cluster, 1);
+    } finally {
+      cluster.shutdown();
     }
-  }  
+  }
   
+  private void changeBlockLen(MiniDFSCluster cluster, 
+      int lenDelta) throws IOException, InterruptedException {
+    final Path fileName = new Path("/file1");
+    final short REPLICATION_FACTOR = (short)1;
+    final FileSystem fs = cluster.getFileSystem();
+    final int fileLen = fs.getConf().getInt("io.bytes.per.checksum", 512);
+    DFSTestUtil.createFile(fs, fileName, fileLen, REPLICATION_FACTOR, 0);
+    DFSTestUtil.waitReplication(fs, fileName, REPLICATION_FACTOR);
+
+    String block = DFSTestUtil.getFirstBlock(fs, fileName).getBlockName();
+
+    // Change the length of a replica
+    for (int i=0; i<cluster.getDataNodes().size(); i++) {
+      if (TestDatanodeBlockScanner.changeReplicaLength(block, i, lenDelta)) {
+        break;
+      }
+    }
+
+    // increase the file's replication factor
+    fs.setReplication(fileName, (short)(REPLICATION_FACTOR+1));
+
+    // block replication triggers corrupt block detection
+    DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", 
+        cluster.getNameNodePort()), fs.getConf());
+    LocatedBlocks blocks = dfsClient.namenode.getBlockLocations(
+        fileName.toString(), 0, fileLen);
+    if (lenDelta < 0) { // replica truncated
+    	while (!blocks.get(0).isCorrupt() || 
+    			REPLICATION_FACTOR != blocks.get(0).getLocations().length) {
+    		Thread.sleep(100);
+    		blocks = dfsClient.namenode.getBlockLocations(
+    				fileName.toString(), 0, fileLen);
+    	}
+    } else { // no corruption detected; block replicated
+    	while (REPLICATION_FACTOR+1 != blocks.get(0).getLocations().length) {
+    		Thread.sleep(100);
+    		blocks = dfsClient.namenode.getBlockLocations(
+    				fileName.toString(), 0, fileLen);
+    	}
+    }
+    fs.delete(fileName, true);
+  }
 }