
Merge -r 727211:727212 from trunk to bring the HADOOP-4810 change into branch 0.18.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.18@727215 13f79535-47bb-0310-9956-ffa450edef68
Hairong Kuang 16 years ago
parent
commit
05faab5b6c

+ 2 - 0
CHANGES.txt

@@ -105,6 +105,8 @@ Release 0.18.3 - Unreleased
     HADOOP-4857. Fixes TestUlimit to have exactly 1 map in the jobs spawned.
     (Ravi Gummadi via ddas)
 
+    HADOOP-4810. Data lost at cluster startup time. (hairong)
+
 Release 0.18.2 - 2008-11-03
 
   BUG FIXES

+ 11 - 13
src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java

@@ -1393,13 +1393,13 @@ class FSNamesystem implements FSConstants, FSNamesystemMBean {
     } else {
       INodeFile inode = blocksMap.getINode(blk);
      assert inode != null : (blk + " in blocksMap must belong to a file.");
+      // Add this replica to corruptReplicas Map 
+      corruptReplicas.addToCorruptReplicasMap(blk, node);
       if (countNodes(blk).liveReplicas()>inode.getReplication()) {
         // the block is over-replicated so invalidate the replicas immediately
         invalidateBlock(blk, node);
       } else {
-        // Add this replica to corruptReplicas Map and 
         // add the block to neededReplication 
-        corruptReplicas.addToCorruptReplicasMap(blk, node);
         updateNeededReplications(blk, -1, 0);
       }
     }
@@ -1413,9 +1413,6 @@ class FSNamesystem implements FSConstants, FSNamesystemMBean {
     NameNode.stateChangeLog.info("DIR* NameSystem.invalidateBlock: " 
                                  + blk + " on " 
                                  + dn.getName());
-    if (isInSafeMode()) {
-      throw new SafeModeException("Cannot invalidate block " + blk, safeMode);
-    }
     DatanodeDescriptor node = getDatanode(dn);
     if (node == null) {
       throw new IOException("Cannot invalidate block " + blk +
@@ -2759,7 +2756,7 @@ class FSNamesystem implements FSConstants, FSNamesystemMBean {
     assert storedBlock != null : "Block must be stored by now";
 
     if (block != storedBlock) {
-      if (block.getNumBytes() > 0) {
+      if (block.getNumBytes() >= 0) {
         long cursize = storedBlock.getNumBytes();
         if (cursize == 0) {
           storedBlock.setNumBytes(block.getNumBytes());
@@ -2771,12 +2768,13 @@ class FSNamesystem implements FSConstants, FSNamesystemMBean {
           try {
             if (cursize > block.getNumBytes()) {
               // new replica is smaller in size than existing block.
-              // Delete new replica.
-              LOG.warn("Deleting block " + block + " from " + node.getName());
-              invalidateBlock(block, node);
+              // Mark the new replica as corrupt.
+              LOG.warn("Mark new replica " + block + " from " + node.getName() +
+                  " as corrupt because its length is shorter than existing ones");
+              markBlockAsCorrupt(block, node);
             } else {
               // new replica is larger in size than existing block.
-              // Delete pre-existing replicas.
+              // Mark pre-existing replicas as corrupt.
               int numNodes = blocksMap.numNodes(block);
               int count = 0;
               DatanodeDescriptor nodes[] = new DatanodeDescriptor[numNodes];
@@ -2788,9 +2786,9 @@ class FSNamesystem implements FSConstants, FSNamesystemMBean {
                 }
               }
               for (int j = 0; j < count; j++) {
-                LOG.warn("Deleting block " + block + " from " + 
-                         nodes[j].getName());
-                invalidateBlock(block, nodes[j]);
+                LOG.warn("Mark existing replica " + block + " from " + nodes[j].getName() +
+                         " as corrupt because its length is shorter than the new one");
+                markBlockAsCorrupt(block, nodes[j]);
               }
               //
               // change the size of block in blocksMap

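The FSNamesystem hunks above amount to one policy change: when a reported replica's length disagrees with the stored block length, the NameNode now calls markBlockAsCorrupt instead of invalidateBlock, so the suspect copy is queued for re-replication and deleted only after enough healthy replicas exist; the old path could delete the last good replica during startup, which is the data loss HADOOP-4810 describes. A minimal, self-contained sketch of the new decision logic, assuming nothing beyond plain Java (the class, method, and enum names are illustrative, not Hadoop APIs):

public class ReplicaLengthPolicy {
  enum Action { ACCEPT, MARK_NEW_REPLICA_CORRUPT, MARK_EXISTING_REPLICAS_CORRUPT }

  // storedLen: length the NameNode already records for the block;
  // reportedLen: length of the newly reported replica.
  static Action onLengthMismatch(long storedLen, long reportedLen) {
    if (storedLen == 0) {
      return Action.ACCEPT;                         // adopt the first reported length
    }
    if (reportedLen < storedLen) {
      return Action.MARK_NEW_REPLICA_CORRUPT;       // shorter newcomer is suspect
    }
    if (reportedLen > storedLen) {
      return Action.MARK_EXISTING_REPLICAS_CORRUPT; // stored copies are suspect
    }
    return Action.ACCEPT;                           // lengths agree
  }

  public static void main(String[] args) {
    System.out.println(onLengthMismatch(1024, 1023)); // MARK_NEW_REPLICA_CORRUPT
    System.out.println(onLengthMismatch(1024, 2048)); // MARK_EXISTING_REPLICAS_CORRUPT
  }
}

In either corrupt-marking branch the replica stays on disk until replication brings the block back to its target count, which is the behavior the new test below exercises.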
+ 63 - 0
src/test/org/apache/hadoop/dfs/TestDatanodeBlockScanner.java

@@ -382,4 +382,67 @@ public class TestDatanodeBlockScanner extends TestCase {
     assertTrue(blocks.get(0).isCorrupt() == false);
     cluster.shutdown();
   }
+  
+  /** Test if NameNode handles truncated blocks in block report */
+  public void testTruncatedBlockReport() throws Exception {
+    final Configuration conf = new Configuration();
+    final short REPLICATION_FACTOR = (short)2;
+
+    MiniDFSCluster cluster = new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
+    cluster.waitActive();
+    FileSystem fs = cluster.getFileSystem();
+    try {
+      final Path fileName = new Path("/file1");
+      DFSTestUtil.createFile(fs, fileName, 1, REPLICATION_FACTOR, 0);
+      DFSTestUtil.waitReplication(fs, fileName, REPLICATION_FACTOR);
+
+      String block = DFSTestUtil.getFirstBlock(fs, fileName).getBlockName();
+
+      // Truncate replica of block
+      truncateReplica(block, 0);
+
+      cluster.shutdown();
+
+      // restart the cluster
+      cluster = new MiniDFSCluster(
+          0, conf, REPLICATION_FACTOR, false, true, null, null, null);
+      cluster.startDataNodes(conf, 1, true, null, null);
+      cluster.waitActive();  // now we have 3 datanodes
+
+      // wait for the truncated block to be detected and the block to be re-replicated
+      DFSTestUtil.waitReplication(
+          cluster.getFileSystem(), fileName, REPLICATION_FACTOR);
+      
+      // Make sure that truncated block will be deleted
+      waitForBlockDeleted(block, 0);
+    } finally {
+      cluster.shutdown();
+    }
+  }
+  
+  private void truncateReplica(String blockName, int dnIndex) throws IOException {
+    File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
+    for (int i=dnIndex*2; i<dnIndex*2+2; i++) {
+      File blockFile = new File(baseDir, "data" + (i+1)+ "/current/" + 
+                               blockName);
+      if (blockFile.exists()) {
+        RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
+        raFile.setLength(raFile.length()-1);
+        raFile.close();
+        break;
+      }
+    }
+  }
+  
+  private void waitForBlockDeleted(String blockName, int dnIndex) 
+  throws IOException, InterruptedException {
+    File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
+    File blockFile1 = new File(baseDir, "data" + (2*dnIndex+1)+ "/current/" + 
+        blockName);
+    File blockFile2 = new File(baseDir, "data" + (2*dnIndex+2)+ "/current/" + 
+        blockName);
+    while (blockFile1.exists() || blockFile2.exists()) {
+      Thread.sleep(100);
+    }
+  }
 }
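
truncateReplica and waitForBlockDeleted both hard-code the MiniDFSCluster on-disk layout of this era: datanode dnIndex keeps its replicas in the data(2*dnIndex+1) and data(2*dnIndex+2) directories under ${test.build.data}/dfs/data. A small hypothetical helper, assuming that same layout, makes the convention explicit (it is not part of the patch):

import java.io.File;

public class BlockFileLocator {
  // Returns the two directories where datanode dnIndex may hold blockName.
  static File[] candidateBlockFiles(File baseDir, int dnIndex, String blockName) {
    return new File[] {
      new File(baseDir, "data" + (2 * dnIndex + 1) + "/current/" + blockName),
      new File(baseDir, "data" + (2 * dnIndex + 2) + "/current/" + blockName),
    };
  }

  public static void main(String[] args) {
    File base = new File(System.getProperty("test.build.data", "/tmp"), "dfs/data");
    for (File f : candidateBlockFiles(base, 0, "blk_12345")) {
      System.out.println(f);  // e.g. /tmp/dfs/data/data1/current/blk_12345
    }
  }
}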