
HDFS-5728. Block recovery will fail if the metafile does not have crc for all chunks of the block. Contributed by Vinay.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1584871 13f79535-47bb-0310-9956-ffa450edef68
Kihwal Lee, 11 years ago
Commit 24b26fd356
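
Summary of the fix: validateIntegrity() already computed how many bytes of a replica are covered by valid CRCs in the meta file, but it left any uncovered trailing bytes in the block file, and block recovery then fails for such a replica (HDFS-5728). The renamed validateIntegrityAndSetLength() now also truncates the block file down to the validated length when the replica is loaded. Below is a minimal sketch of the length arithmetic involved; CoveredLengthSketch, the 7-byte meta header size, and the 4-byte CRC32 size are illustrative assumptions, not taken from the patch.

import java.io.File;

class CoveredLengthSketch {
  // Sketch only (not part of the patch): how many bytes of a block file are
  // covered by complete CRCs in its, possibly truncated, meta file.
  static long coveredLength(File blockFile, File metaFile, int bytesPerChecksum) {
    final int HEADER_SIZE = 7;    // assumed: 2-byte version + 5-byte checksum header
    final int CHECKSUM_SIZE = 4;  // assumed: one 4-byte CRC32 per data chunk
    long crcBytes = Math.max(0L, metaFile.length() - HEADER_SIZE);
    long completeCrcs = crcBytes / CHECKSUM_SIZE;          // ignore a partial trailing CRC
    long covered = completeCrcs * (long) bytesPerChecksum; // bytes those CRCs can cover
    // The patched validateIntegrityAndSetLength() additionally verifies the last
    // chunk against its CRC and truncates blockFile to the resulting valid length.
    return Math.min(blockFile.length(), covered);
  }
}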

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -22,6 +22,9 @@ Release 0.23.11 - UNRELEASED
 
     HDFS-6166. Revisit balancer so_timeout. (Nathan Roberts via kihwal)
 
+    HDFS-5728. Block recovery will fail if the metafile does not have crc 
+    for all chunks of the block. (Vinayakumar B via kihwal)
+
 Release 0.23.10 - 2013-12-09
 
   INCOMPATIBLE CHANGES

+ 18 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java

@@ -433,7 +433,7 @@ class FSDataset implements FSDatasetInterface {
               blockFile.length(), genStamp, volume, blockFile.getParentFile());
         } else {
           newReplica = new ReplicaWaitingToBeRecovered(blockId,
-              validateIntegrity(blockFile, genStamp), 
+              validateIntegrityAndSetLength(blockFile, genStamp), 
               genStamp, volume, blockFile.getParentFile());
         }
 
@@ -457,7 +457,7 @@ class FSDataset implements FSDatasetInterface {
      * @param genStamp generation stamp of the block
      * @return the number of valid bytes
      */
-    private long validateIntegrity(File blockFile, long genStamp) {
+    private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
       DataInputStream checksumIn = null;
       InputStream blockIn = null;
       try {
@@ -500,11 +500,25 @@ class FSDataset implements FSDatasetInterface {
         IOUtils.readFully(blockIn, buf, 0, lastChunkSize);
 
         checksum.update(buf, 0, lastChunkSize);
+        long validFileLength;
         if (checksum.compare(buf, lastChunkSize)) { // last chunk matches crc
-          return lastChunkStartPos + lastChunkSize;
+          validFileLength = lastChunkStartPos + lastChunkSize;
         } else { // last chunck is corrupt
-          return lastChunkStartPos;
+          validFileLength = lastChunkStartPos;
         }
+
+        // truncate if extra bytes are present without CRC
+        if (blockFile.length() > validFileLength) {
+          RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
+          try {
+            // truncate blockFile
+            blockRAF.setLength(validFileLength);
+          } finally {
+            blockRAF.close();
+          }
+        }
+
+        return validFileLength;
       } catch (IOException e) {
         DataNode.LOG.warn(e);
         return 0;

+ 58 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java

@@ -17,19 +17,27 @@
  */
 package org.apache.hadoop.hdfs;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.TestInterDatanodeProtocol;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.security.UserGroupInformation;
 
 public class TestLeaseRecovery extends junit.framework.TestCase {
   static final int BLOCK_SIZE = 1024;
@@ -142,4 +150,54 @@ public class TestLeaseRecovery extends junit.framework.TestCase {
       if (cluster != null) {cluster.shutdown();}
     }
   }
+
+  /**
+   * Block Recovery when the meta file not having crcs for all chunks in block
+   * file
+   */
+  public void testBlockRecoveryWithLessMetafile() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
+        UserGroupInformation.getCurrentUser().getShortUserName());
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .build();
+    Path file = new Path("/testRecoveryFile");
+    DistributedFileSystem dfs = (DistributedFileSystem)cluster.getFileSystem();
+    FSDataOutputStream out = dfs.create(file);
+    int count = 0;
+    while (count < 2 * 1024 * 1024) {
+      out.writeBytes("Data");
+      count += 4;
+    }
+    out.hsync();
+    // abort the original stream
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+    DataNode dn = cluster.getDataNodes().get(0);
+    BlockLocalPathInfo localPathInfo = dn.getBlockLocalPathInfo(block, null);
+    File metafile = new File(localPathInfo.getMetaPath());
+    assertTrue(metafile.exists());
+
+    // reduce the block meta file size
+    RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
+    raf.setLength(metafile.length() - 20);
+    raf.close();
+
+    // restart DN to make replica to RWR
+    DataNodeProperties dnProp = cluster.stopDataNode(0);
+    cluster.restartDataNode(dnProp, true);
+
+    // try to recover the lease
+    DistributedFileSystem newdfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (++count < 10 && !newdfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    assertTrue("File should be closed", newdfs.recoverLease(file));
+
+  }
 }
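
The new test reproduces the scenario end-to-end: it writes roughly 2 MB, hsyncs, aborts the client stream so the file stays under construction, shortens the replica's meta file by 20 bytes (with 4-byte CRC32 checksums that would drop the last five CRCs, an assumption about the checksum type in this setup), restarts the DataNode so the replica is reloaded as RWR, and then asserts that recoverLease() eventually closes the file. With a standard Maven setup it can be run on its own, e.g. mvn test -Dtest=TestLeaseRecovery from hadoop-hdfs-project/hadoop-hdfs.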