
HADOOP-2540. fsck reports missing blocks incorrectly. (dhruba)


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@611385 13f79535-47bb-0310-9956-ffa450edef68
Dhruba Borthakur, 17 years ago
commit b53c7a6ad9

+ 2 - 0
CHANGES.txt

@@ -428,6 +428,8 @@ Release 0.15.3 - (unreleased changes)
 
     HADOOP-2562. globPaths supports {ab,cd}.  (Hairong Kuang via dhruba)
 
+    HADOOP-2540. fsck reports missing blocks incorrectly. (dhruba)
+
 Release 0.15.2 - 2008-01-02
 
   BUG FIXES

+ 16 - 4
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -215,6 +215,9 @@ class FSNamesystem implements FSConstants {
 
   private long maxFsObjects = 0;          // maximum number of fs objects
 
+  private long softLimit = LEASE_SOFTLIMIT_PERIOD;
+  private long hardLimit = LEASE_HARDLIMIT_PERIOD;
+
   /**
    * FSNamesystem constructor.
    */
@@ -1115,7 +1118,7 @@ class FSNamesystem implements FSConstants {
       throw new LeaseExpiredException("No lease on " + src);
     }
     INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)file;
-    if (!pendingFile.getClientName().equals(holder)) {
+    if (holder != null && !pendingFile.getClientName().equals(holder)) {
       throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
           + pendingFile.getClientName() + " but is accessed by " + holder);
     }
@@ -1548,7 +1551,7 @@ class FSNamesystem implements FSConstants {
      * Returns true if the Hard Limit Timer has expired
      */
     public boolean expiredHardLimit() {
-      if (now() - lastUpdate > LEASE_HARDLIMIT_PERIOD) {
+      if (now() - lastUpdate > hardLimit) {
         return true;
       }
       return false;
@@ -1557,7 +1560,7 @@ class FSNamesystem implements FSConstants {
      * Returns true if the Soft Limit Timer has expired
      */
     public boolean expiredSoftLimit() {
-      if (now() - lastUpdate > LEASE_SOFTLIMIT_PERIOD) {
+      if (now() - lastUpdate > softLimit) {
         return true;
       }
       return false;
@@ -1704,7 +1707,7 @@ class FSNamesystem implements FSConstants {
     // will report this block as a missing block because no datanodes have it.
     // Delete this block.
     Block[] blocks = pendingFile.getBlocks();
-    if (blocks != null && blocks.length > 1) {
+    if (blocks != null && blocks.length > 0) {
       Block last = blocks[blocks.length - 1];
       if (last.getNumBytes() == 0) {
           pendingFile.removeBlock(last);
@@ -3918,4 +3921,13 @@ class FSNamesystem implements FSConstants {
   long getMaxObjects() {
     return maxFsObjects;
   }
+
+  /**
+   * Used by unit tests to change lease periods
+   */
+  void setLeasePeriod(long softLimit, long hardLimit) {
+    this.softLimit = softLimit;
+    this.hardLimit = hardLimit;
+    this.lmthread.interrupt();
+  }
 }
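
The substantive fix for HADOOP-2540 is in the fourth hunk, where blocks.length > 1 becomes blocks.length > 0: when a lease expires on a file whose final block was allocated but never written, the recovery path is meant to delete that empty block, but the old test skipped files with exactly one block, leaving a block that no datanode holds and that fsck then reported as missing. A minimal sketch of the corrected logic, in plain Java with a hypothetical stand-in Block type rather than Hadoop's:

// Minimal sketch (plain Java, not Hadoop code) of the corrected cleanup.
// Block here is a hypothetical stand-in holding only a byte count.
import java.util.ArrayList;
import java.util.List;

class LastBlockCleanupSketch {

  static final class Block {
    final long numBytes;
    Block(long numBytes) { this.numBytes = numBytes; }
  }

  // The old code tested blocks.size() > 1, so a file whose only block was
  // empty kept that block; no datanode ever stored it, and fsck counted
  // it as missing. Testing > 0 handles the single-block file too.
  static void removeLastBlockIfEmpty(List<Block> blocks) {
    if (blocks != null && blocks.size() > 0) {   // was: > 1
      Block last = blocks.get(blocks.size() - 1);
      if (last.numBytes == 0) {
        blocks.remove(blocks.size() - 1);
      }
    }
  }

  public static void main(String[] args) {
    List<Block> blocks = new ArrayList<Block>();
    blocks.add(new Block(0));                    // one empty block
    removeLastBlockIfEmpty(blocks);
    System.out.println("blocks left: " + blocks.size()); // prints 0
  }
}

The remaining hunks support the new regression test: LEASE_SOFTLIMIT_PERIOD and LEASE_HARDLIMIT_PERIOD become overridable instance fields with a package-private setter, and the lease check presumably tolerates a null holder so that recovery initiated by the namenode itself, rather than by a named client, can pass it.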

+ 1 - 1
src/java/org/apache/hadoop/dfs/NameNode.java

@@ -85,7 +85,7 @@ public class NameNode implements ClientProtocol, DatanodeProtocol,
   public static final Log LOG = LogFactory.getLog("org.apache.hadoop.dfs.NameNode");
   public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.dfs.StateChange");
 
-  private FSNamesystem namesystem;
+  FSNamesystem namesystem;
   private Server server;
   private Thread emptier;
   private int handlerCount = 2;
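
Relaxing namesystem from private to package-private is what lets test code elsewhere in org.apache.hadoop.dfs reach the namesystem directly; the MiniDFSCluster hook below is the intended consumer. A hypothetical illustration of the visibility rule:

// Hypothetical helper, shown only to illustrate Java package-private
// access; it must live in org.apache.hadoop.dfs to compile.
package org.apache.hadoop.dfs;

class NamesystemAccessSketch {
  void shortenLeases(NameNode nameNode) {
    // Legal now that namesystem is package-private rather than private:
    nameNode.namesystem.setLeasePeriod(1000L, 1000L);
  }
}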

+ 7 - 0
src/test/org/apache/hadoop/dfs/MiniDFSCluster.java

@@ -508,4 +508,11 @@ public class MiniDFSCluster {
      injectBlocks(i, blocksToInject[i]);
     }
   }
+
+  /**
+   * Set the softLimit and hardLimit of client lease periods
+   */
+  void setLeasePeriod(long soft, long hard) {
+    nameNode.namesystem.setLeasePeriod(soft, hard);
+  }
 }
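
Combined with the NameNode change above, this hook lets a test force lease expiry in about a second instead of waiting out the production limits. A hedged usage sketch; the class must live in org.apache.hadoop.dfs because setLeasePeriod is package-private, and the constructor arguments mirror testFileCreationError2 below:

package org.apache.hadoop.dfs;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;

// Usage sketch for the new test hook, assuming the 0.15-era test classpath.
class LeasePeriodUsageSketch {
  void demo() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    try {
      long leasePeriod = 1000;                       // one second
      cluster.setLeasePeriod(leasePeriod, leasePeriod);
      Thread.sleep(5 * leasePeriod);                 // let lease recovery run
    } finally {
      cluster.shutdown();
    }
  }
}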

+ 75 - 10
src/test/org/apache/hadoop/dfs/TestFileCreation.java

@@ -23,10 +23,8 @@ import java.net.*;
 import java.util.Random;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.dfs.FSConstants.DatanodeReportType;
@@ -47,16 +45,11 @@ public class TestFileCreation extends TestCase {
   // entire file is written, the first two blocks definitely get flushed to
   // the datanodes.
 
-  private static String TEST_ROOT_DIR =
-    new Path(System.getProperty("test.build.data","/tmp"))
-    .toString().replace(' ', '+');
-  
   //
   // creates a file but does not close it
   //
   private FSDataOutputStream createFile(FileSystem fileSys, Path name, int repl)
     throws IOException {
-    // create and write a file that contains three blocks of data
     FSDataOutputStream stm = fileSys.create(name, true,
                                             fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                             (short)repl, (long)blockSize);
@@ -116,9 +109,9 @@ public class TestFileCreation extends TestCase {
 
   private void checkData(byte[] actual, int from, byte[] expected, String message) {
     for (int idx = 0; idx < actual.length; idx++) {
-      this.assertEquals(message+" byte "+(from+idx)+" differs. expected "+
-                        expected[from+idx]+" actual "+actual[idx],
-                        expected[from+idx], actual[idx]);
+      assertEquals(message+" byte "+(from+idx)+" differs. expected "+
+                   expected[from+idx]+" actual "+actual[idx],
+                   expected[from+idx], actual[idx]);
       actual[idx] = 0;
     }
   }
@@ -266,6 +259,78 @@ public class TestFileCreation extends TestCase {
     }
   }
 
+  /**
+   * Test that the filesystem removes the last block from a file if its
+   * lease expires.
+   */
+  public void testFileCreationError2() throws IOException {
+    long leasePeriod = 1000;
+    System.out.println("testFileCreationError2 start");
+    Configuration conf = new Configuration();
+    conf.setInt("heartbeat.recheck.interval", 1000);
+    conf.setInt("dfs.heartbeat.interval", 1);
+    if (simulatedStorage) {
+      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
+    }
+    // create cluster
+    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
+    FileSystem fs = cluster.getFileSystem();
+    cluster.waitActive();
+    InetSocketAddress addr = new InetSocketAddress("localhost",
+                                                   cluster.getNameNodePort());
+    DFSClient client = new DFSClient(addr, conf);
+
+    try {
+
+      // create a new file.
+      //
+      Path file1 = new Path("/filestatus.dat");
+      createFile(fs, file1, 1);
+      System.out.println("testFileCreationError2: "
+                         + "Created file filestatus.dat with one "
+                         + "replica.");
+
+      LocatedBlocks locations = client.namenode.getBlockLocations(
+                                  file1.toString(), 0, Long.MAX_VALUE);
+      System.out.println("The file has " + locations.locatedBlockCount() +
+                         " blocks.");
+
+      // add another block to the file
+      LocatedBlock location = client.namenode.addBlock(file1.toString(), 
+                                                       null);
+      System.out.println("Added block " + location.getBlock());
+
+      locations = client.namenode.getBlockLocations(file1.toString(), 
+                                                    0, Long.MAX_VALUE);
+      System.out.println("The file now has " + locations.locatedBlockCount() +
+                         " blocks.");
+      
+      // set the soft and hard limit to be 1 second so that the
+      // namenode triggers lease recovery
+      cluster.setLeasePeriod(leasePeriod, leasePeriod);
+
+      // wait for the lease to expire
+      try {
+        Thread.sleep(5 * leasePeriod);
+      } catch (InterruptedException e) {
+      }
+
+      // verify that the last block was cleaned up.
+      locations = client.namenode.getBlockLocations(file1.toString(), 
+                                                    0, Long.MAX_VALUE);
+      System.out.println("locations = " + locations.locatedBlockCount());
+      assertTrue("Error blocks were not cleaned up",
+                 locations.locatedBlockCount() == 0);
+      System.out.println("testFileCreationError2 successful");
+    } finally {
+      try {
+        fs.close();
+      } catch (Exception e) {
+      }
+      cluster.shutdown();
+    }
+  }
+
 /**
  * Test that file data becomes available before file is closed.
  */