HDFS-1141. completeFile does not check lease ownership.
(Todd Lipcon via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20-append@955410 13f79535-47bb-0310-9956-ffa450edef68

Dhruba Borthakur 15 years ago
commit cebd4f2b9c

+ 3 - 0
CHANGES.txt

@@ -31,6 +31,9 @@ Release 0.20-append - Unreleased
     HDFS-445. pread should refetch block locations when necessary.
     (Todd Lipcon via dhruba)
 
+    HDFS-1141. completeFile does not check lease ownership.
+    (Todd Lipcon via dhruba)
+
   IMPROVEMENTS
 
   BUG FIXES

+ 9 - 14
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -1410,24 +1410,19 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean {
     NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + src + " for " + holder);
     if (isInSafeMode())
       throw new SafeModeException("Cannot complete file " + src, safeMode);
-    INode iFile = dir.getFileINode(src);
-    INodeFileUnderConstruction pendingFile = null;
-    Block[] fileBlocks = null;
 
-    if (iFile != null && iFile.isUnderConstruction()) {
-      pendingFile = (INodeFileUnderConstruction) iFile;
-      fileBlocks =  dir.getFileBlocks(src);
-    }
-    if (fileBlocks == null ) {    
+    INodeFileUnderConstruction pendingFile  = checkLease(src, holder);
+    Block[] fileBlocks =  dir.getFileBlocks(src);
+
+    if (fileBlocks == null ) {
       NameNode.stateChangeLog.warn("DIR* NameSystem.completeFile: "
                                    + "failed to complete " + src
-                                   + " because dir.getFileBlocks() is null " + 
-                                   " and pendingFile is " + 
-                                   ((pendingFile == null) ? "null" : 
-                                     ("from " + pendingFile.getClientMachine()))
-                                  );                      
+                                   + " because dir.getFileBlocks() is null,"
+                                   + " pending from " + pendingFile.getClientMachine());
       return CompleteFileStatus.OPERATION_FAILED;
-    } else if (!checkFileProgress(pendingFile, true)) {
+    }
+
+    if (!checkFileProgress(pendingFile, true)) {
       return CompleteFileStatus.STILL_WAITING;
     }
 

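The substance of the fix is visible in the hunk above: instead of fetching the INode directly and tolerating a null pendingFile, completeFile now resolves the file through checkLease(src, holder), so a caller that no longer holds the lease is turned away before the file can be finalized. As a rough, self-contained sketch of that pattern (the PendingFile type, messages, and client names below are invented for illustration and are not the FSNamesystem internals):

    import java.io.IOException;

    // Standalone sketch of the lease-ownership check that HDFS-1141 adds to
    // completeFile. Names here are invented for the example.
    class LeaseCheckSketch {

      static class LeaseExpiredException extends IOException {
        LeaseExpiredException(String msg) { super(msg); }
      }

      // Hypothetical stand-in for a file that is open for writing.
      static class PendingFile {
        final String src;
        final String leaseHolder;
        PendingFile(String src, String leaseHolder) {
          this.src = src;
          this.leaseHolder = leaseHolder;
        }
      }

      // Reject the call unless the caller still owns the lease on the file.
      static PendingFile checkLease(PendingFile file, String src, String holder)
          throws IOException {
        if (file == null) {
          throw new LeaseExpiredException("No lease on " + src
              + ": file is not open for writing");
        }
        if (!file.leaseHolder.equals(holder)) {
          throw new LeaseExpiredException("Lease mismatch on " + src
              + ": owned by " + file.leaseHolder + " but accessed by " + holder);
        }
        return file;
      }

      public static void main(String[] args) throws IOException {
        PendingFile f = new PendingFile("/testRecoverFinalized", "DFSClient_new");
        checkLease(f, f.src, "DFSClient_new");      // current holder: allowed
        try {
          checkLease(f, f.src, "DFSClient_old");    // stale holder: rejected
        } catch (LeaseExpiredException e) {
          System.out.println("rejected as expected: " + e.getMessage());
        }
      }
    }

With the ownership check in place, the stale writer's close() in the new test below fails with the "Lease mismatch" error that the assertions expect.
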
+ 80 - 0
src/test/org/apache/hadoop/hdfs/TestFileAppend4.java

@@ -674,6 +674,85 @@ public class TestFileAppend4 extends TestCase {
     }
   }
 
+
+  /**
+   * Test case that stops a writer after finalizing a block but
+   * before calling completeFile, recovers a file from another writer,
+   * starts writing from that writer, and then has the old lease holder
+   * call completeFile
+   */
+  public void testCompleteOtherLeaseHoldersFile() throws Throwable {
+    cluster = new MiniDFSCluster(conf, 3, true, null);
+
+    try {
+      cluster.waitActive();
+      NameNode preSpyNN = cluster.getNameNode();
+      NameNode spyNN = spy(preSpyNN);
+
+      // Delay completeFile
+      DelayAnswer delayer = new DelayAnswer();
+      doAnswer(delayer).when(spyNN).complete(anyString(), anyString());
+
+      DFSClient client = new DFSClient(null, spyNN, conf, null);
+      file1 = new Path("/testRecoverFinalized");
+      final OutputStream stm = client.create("/testRecoverFinalized", true);
+
+      // write 1/2 block
+      AppendTestUtil.write(stm, 0, 4096);
+      final AtomicReference<Throwable> err = new AtomicReference<Throwable>();
+      Thread t = new Thread() { 
+          public void run() {
+            try {
+              stm.close();
+            } catch (Throwable t) {
+              err.set(t);
+            }
+          }};
+      t.start();
+      LOG.info("Waiting for close to get to latch...");
+      delayer.waitForCall();
+
+      // At this point, the block is finalized on the DNs, but the file
+      // has not been completed in the NN.
+      // Lose the leases
+      LOG.info("Killing lease checker");
+      client.leasechecker.interruptAndJoin();
+
+      FileSystem fs1 = cluster.getFileSystem();
+      FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(
+        fs1.getConf());
+
+      LOG.info("Recovering file");
+      recoverFile(fs2);
+
+      LOG.info("Opening file for append from new fs");
+      FSDataOutputStream appenderStream = fs2.append(file1);
+      
+      LOG.info("Writing some data from new appender");
+      AppendTestUtil.write(appenderStream, 0, 4096);
+      
+      LOG.info("Telling old close to proceed.");
+      delayer.proceed();
+      LOG.info("Waiting for close to finish.");
+      t.join();
+      LOG.info("Close finished.");
+
+      // We expect that close will get a "Lease mismatch"
+      // error.
+      Throwable thrownByClose = err.get();
+      assertNotNull(thrownByClose);
+      assertTrue(thrownByClose instanceof IOException);
+      if (!thrownByClose.getMessage().contains(
+            "Lease mismatch"))
+        throw thrownByClose;
+      
+      // The appender should be able to close properly
+      appenderStream.close();
+    } finally {
+      cluster.shutdown();
+    }
+  }  
+  
   /**
    * Test for an intermittent failure of commitBlockSynchronization.
    * This could happen if the DN crashed between calling updateBlocks
@@ -710,6 +789,7 @@ public class TestFileAppend4 extends TestCase {
     LOG.info("STOP");
   }
 
+  
   /**
    * Test that when a DN starts up with bbws from a file that got
    * removed or finalized when it was down, the block gets deleted.
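
For reference, the DelayAnswer used by testCompleteOtherLeaseHoldersFile above is a Mockito Answer that parks the spied complete() call until the test releases it; that is what lets the test freeze the first writer after its block is finalized but before the namenode marks the file complete. A minimal sketch of that idea, assuming a two-latch implementation behind the waitForCall()/proceed() calls seen in the test (not necessarily the exact helper shipped in this branch):

    import java.util.concurrent.CountDownLatch;
    import org.mockito.invocation.InvocationOnMock;
    import org.mockito.stubbing.Answer;

    // Sketch of a blocking Answer in the spirit of the test's DelayAnswer:
    // the test waits until the spied call arrives, does other work while the
    // caller is parked, and then lets the real method run.
    class DelayAnswerSketch implements Answer<Object> {
      private final CountDownLatch fired = new CountDownLatch(1);
      private final CountDownLatch gate = new CountDownLatch(1);

      public Object answer(InvocationOnMock invocation) throws Throwable {
        fired.countDown();              // tell the test the call has started
        gate.await();                   // park until the test calls proceed()
        return invocation.callRealMethod();
      }

      void waitForCall() throws InterruptedException { fired.await(); }
      void proceed() { gate.countDown(); }
    }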