Преглед на файлове

HDFS-2011. Removal and restoration of storage directories on checkpointing failure doesn't work properly. Contributed by Ravi Prakash.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1141748 13f79535-47bb-0310-9956-ffa450edef68
Matthew Foley преди 14 години
родител
ревизия
7bd41f031f

+ 3 - 0
hdfs/CHANGES.txt

@@ -556,6 +556,9 @@ Trunk (unreleased changes)
 
   BUG FIXES
 
+    HDFS-2011. Removal and restoration of storage directories on checkpointing
+    failure doesn't work properly. (Ravi Prakash via mattf)
+
     HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.
     (mattf)
 

+ 22 - 10
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java

@@ -122,19 +122,31 @@ class EditLogFileOutputStream extends EditLogOutputStream {
   public void close() throws IOException {
     // close should have been called after all pending transactions
     // have been flushed & synced.
-    int bufSize = bufCurrent.size();
-    if (bufSize != 0) {
-      throw new IOException("FSEditStream has " + bufSize
-          + " bytes still to be flushed and cannot " + "be closed.");
+    // if already closed, just skip
+    if(bufCurrent != null)
+    {
+      int bufSize = bufCurrent.size();
+      if (bufSize != 0) {
+        throw new IOException("FSEditStream has " + bufSize
+            + " bytes still to be flushed and cannot " + "be closed.");
+      }
+      bufCurrent.close();
+      bufCurrent = null;
     }
-    bufCurrent.close();
-    bufReady.close();
 
-    // remove the last INVALID marker from transaction log.
-    fc.truncate(fc.position());
-    fp.close();
+    if(bufReady != null) {
+      bufReady.close();
+      bufReady = null;
+    }
 
-    bufCurrent = bufReady = null;
+    // remove the last INVALID marker from transaction log.
+    if (fc != null && fc.isOpen()) {
+      fc.truncate(fc.position());
+      fc.close();
+    }
+    if (fp != null) {
+      fp.close();
+    }
   }
 
   /**

+ 6 - 0
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java

@@ -511,6 +511,12 @@ public class NNStorage extends Storage implements Closeable {
         // Close any edits stream associated with this dir and remove directory
         LOG.warn("incrementCheckpointTime failed on "
                  + sd.getRoot().getPath() + ";type="+sd.getStorageDirType());
+        try {
+          reportErrorsOnDirectory(sd);
+        } catch (IOException ioe) {
+            LOG.error("Failed to report and remove NN storage directory "
+                      + sd.getRoot().getPath(), ioe);
+        }
       }
     }
   }

+ 67 - 0
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -21,6 +21,7 @@ import junit.framework.TestCase;
 import java.io.*;
 import java.net.InetSocketAddress;
 import java.net.URI;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Iterator;
@@ -137,6 +138,72 @@ public class TestCheckpoint extends TestCase {
     resurrectNameDir(first); // put back namedir
   }
 
+  /**
+   * Tests EditLogFileOutputStream doesn't throw NullPointerException on being
+   * closed twice.
+   * See https://issues.apache.org/jira/browse/HDFS-2011
+   */
+  public void testEditLogFileOutputStreamCloses()
+    throws IOException,NullPointerException {
+    System.out.println("Testing EditLogFileOutputStream doesn't throw " +
+                       "NullPointerException on being closed twice");
+    File editLogStreamFile = null;
+    try {
+      editLogStreamFile = new File(System.getProperty("test.build.data","/tmp"),
+                                   "editLogStream.dat");
+      EditLogFileOutputStream editLogStream =
+                             new EditLogFileOutputStream(editLogStreamFile, 0);
+      editLogStream.close();
+      // Closing a stream twice should not throw a NullPointerException
+      editLogStream.close();
+    } finally {
+      if (editLogStreamFile != null)
+        // Cleanup the editLogStream.dat file we created
+          editLogStreamFile.delete();
+    }
+    System.out.println("Successfully tested EditLogFileOutputStream doesn't " +
+           "throw NullPointerException on being closed twice");
+  }
+
+  /**
+   * Checks that an IOException in NNStorage.setCheckpointTimeInStorage is handled
+   * correctly (by removing the storage directory)
+   * See https://issues.apache.org/jira/browse/HDFS-2011
+   */
+  public void testSetCheckpointTimeInStorageHandlesIOException() throws Exception {
+    System.out.println("Check IOException handled correctly by setCheckpointTimeInStorage");
+    NNStorage nnStorage = new NNStorage(new HdfsConfiguration());
+    ArrayList<URI> fsImageDirs = new ArrayList<URI>();
+    ArrayList<URI> editsDirs = new ArrayList<URI>();
+    File filePath =
+      new File(System.getProperty("test.build.data","/tmp"), "storageDirToCheck");
+    assertTrue("Couldn't create directory storageDirToCheck",
+               filePath.exists() || filePath.mkdirs());
+    try {
+      fsImageDirs.add(filePath.toURI());
+      editsDirs.add(filePath.toURI());
+      // Initialize NNStorage
+      nnStorage.setStorageDirectories(fsImageDirs, editsDirs);
+      assertTrue("List of storage directories didn't have storageDirToCheck.",
+                 nnStorage.getEditsDirectories().iterator().next().
+                 toString().indexOf("storageDirToCheck") != -1);
+      assertTrue("List of removed storage directories wasn't empty",
+                 nnStorage.getRemovedStorageDirs().isEmpty());
+    } finally {
+      // Delete storage directory to cause IOException in setCheckpointTimeInStorage
+      assertTrue("Couldn't remove directory " + filePath.getAbsolutePath(),
+                 filePath.delete());
+    }
+    // Just call setCheckpointTimeInStorage using any random number
+    nnStorage.setCheckpointTimeInStorage(1);
+    List<StorageDirectory> listRsd = nnStorage.getRemovedStorageDirs();
+    assertTrue("Removed directory wasn't what was expected",
+               listRsd.size() > 0 && listRsd.get(listRsd.size() - 1).getRoot().
+               toString().indexOf("storageDirToCheck") != -1);
+    System.out.println("Successfully checked IOException is handled correctly "
+                       + "by setCheckpointTimeInStorage");
+  }
+
   /*
    * Simulate namenode crashing after rolling edit log.
    */