瀏覽代碼

HDFS-3310. svn merge -c 1331064 from branch-1

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.0@1331082 13f79535-47bb-0310-9956-ffa450edef68
Eli Collins 13 年之前
父節點
當前提交
d4725b0c88

+ 3 - 0
CHANGES.txt

@@ -44,6 +44,9 @@ Release 1.0.3 - unreleased
     HADOOP-8294. IPC Connection becomes unusable even if server address was temporarily
     unresolvable. Backport of HADOOP-7428. (Kihwal Lee via mattf)
 
+    HDFS-3310. Make sure that we abort when no edit log directories are left.
+    (Colin Patrick McCabe via eli)
+
 Release 1.0.2 - 2012.03.24
 
   NEW FEATURES

+ 16 - 5
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

@@ -29,6 +29,7 @@ import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.lang.Math;
 import java.nio.channels.FileChannel;
 import java.nio.ByteBuffer;
@@ -428,6 +429,7 @@ public class FSEditLog {
     
     File dir = getStorageDirForStream(idx);
     editStreams.remove(idx);
+    exitIfNoStreams();
     fsimage.removeStorageDir(dir);
   }
 
@@ -446,6 +448,7 @@ public class FSEditLog {
         editStreams.remove(idx);
       }
     }
+    exitIfNoStreams();
   }
   
   /**
@@ -987,7 +990,7 @@ public class FSEditLog {
         sync = true;
 
         // swap buffers
-        assert editStreams.size() > 0 : "no editlog streams";
+        exitIfNoStreams();
         for(EditLogOutputStream eStream : editStreams) {
           try {
             eStream.setReadyToFlush();
@@ -1263,7 +1266,6 @@ public class FSEditLog {
           " edits.new files already exists in all healthy directories:" + b);
       return;
     }
-
     close(); // close existing edit log
 
     // After edit streams are closed, healthy edits files should be identical,
@@ -1274,6 +1276,7 @@ public class FSEditLog {
     // Open edits.new
     //
     Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS);
+    LinkedList<StorageDirectory> toRemove = new LinkedList<StorageDirectory>();
     while (it.hasNext()) {
       StorageDirectory sd = it.next();
       try {
@@ -1282,11 +1285,19 @@ public class FSEditLog {
         eStream.create();
         editStreams.add(eStream);
       } catch (IOException ioe) {
-        removeEditsForStorageDir(sd);
-        fsimage.updateRemovedDirs(sd, ioe);
+        FSImage.LOG.error("error retrying to reopen storage directory '" +
+            sd.getRoot().getAbsolutePath() + "'", ioe);
+        toRemove.add(sd);
         it.remove();
       }
     }
+
+    // Drop the directories that failed to reopen. updateRemovedDirs only
+    // records them; exitIfNoStreams below aborts if no edit stream remains.
+    for (StorageDirectory sd : toRemove) {
+      removeEditsForStorageDir(sd);
+      fsimage.updateRemovedDirs(sd);
+    }
     exitIfNoStreams();
   }
 
@@ -1319,7 +1330,7 @@ public class FSEditLog {
         if (!getEditNewFile(sd).renameTo(getEditFile(sd))) {
           sd.unlock();
           removeEditsForStorageDir(sd);
-          fsimage.updateRemovedDirs(sd, null);
+          fsimage.updateRemovedDirs(sd);
           it.remove();
         }
       }

+ 8 - 2
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -220,6 +220,11 @@ public class FSImage extends Storage {
     removedStorageDirs.add(sd);
   }
 
+  void updateRemovedDirs(StorageDirectory sd) {
+    LOG.warn("Removing storage dir " + sd.getRoot().getPath());
+    removedStorageDirs.add(sd);
+  }
+
   File getEditFile(StorageDirectory sd) {
     return getImageFile(sd, NameNodeFile.EDITS);
   }
@@ -644,8 +649,9 @@ public class FSImage extends Storage {
     while (it.hasNext()) {
       StorageDirectory sd = it.next();
       if (sd.getRoot().getPath().equals(dir.getPath())) {
-        updateRemovedDirs(sd, null);
+        updateRemovedDirs(sd);
         it.remove();
+        editLog.removeEditsForStorageDir(sd);
       }
     }
   }
@@ -1554,7 +1560,7 @@ public class FSImage extends Storage {
         curFile.delete();
         if (!ckpt.renameTo(curFile)) {
           editLog.removeEditsForStorageDir(sd);
-          updateRemovedDirs(sd, null);
+          updateRemovedDirs(sd);
           it.remove();
         }
       }

+ 19 - 1
src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageDirectoryFailure.java

@@ -129,7 +129,6 @@ public class TestStorageDirectoryFailure {
   /** Remove storage dirs and checkpoint to trigger detection */
   public void testCheckpointAfterFailingFirstNamedir() throws IOException {
     assertEquals(0, numRemovedDirs());
-
     checkFileCreation("file0");
 
     // Remove the 1st storage dir
@@ -198,4 +197,23 @@ public class TestStorageDirectoryFailure {
     checkFileContents("file0");
     checkFileContents("file1");
   }
+
+  @Test
+  /** Test that we abort when there are no valid edit log directories
+   * remaining. */
+  public void testAbortOnNoValidEditDirs() throws IOException {
+    cluster.restartNameNode();
+    assertEquals(0, numRemovedDirs());
+    checkFileCreation("file9");
+    cluster.getNameNode().getFSImage().
+      removeStorageDir(new File(nameDirs.get(0)));
+    cluster.getNameNode().getFSImage().
+      removeStorageDir(new File(nameDirs.get(1)));
+    FSEditLog spyLog = spy(cluster.getNameNode().getFSImage().getEditLog());
+    doNothing().when(spyLog).fatalExit(anyString());
+    cluster.getNameNode().getFSImage().setEditLog(spyLog);
+    cluster.getNameNode().getFSImage().
+      removeStorageDir(new File(nameDirs.get(2)));
+    verify(spyLog, atLeastOnce()).fatalExit(anyString());
+  }
 }

+ 1 - 1
src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java

@@ -130,7 +130,7 @@ public class TestStorageRestore extends TestCase {
       
       if (sd.getRoot().equals(path2) || sd.getRoot().equals(path3)) {
         fi.getEditLog().removeEditsForStorageDir(sd);
-        fi.updateRemovedDirs(sd, null);
+        fi.updateRemovedDirs(sd);
         it.remove();
       }
     }