Browse Source

HDFS-4233. NN keeps serving even after no journals started while rolling edit. Contributed by Kihwal Lee.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1415502 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas 12 years ago
parent
commit
5928a41a30

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -11,6 +11,8 @@ Release 0.23.6 - UNRELEASED
   OPTIMIZATIONS
 
   BUG FIXES
+    HDFS-4233. NN keeps serving even after no journals started while rolling
+    edit. (Kihwal Lee via suresh)
 
 Release 0.23.5 - UNRELEASED
 

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

@@ -440,7 +440,8 @@ public class FSEditLog  {
       synchronized (this) {
         if (sync) {
           try {
-            if (badJournals.size() >= journals.size()) {
+            if (badJournals.size() >= journals.size() ||
+                candidateJournals.isEmpty()) {
               final String msg =
                 "Could not sync enough journals to persistent storage. "
                 + "Unsynced transactions: " + (txid - synctxid);

+ 13 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java

@@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.ExitUtil.ExitException;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Level;
 import org.aspectj.util.FileUtil;
@@ -185,7 +186,9 @@ public class TestEditLog extends TestCase {
     MiniDFSCluster cluster = null;
     FileSystem fileSys = null;
     try {
-      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
+      cluster = new MiniDFSCluster.Builder(conf)
+          .numDataNodes(NUM_DATA_NODES)
+          .checkExitOnShutdown(false).build();
       cluster.waitActive();
       fileSys = cluster.getFileSystem();
       final FSNamesystem namesystem = cluster.getNamesystem();
@@ -212,6 +215,15 @@ public class TestEditLog extends TestCase {
       
       editLog.logSetReplication("fakefile", (short) 2);
       editLog.logSync();
+
+      // logSync() should fail if there is no active journal.
+      editLog.endCurrentLogSegment(true);
+      editLog.logSetReplication("fakefile", (short) 3);
+      try {
+        editLog.logSync();
+        fail("logSync() should have failed when no journal is active.");
+      } catch (ExitException e) { }
+      
       
       editLog.close();
     } finally {