Преглед на файлове

HDFS-2747. Entering safe mode after starting SBN can NPE. Contributed by Uma Maheswara Rao G.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232176 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon преди 13 години
родител
ревизия
2f26475a39

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt

@@ -105,3 +105,5 @@ HDFS-2766. Test for case where standby partially reads log and then performs che
 HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. (atm)
 
 HDFS-2789. TestHAAdmin.testFailover is failing (eli)
+
+HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G via todd)

+ 22 - 15
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -3774,21 +3774,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   void enterSafeMode(boolean resourcesLow) throws IOException {
     writeLock();
     try {
-    // Ensure that any concurrent operations have been fully synced
-    // before entering safe mode. This ensures that the FSImage
-    // is entirely stable on disk as soon as we're in safe mode.
-    getEditLog().logSyncAll();
-    if (!isInSafeMode()) {
-      safeMode = new SafeModeInfo(resourcesLow);
-      return;
-    }
-    if (resourcesLow) {
-      safeMode.setResourcesLow();
-    }
-    safeMode.setManual();
-    getEditLog().logSyncAll();
-    NameNode.stateChangeLog.info("STATE* Safe mode is ON. " 
-                                + safeMode.getTurnOffTip());
+      // Ensure that any concurrent operations have been fully synced
+      // before entering safe mode. This ensures that the FSImage
+      // is entirely stable on disk as soon as we're in safe mode.
+      boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
+      // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
+      // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode
+      if (isEditlogOpenForWrite) {
+        getEditLog().logSyncAll();
+      }
+      if (!isInSafeMode()) {
+        safeMode = new SafeModeInfo(resourcesLow);
+        return;
+      }
+      if (resourcesLow) {
+        safeMode.setResourcesLow();
+      }
+      safeMode.setManual();
+      if (isEditlogOpenForWrite) {
+        getEditLog().logSyncAll();
+      }
+      NameNode.stateChangeLog.info("STATE* Safe mode is ON. "
+          + safeMode.getTurnOffTip());
     } finally {
       writeUnlock();
     }

+ 63 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

@@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.junit.After;
@@ -95,6 +96,68 @@ public class TestHASafeMode {
     nn1.getNamesystem().getEditLogTailer().interrupt();
   }
   
+  /**
+   * Test case for enter safemode in active namenode, when it is already in startup safemode.
+   * It is a regression test for HDFS-2747.
+   */
+  @Test
+  public void testEnterSafeModeInANNShouldNotThrowNPE() throws Exception {
+    banner("Restarting active");
+    restartActive();
+    FSNamesystem namesystem = nn0.getNamesystem();
+    String status = namesystem.getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'", status
+        .startsWith("Safe mode is ON."));
+    NameNodeAdapter.enterSafeMode(nn0, false);
+    assertTrue("Failed to enter into safemode in active", namesystem
+        .isInSafeMode());
+    NameNodeAdapter.enterSafeMode(nn0, false);
+    assertTrue("Failed to enter into safemode in active", namesystem
+        .isInSafeMode());
+  }
+
+  /**
+   * Test case for enter safemode in standby namenode, when it is already in startup safemode.
+   * It is a regression test for HDFS-2747.
+   */
+  @Test
+  public void testEnterSafeModeInSBNShouldNotThrowNPE() throws Exception {
+    banner("Starting with NN0 active and NN1 standby, creating some blocks");
+    DFSTestUtil
+        .createFile(fs, new Path("/test"), 3 * BLOCK_SIZE, (short) 3, 1L);
+    // Roll edit log so that, when the SBN restarts, it will load
+    // the namespace during startup and enter safemode.
+    nn0.getRpcServer().rollEditLog();
+    banner("Creating some blocks that won't be in the edit log");
+    DFSTestUtil.createFile(fs, new Path("/test2"), 5 * BLOCK_SIZE, (short) 3,
+        1L);
+    banner("Deleting the original blocks");
+    fs.delete(new Path("/test"), true);
+    banner("Restarting standby");
+    restartStandby();
+    FSNamesystem namesystem = nn1.getNamesystem();
+    String status = namesystem.getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'", status
+        .startsWith("Safe mode is ON."));
+    NameNodeAdapter.enterSafeMode(nn1, false);
+    assertTrue("Failed to enter into safemode in standby", namesystem
+        .isInSafeMode());
+    NameNodeAdapter.enterSafeMode(nn1, false);
+    assertTrue("Failed to enter into safemode in standby", namesystem
+        .isInSafeMode());
+  }
+
+  private void restartActive() throws IOException {
+    cluster.shutdownNameNode(0);
+    // Set the safemode extension to be lengthy, so that the tests
+    // can check the safemode message after the safemode conditions
+    // have been achieved, without being racy.
+    cluster.getConfiguration(0).setInt(
+        DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000);
+    cluster.restartNameNode(0);
+    nn0 = cluster.getNameNode(0);
+  }
+  
   /**
    * Tests the case where, while a standby is down, more blocks are
    * added to the namespace, but not rolled. So, when it starts up,