Quellcode durchsuchen

HDFS-16547. [SBN read] Namenode in safe mode should not be transfer to observer state (#4201)

Signed-off-by: Erik Krogen <xkrogen@apache.org>
Reviewed-by: Zengqiang Xu <xuzq_zander@163.com>
litao vor 2 Jahren
Ursprung
Commit
8f971b0e54

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -2009,6 +2009,9 @@ public class NameNode extends ReconfigurableBase implements
   synchronized void transitionToObserver() throws IOException {
     String operationName = "transitionToObserver";
     namesystem.checkSuperuserPrivilege(operationName);
+    if (notBecomeActiveInSafemode && isInSafeMode()) {
+      throw new ServiceFailedException(getRole() + " still not leave safemode");
+    }
     if (!haEnabled) {
       throw new ServiceFailedException("HA for namenode is not enabled");
     }

+ 8 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java

@@ -247,7 +247,7 @@ public class DFSHAAdmin extends HAAdmin {
   }
 
   private int transitionToObserver(final CommandLine cmd)
-      throws IOException, ServiceFailedException {
+      throws IOException {
     String[] argv = cmd.getArgs();
     if (argv.length != 1) {
       errOut.println("transitionToObserver: incorrect number of arguments");
@@ -262,8 +262,13 @@ public class DFSHAAdmin extends HAAdmin {
     if (!checkManualStateManagementOK(target)) {
       return -1;
     }
-    HAServiceProtocol proto = target.getProxy(getConf(), 0);
-    HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
+    try {
+      HAServiceProtocol proto = target.getProxy(getConf(), 0);
+      HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
+    } catch (ServiceFailedException e) {
+      errOut.println("transitionToObserver failed! " + e.getLocalizedMessage());
+      return -1;
+    }
     return 0;
   }
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -3725,7 +3725,7 @@
   <name>dfs.ha.nn.not-become-active-in-safemode</name>
   <value>false</value>
   <description>
-    This will prevent safe mode namenodes to become active while other standby
+    This will prevent safe mode namenodes from becoming active or observer while other standby
     namenodes might be ready to serve requests when it is set to true.
   </description>
 </property>

+ 4 - 2
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md

@@ -316,12 +316,14 @@ The order in which you set these configurations is unimportant, but the values y
           <value>hdfs://mycluster</value>
         </property>
 
-*   **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
+*   **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
 
     Whether allow namenode to become active when it is in safemode, when it is
     set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
     auto failover is on, or will throw exception to fail the transition to
-    active if auto failover is off. For example:
+    active if auto failover is off. If this configuration is set to true and
+    you try to transition a namenode to observer state while it is in safemode,
+    the namenode will throw an exception to fail the transition. For example:
 
         <property>
           <name>dfs.ha.nn.not-become-active-in-safemode</name>

+ 4 - 2
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md

@@ -376,12 +376,14 @@ The order in which you set these configurations is unimportant, but the values y
           <value>/path/to/journal/node/local/data</value>
         </property>
 
-*   **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
+*   **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
 
     Whether allow namenode to become active when it is in safemode, when it is
     set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
     auto failover is on, or will throw exception to fail the transition to
-    active if auto failover is off. For example:
+    active if auto failover is off. If this configuration is set to true and
+    you try to transition a namenode to observer state while it is in safemode,
+    the namenode will throw an exception to fail the transition. For example:
 
         <property>
           <name>dfs.ha.nn.not-become-active-in-safemode</name>

+ 22 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

@@ -977,4 +977,26 @@ public class TestHASafeMode {
           () -> miniCluster.transitionToActive(0));
     }
   }
+
+  @Test // Expects transitionToObserver to be rejected while the NameNode is in safe mode.
+  public void testTransitionToObserverWhenSafeMode() throws Exception {
+    Configuration config = new Configuration();
+    config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true); // turn on the flag under test
+    try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(config,
+        new File(GenericTestUtils.getRandomizedTempPath()))
+        .nnTopology(MiniDFSNNTopology.simpleHATopology())
+        .numDataNodes(1)
+        .build()) {
+      miniCluster.waitActive();
+      miniCluster.transitionToStandby(0); // put both NameNodes into standby first
+      miniCluster.transitionToStandby(1);
+      NameNode namenode0 = miniCluster.getNameNode(0);
+      NameNode namenode1 = miniCluster.getNameNode(1);
+      NameNodeAdapter.enterSafeMode(namenode0, false); // then force both into safe mode
+      NameNodeAdapter.enterSafeMode(namenode1, false);
+      LambdaTestUtils.intercept(ServiceFailedException.class, // the transition must fail with this type
+          "NameNode still not leave safemode", // expected text in the failure message
+          () -> miniCluster.transitionToObserver(0));
+    }
+  }
 }

+ 25 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java

@@ -17,10 +17,12 @@
  */
 package org.apache.hadoop.hdfs.tools;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
@@ -70,6 +72,7 @@ public class TestDFSHAAdminMiniCluster {
   @Before
   public void setup() throws IOException {
     conf = new Configuration();
+    conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
     cluster = new MiniDFSCluster.Builder(conf)
         .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0)
         .build();
@@ -161,7 +164,28 @@ public class TestDFSHAAdminMiniCluster {
     assertEquals(-1, runTool("-transitionToActive", "nn1"));
     assertFalse(nnode1.isActiveState());
   }
-  
+
+  /**
+   * Tests that a NameNode in safe mode cannot be transitioned to observer
+   * through the DFSHAAdmin command line: the command is expected to return -1
+   * while NameNode 0 is in safe mode, and 0 once it has left safe mode.
+   */
+  @Test
+  public void testObserverTransitionInSafeMode() throws Exception {
+    NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
+    DFSHAAdmin admin = new DFSHAAdmin();
+    admin.setConf(conf);
+    System.setIn(new ByteArrayInputStream("yes\n".getBytes())); // presumably answers the -forcemanual confirmation prompt; verify
+    int result = admin.run(
+        new String[]{"-transitionToObserver", "-forcemanual", "nn1"}); // NOTE(review): "nn1" is assumed to be NameNode index 0; confirm
+    assertEquals("State transition returned: " + result, -1, result); // rejected while in safe mode
+
+    NameNodeAdapter.leaveSafeMode(cluster.getNameNode(0));
+    System.setIn(new ByteArrayInputStream("yes\n".getBytes())); // fresh stdin for the second run
+    int result1 = admin.run(
+        new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
+    assertEquals("State transition returned: " + result1, 0, result1); // succeeds after leaving safe mode
+    assertFalse(cluster.getNameNode(0).isInSafeMode());
+  }
+
   @Test
   public void testTryFailoverToSafeMode() throws Exception {
     conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,