Преглед изворни кода

HDFS-16836: StandbyCheckpointer shouldn't trigger rollback fs image after RU is finalized (#5135)

Co-authored-by: Lei Yang <leyang@linkedin.com>
Lei Yang пре 2 година
родитељ
комит
cd929457c9

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java

@@ -912,6 +912,7 @@ public class FSEditLogLoader {
       fsNamesys.getFSImage().updateStorageVersion();
       fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
           NameNodeFile.IMAGE);
+      fsNamesys.setNeedRollbackFsImage(false);
       break;
     }
     case OP_ADD_CACHE_DIRECTIVE: {

+ 36 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java

@@ -33,6 +33,9 @@ import javax.management.ObjectName;
 import javax.management.ReflectionException;
 import javax.management.openmbean.CompositeDataSupport;
 
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.junit.Rule;
 import org.junit.rules.TemporaryFolder;
 import org.slf4j.Logger;
@@ -720,6 +723,39 @@ public class TestRollingUpgrade {
     }
   }
 
+  /**
+   * Verifies that the "need rollback fsimage" flag on the second NameNode
+   * follows the rolling upgrade lifecycle as it catches up with the active
+   * NameNode: the flag must be set after the active NameNode handles
+   * {@code PREPARE} and must be cleared again after it handles
+   * {@code FINALIZE}.
+   *
+   * @throws IOException if the cluster or a rolling upgrade RPC fails
+   * @throws InterruptedException if interrupted while waiting on the cluster
+   */
+  @Test
+  public void testEditLogTailerRollingUpgrade() throws IOException, InterruptedException {
+    Configuration conf = new Configuration();
+    // Use short edit-tailing and checkpoint periods for this test.
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 1);
+
+    HAUtil.setAllowStandbyReads(conf, true);
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology())
+        .numDataNodes(0)
+        .build();
+    try {
+      // All post-build setup lives inside the try block so that the cluster
+      // is still shut down if start-up or the HA transition throws.
+      cluster.waitActive();
+      cluster.transitionToActive(0);
+
+      NameNode nn1 = cluster.getNameNode(0);
+      NameNode nn2 = cluster.getNameNode(1);
+
+      // Starting the rolling upgrade should raise the rollback image flag on
+      // nn2 once it has caught up with nn1.
+      nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.PREPARE);
+      HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
+      Assert.assertTrue(nn2.getNamesystem().isNeedRollbackFsImage());
+
+      // Finalizing the rolling upgrade should reset the rollback image flag
+      // on nn2 once it has caught up with nn1.
+      nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.FINALIZE);
+      HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
+      Assert.assertFalse(nn2.getNamesystem().isNeedRollbackFsImage());
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
   /**
    * In non-HA setup, after rolling upgrade prepare, the Secondary NN should
    * still be able to do checkpoint