|
@@ -1,89 +1,506 @@
|
|
|
/**
|
|
|
-* Licensed to the Apache Software Foundation (ASF) under one
|
|
|
-* or more contributor license agreements. See the NOTICE file
|
|
|
-* distributed with this work for additional information
|
|
|
-* regarding copyright ownership. The ASF licenses this file
|
|
|
-* to you under the Apache License, Version 2.0 (the
|
|
|
-* "License"); you may not use this file except in compliance
|
|
|
-* with the License. You may obtain a copy of the License at
|
|
|
-*
|
|
|
-* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
-*
|
|
|
-* Unless required by applicable law or agreed to in writing, software
|
|
|
-* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
-* See the License for the specific language governing permissions and
|
|
|
-* limitations under the License.
|
|
|
-*/
|
|
|
+ * Licensed to the Apache Software Foundation (ASF) under one
|
|
|
+ * or more contributor license agreements. See the NOTICE file
|
|
|
+ * distributed with this work for additional information
|
|
|
+ * regarding copyright ownership. The ASF licenses this file
|
|
|
+ * to you under the Apache License, Version 2.0 (the
|
|
|
+ * "License"); you may not use this file except in compliance
|
|
|
+ * with the License. You may obtain a copy of the License at
|
|
|
+ *
|
|
|
+ * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+ *
|
|
|
+ * Unless required by applicable law or agreed to in writing, software
|
|
|
+ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+ * See the License for the specific language governing permissions and
|
|
|
+ * limitations under the License.
|
|
|
+ */
|
|
|
package org.apache.hadoop.hdfs.server.namenode.ha;
|
|
|
|
|
|
+import static org.junit.Assert.assertEquals;
|
|
|
+import static org.junit.Assert.assertFalse;
|
|
|
import static org.junit.Assert.assertTrue;
|
|
|
import static org.junit.Assert.fail;
|
|
|
|
|
|
import java.io.File;
|
|
|
import java.io.IOException;
|
|
|
import java.net.URI;
|
|
|
+import java.net.URISyntaxException;
|
|
|
+import java.util.Collection;
|
|
|
|
|
|
import org.apache.commons.logging.Log;
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
|
|
+import org.apache.hadoop.fs.FileSystem;
|
|
|
+import org.apache.hadoop.fs.Path;
|
|
|
+import org.apache.hadoop.hdfs.DFSConfigKeys;
|
|
|
+import org.apache.hadoop.hdfs.HdfsConfiguration;
|
|
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
|
|
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
|
|
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster.Builder;
|
|
|
+import org.apache.hadoop.hdfs.qjournal.server.Journal;
|
|
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
|
|
import org.apache.hadoop.hdfs.server.common.Storage;
|
|
|
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
|
|
+import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
|
|
+import org.apache.hadoop.hdfs.util.PersistentLongFile;
|
|
|
import org.apache.hadoop.test.GenericTestUtils;
|
|
|
+import org.junit.Before;
|
|
|
import org.junit.Test;
|
|
|
|
|
|
-import com.google.common.collect.Lists;
|
|
|
+import com.google.common.base.Joiner;
|
|
|
|
|
|
/**
|
|
|
* Tests for upgrading with HA enabled.
|
|
|
*/
|
|
|
public class TestDFSUpgradeWithHA {
|
|
|
-
|
|
|
+
|
|
|
private static final Log LOG = LogFactory.getLog(TestDFSUpgradeWithHA.class);
|
|
|
+
|
|
|
+ private Configuration conf;
|
|
|
+
|
|
|
+ @Before
|
|
|
+ public void createConfiguration() {
|
|
|
+ conf = new HdfsConfiguration();
|
|
|
+ // Turn off persistent IPC, so that the DFSClient can survive NN restart
|
|
|
+ conf.setInt(
|
|
|
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
|
|
|
+ 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void assertCTimesEqual(MiniDFSCluster cluster) {
|
|
|
+ long nn1CTime = cluster.getNamesystem(0).getFSImage().getStorage().getCTime();
|
|
|
+ long nn2CTime = cluster.getNamesystem(1).getFSImage().getStorage().getCTime();
|
|
|
+ assertEquals(nn1CTime, nn2CTime);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void checkClusterPreviousDirExistence(MiniDFSCluster cluster,
|
|
|
+ boolean shouldExist) {
|
|
|
+ for (int i = 0; i < 2; i++) {
|
|
|
+ checkNnPreviousDirExistence(cluster, i, shouldExist);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void checkNnPreviousDirExistence(MiniDFSCluster cluster,
|
|
|
+ int index, boolean shouldExist) {
|
|
|
+ Collection<URI> nameDirs = cluster.getNameDirs(index);
|
|
|
+ for (URI nnDir : nameDirs) {
|
|
|
+ checkPreviousDirExistence(new File(nnDir), shouldExist);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
+ private static void checkJnPreviousDirExistence(MiniQJMHACluster jnCluster,
|
|
|
+ boolean shouldExist) throws IOException {
|
|
|
+ for (int i = 0; i < 3; i++) {
|
|
|
+ checkPreviousDirExistence(
|
|
|
+ jnCluster.getJournalCluster().getJournalDir(i, "ns1"), shouldExist);
|
|
|
+ }
|
|
|
+ if (shouldExist) {
|
|
|
+ assertEpochFilesCopied(jnCluster);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void assertEpochFilesCopied(MiniQJMHACluster jnCluster)
|
|
|
+ throws IOException {
|
|
|
+ for (int i = 0; i < 3; i++) {
|
|
|
+ File journalDir = jnCluster.getJournalCluster().getJournalDir(i, "ns1");
|
|
|
+ File currDir = new File(journalDir, "current");
|
|
|
+ File prevDir = new File(journalDir, "previous");
|
|
|
+ for (String fileName : new String[]{ Journal.LAST_PROMISED_FILENAME,
|
|
|
+ Journal.LAST_WRITER_EPOCH }) {
|
|
|
+ File prevFile = new File(prevDir, fileName);
|
|
|
+ // Possible the prev file doesn't exist, e.g. if there has never been a
|
|
|
+ // writer before the upgrade.
|
|
|
+ if (prevFile.exists()) {
|
|
|
+ PersistentLongFile prevLongFile = new PersistentLongFile(prevFile, -10);
|
|
|
+ PersistentLongFile currLongFile = new PersistentLongFile(new File(currDir,
|
|
|
+ fileName), -11);
|
|
|
+ assertTrue("Value in " + fileName + " has decreased on upgrade in "
|
|
|
+ + journalDir, prevLongFile.get() <= currLongFile.get());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void checkPreviousDirExistence(File rootDir,
|
|
|
+ boolean shouldExist) {
|
|
|
+ File previousDir = new File(rootDir, "previous");
|
|
|
+ if (shouldExist) {
|
|
|
+ assertTrue(previousDir + " does not exist", previousDir.exists());
|
|
|
+ } else {
|
|
|
+ assertFalse(previousDir + " does exist", previousDir.exists());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void runFinalizeCommand(MiniDFSCluster cluster)
|
|
|
+ throws IOException {
|
|
|
+ HATestUtil.setFailoverConfigurations(cluster, conf);
|
|
|
+ new DFSAdmin(conf).finalizeUpgrade();
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
- * Make sure that an HA NN refuses to start if given an upgrade-related
|
|
|
- * startup option.
|
|
|
+ * Ensure that an admin cannot finalize an HA upgrade without at least one NN
|
|
|
+ * being active.
|
|
|
*/
|
|
|
@Test
|
|
|
- public void testStartingWithUpgradeOptionsFails() throws IOException {
|
|
|
- for (StartupOption startOpt : Lists.newArrayList(new StartupOption[] {
|
|
|
- StartupOption.UPGRADE, StartupOption.FINALIZE,
|
|
|
- StartupOption.ROLLBACK })) {
|
|
|
- MiniDFSCluster cluster = null;
|
|
|
+ public void testCannotFinalizeIfNoActive() throws IOException,
|
|
|
+ URISyntaxException {
|
|
|
+ MiniDFSCluster cluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
+ try {
|
|
|
+ cluster = new MiniDFSCluster.Builder(conf)
|
|
|
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
|
|
|
+ .numDataNodes(0)
|
|
|
+ .build();
|
|
|
+
|
|
|
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ checkPreviousDirExistence(sharedDir, false);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkPreviousDirExistence(sharedDir, true);
|
|
|
+
|
|
|
+ // NN0 should come up in the active state when given the -upgrade option,
|
|
|
+ // so no need to transition it to active.
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo2")));
|
|
|
+
|
|
|
+ // Restart NN0 without the -upgrade flag, to make sure that works.
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ // Make sure we can still do FS ops after upgrading.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo3")));
|
|
|
+
|
|
|
+ // Now bootstrap the standby with the upgraded info.
|
|
|
+ int rc = BootstrapStandby.run(
|
|
|
+ new String[]{"-force"},
|
|
|
+ cluster.getConfiguration(1));
|
|
|
+ assertEquals(0, rc);
|
|
|
+
|
|
|
+ // Now restart NN1 and make sure that we can do ops against that as well.
|
|
|
+ cluster.restartNameNode(1);
|
|
|
+ cluster.transitionToStandby(0);
|
|
|
+ cluster.transitionToActive(1);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo4")));
|
|
|
+
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+
|
|
|
+ // Now there's no active NN.
|
|
|
+ cluster.transitionToStandby(1);
|
|
|
+
|
|
|
try {
|
|
|
- cluster = new MiniDFSCluster.Builder(new Configuration())
|
|
|
- .nnTopology(MiniDFSNNTopology.simpleHATopology())
|
|
|
- .startupOption(startOpt)
|
|
|
- .numDataNodes(0)
|
|
|
- .build();
|
|
|
- fail("Should not have been able to start an HA NN in upgrade mode");
|
|
|
- } catch (IllegalArgumentException iae) {
|
|
|
+ runFinalizeCommand(cluster);
|
|
|
+ fail("Should not have been able to finalize upgrade with no NN active");
|
|
|
+ } catch (IOException ioe) {
|
|
|
GenericTestUtils.assertExceptionContains(
|
|
|
- "Cannot perform DFS upgrade with HA enabled.", iae);
|
|
|
- LOG.info("Got expected exception", iae);
|
|
|
- } finally {
|
|
|
- if (cluster != null) {
|
|
|
- cluster.shutdown();
|
|
|
- }
|
|
|
+ "Cannot finalize with no NameNode active", ioe);
|
|
|
+ }
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (cluster != null) {
|
|
|
+ cluster.shutdown();
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
- * Make sure that an HA NN won't start if a previous upgrade was in progress.
|
|
|
+ * Make sure that an HA NN with NFS-based HA can successfully start and
|
|
|
+ * upgrade.
|
|
|
*/
|
|
|
@Test
|
|
|
- public void testStartingWithUpgradeInProgressFails() throws Exception {
|
|
|
+ public void testNfsUpgrade() throws IOException, URISyntaxException {
|
|
|
MiniDFSCluster cluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
try {
|
|
|
- cluster = new MiniDFSCluster.Builder(new Configuration())
|
|
|
+ cluster = new MiniDFSCluster.Builder(conf)
|
|
|
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
|
|
.numDataNodes(0)
|
|
|
.build();
|
|
|
|
|
|
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ checkPreviousDirExistence(sharedDir, false);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkPreviousDirExistence(sharedDir, true);
|
|
|
+
|
|
|
+ // NN0 should come up in the active state when given the -upgrade option,
|
|
|
+ // so no need to transition it to active.
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo2")));
|
|
|
+
|
|
|
+ // Restart NN0 without the -upgrade flag, to make sure that works.
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ // Make sure we can still do FS ops after upgrading.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo3")));
|
|
|
+
|
|
|
+ // Now bootstrap the standby with the upgraded info.
|
|
|
+ int rc = BootstrapStandby.run(
|
|
|
+ new String[]{"-force"},
|
|
|
+ cluster.getConfiguration(1));
|
|
|
+ assertEquals(0, rc);
|
|
|
+
|
|
|
+ // Now restart NN1 and make sure that we can do ops against that as well.
|
|
|
+ cluster.restartNameNode(1);
|
|
|
+ cluster.transitionToStandby(0);
|
|
|
+ cluster.transitionToActive(1);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo4")));
|
|
|
+
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (cluster != null) {
|
|
|
+ cluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Make sure that an HA NN can successfully upgrade when configured using
|
|
|
+ * JournalNodes.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testUpgradeWithJournalNodes() throws IOException,
|
|
|
+ URISyntaxException {
|
|
|
+ MiniQJMHACluster qjCluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
+ try {
|
|
|
+ Builder builder = new MiniQJMHACluster.Builder(conf);
|
|
|
+ builder.getDfsBuilder()
|
|
|
+ .numDataNodes(0);
|
|
|
+ qjCluster = builder.build();
|
|
|
+
|
|
|
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkJnPreviousDirExistence(qjCluster, false);
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, true);
|
|
|
+
|
|
|
+ // NN0 should come up in the active state when given the -upgrade option,
|
|
|
+ // so no need to transition it to active.
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo2")));
|
|
|
+
|
|
|
+ // Restart NN0 without the -upgrade flag, to make sure that works.
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ // Make sure we can still do FS ops after upgrading.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo3")));
|
|
|
+
|
|
|
+ // Now bootstrap the standby with the upgraded info.
|
|
|
+ int rc = BootstrapStandby.run(
|
|
|
+ new String[]{"-force"},
|
|
|
+ cluster.getConfiguration(1));
|
|
|
+ assertEquals(0, rc);
|
|
|
+
|
|
|
+ // Now restart NN1 and make sure that we can do ops against that as well.
|
|
|
+ cluster.restartNameNode(1);
|
|
|
+ cluster.transitionToStandby(0);
|
|
|
+ cluster.transitionToActive(1);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo4")));
|
|
|
+
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (qjCluster != null) {
|
|
|
+ qjCluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void testFinalizeWithJournalNodes() throws IOException,
|
|
|
+ URISyntaxException {
|
|
|
+ MiniQJMHACluster qjCluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
+ try {
|
|
|
+ Builder builder = new MiniQJMHACluster.Builder(conf);
|
|
|
+ builder.getDfsBuilder()
|
|
|
+ .numDataNodes(0);
|
|
|
+ qjCluster = builder.build();
|
|
|
+
|
|
|
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkJnPreviousDirExistence(qjCluster, false);
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo2")));
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, true);
|
|
|
+
|
|
|
+ // Now bootstrap the standby with the upgraded info.
|
|
|
+ int rc = BootstrapStandby.run(
|
|
|
+ new String[]{"-force"},
|
|
|
+ cluster.getConfiguration(1));
|
|
|
+ assertEquals(0, rc);
|
|
|
+
|
|
|
+ cluster.restartNameNode(1);
|
|
|
+
|
|
|
+ runFinalizeCommand(cluster);
|
|
|
+
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (qjCluster != null) {
|
|
|
+ qjCluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Make sure that even if the NN which initiated the upgrade is in the standby
|
|
|
+ * state that we're allowed to finalize.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testFinalizeFromSecondNameNodeWithJournalNodes()
|
|
|
+ throws IOException, URISyntaxException {
|
|
|
+ MiniQJMHACluster qjCluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
+ try {
|
|
|
+ Builder builder = new MiniQJMHACluster.Builder(conf);
|
|
|
+ builder.getDfsBuilder()
|
|
|
+ .numDataNodes(0);
|
|
|
+ qjCluster = builder.build();
|
|
|
+
|
|
|
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkJnPreviousDirExistence(qjCluster, false);
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, true);
|
|
|
+
|
|
|
+ // Now bootstrap the standby with the upgraded info.
|
|
|
+ int rc = BootstrapStandby.run(
|
|
|
+ new String[]{"-force"},
|
|
|
+ cluster.getConfiguration(1));
|
|
|
+ assertEquals(0, rc);
|
|
|
+
|
|
|
+ cluster.restartNameNode(1);
|
|
|
+
|
|
|
+ // Make the second NN (not the one that initiated the upgrade) active when
|
|
|
+ // the finalize command is run.
|
|
|
+ cluster.transitionToStandby(0);
|
|
|
+ cluster.transitionToActive(1);
|
|
|
+
|
|
|
+ runFinalizeCommand(cluster);
|
|
|
+
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (qjCluster != null) {
|
|
|
+ qjCluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Make sure that an HA NN will start if a previous upgrade was in progress.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testStartingWithUpgradeInProgressSucceeds() throws Exception {
|
|
|
+ MiniDFSCluster cluster = null;
|
|
|
+ try {
|
|
|
+ cluster = new MiniDFSCluster.Builder(conf)
|
|
|
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
|
|
|
+ .numDataNodes(0)
|
|
|
+ .build();
|
|
|
+
|
|
|
// Simulate an upgrade having started.
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
for (URI uri : cluster.getNameDirs(i)) {
|
|
@@ -92,18 +509,226 @@ public class TestDFSUpgradeWithHA {
|
|
|
assertTrue(prevTmp.mkdirs());
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
cluster.restartNameNodes();
|
|
|
- fail("Should not have been able to start an HA NN with an in-progress upgrade");
|
|
|
- } catch (IOException ioe) {
|
|
|
- GenericTestUtils.assertExceptionContains(
|
|
|
- "Cannot start an HA namenode with name dirs that need recovery.",
|
|
|
- ioe);
|
|
|
- LOG.info("Got expected exception", ioe);
|
|
|
} finally {
|
|
|
if (cluster != null) {
|
|
|
cluster.shutdown();
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Test rollback with NFS shared dir.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testRollbackWithNfs() throws Exception {
|
|
|
+ MiniDFSCluster cluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
+ try {
|
|
|
+ cluster = new MiniDFSCluster.Builder(conf)
|
|
|
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
|
|
|
+ .numDataNodes(0)
|
|
|
+ .build();
|
|
|
+
|
|
|
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ checkPreviousDirExistence(sharedDir, false);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkPreviousDirExistence(sharedDir, true);
|
|
|
+
|
|
|
+ // NN0 should come up in the active state when given the -upgrade option,
|
|
|
+ // so no need to transition it to active.
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo2")));
|
|
|
+
|
|
|
+ // Now bootstrap the standby with the upgraded info.
|
|
|
+ int rc = BootstrapStandby.run(
|
|
|
+ new String[]{"-force"},
|
|
|
+ cluster.getConfiguration(1));
|
|
|
+ assertEquals(0, rc);
|
|
|
+
|
|
|
+ cluster.restartNameNode(1);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkPreviousDirExistence(sharedDir, true);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+
|
|
|
+ // Now shut down the cluster and do the rollback.
|
|
|
+ Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
|
|
|
+ cluster.shutdown();
|
|
|
+
|
|
|
+ conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
|
|
|
+ NameNode.doRollback(conf, false);
|
|
|
+
|
|
|
+ // The rollback operation should have rolled back the first NN's local
|
|
|
+ // dirs, and the shared dir, but not the other NN's dirs. Those have to be
|
|
|
+ // done by bootstrapping the standby.
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, false);
|
|
|
+ checkPreviousDirExistence(sharedDir, false);
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (cluster != null) {
|
|
|
+ cluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void testRollbackWithJournalNodes() throws IOException,
|
|
|
+ URISyntaxException {
|
|
|
+ MiniQJMHACluster qjCluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
+ try {
|
|
|
+ Builder builder = new MiniQJMHACluster.Builder(conf);
|
|
|
+ builder.getDfsBuilder()
|
|
|
+ .numDataNodes(0);
|
|
|
+ qjCluster = builder.build();
|
|
|
+
|
|
|
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, false);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, true);
|
|
|
+
|
|
|
+ // NN0 should come up in the active state when given the -upgrade option,
|
|
|
+ // so no need to transition it to active.
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo2")));
|
|
|
+
|
|
|
+ // Now bootstrap the standby with the upgraded info.
|
|
|
+ int rc = BootstrapStandby.run(
|
|
|
+ new String[]{"-force"},
|
|
|
+ cluster.getConfiguration(1));
|
|
|
+ assertEquals(0, rc);
|
|
|
+
|
|
|
+ cluster.restartNameNode(1);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, true);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+
|
|
|
+ // Shut down the NNs, but deliberately leave the JNs up and running.
|
|
|
+ Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
|
|
|
+ cluster.shutdown();
|
|
|
+
|
|
|
+ conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
|
|
|
+ NameNode.doRollback(conf, false);
|
|
|
+
|
|
|
+ // The rollback operation should have rolled back the first NN's local
|
|
|
+ // dirs, and the shared dir, but not the other NN's dirs. Those have to be
|
|
|
+ // done by bootstrapping the standby.
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, false);
|
|
|
+ checkJnPreviousDirExistence(qjCluster, false);
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (qjCluster != null) {
|
|
|
+ qjCluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Make sure that starting a second NN with the -upgrade flag fails if the
|
|
|
+ * other NN has already done that.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testCannotUpgradeSecondNameNode() throws IOException,
|
|
|
+ URISyntaxException {
|
|
|
+ MiniDFSCluster cluster = null;
|
|
|
+ FileSystem fs = null;
|
|
|
+ try {
|
|
|
+ cluster = new MiniDFSCluster.Builder(conf)
|
|
|
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
|
|
|
+ .numDataNodes(0)
|
|
|
+ .build();
|
|
|
+
|
|
|
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
|
|
|
+
|
|
|
+ // No upgrade is in progress at the moment.
|
|
|
+ checkClusterPreviousDirExistence(cluster, false);
|
|
|
+ assertCTimesEqual(cluster);
|
|
|
+ checkPreviousDirExistence(sharedDir, false);
|
|
|
+
|
|
|
+ // Transition NN0 to active and do some FS ops.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo1")));
|
|
|
+
|
|
|
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
|
|
|
+ // flag.
|
|
|
+ cluster.shutdownNameNode(1);
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ checkNnPreviousDirExistence(cluster, 0, true);
|
|
|
+ checkNnPreviousDirExistence(cluster, 1, false);
|
|
|
+ checkPreviousDirExistence(sharedDir, true);
|
|
|
+
|
|
|
+ // NN0 should come up in the active state when given the -upgrade option,
|
|
|
+ // so no need to transition it to active.
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo2")));
|
|
|
+
|
|
|
+ // Restart NN0 without the -upgrade flag, to make sure that works.
|
|
|
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
|
|
|
+ cluster.restartNameNode(0, false);
|
|
|
+
|
|
|
+ // Make sure we can still do FS ops after upgrading.
|
|
|
+ cluster.transitionToActive(0);
|
|
|
+ assertTrue(fs.mkdirs(new Path("/foo3")));
|
|
|
+
|
|
|
+ // Make sure that starting the second NN with the -upgrade flag fails.
|
|
|
+ cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE);
|
|
|
+ try {
|
|
|
+ cluster.restartNameNode(1, false);
|
|
|
+ fail("Should not have been able to start second NN with -upgrade");
|
|
|
+ } catch (IOException ioe) {
|
|
|
+ GenericTestUtils.assertExceptionContains(
|
|
|
+ "It looks like the shared log is already being upgraded", ioe);
|
|
|
+ }
|
|
|
+ } finally {
|
|
|
+ if (fs != null) {
|
|
|
+ fs.close();
|
|
|
+ }
|
|
|
+ if (cluster != null) {
|
|
|
+ cluster.shutdown();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|