|
@@ -19,11 +19,11 @@ package org.apache.hadoop.ha;
|
|
|
|
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
import java.io.PrintStream;
|
|
import java.io.PrintStream;
|
|
|
|
+import java.util.Arrays;
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
|
|
|
|
import org.apache.commons.cli.Options;
|
|
import org.apache.commons.cli.Options;
|
|
import org.apache.commons.cli.CommandLine;
|
|
import org.apache.commons.cli.CommandLine;
|
|
-import org.apache.commons.cli.CommandLineParser;
|
|
|
|
import org.apache.commons.cli.GnuParser;
|
|
import org.apache.commons.cli.GnuParser;
|
|
import org.apache.commons.cli.ParseException;
|
|
import org.apache.commons.cli.ParseException;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.Log;
|
|
@@ -33,6 +33,8 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.conf.Configured;
|
|
import org.apache.hadoop.conf.Configured;
|
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
|
|
|
+import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
|
|
|
+import org.apache.hadoop.ha.HAServiceProtocol.RequestSource;
|
|
import org.apache.hadoop.util.Tool;
|
|
import org.apache.hadoop.util.Tool;
|
|
import org.apache.hadoop.util.ToolRunner;
|
|
import org.apache.hadoop.util.ToolRunner;
|
|
|
|
|
|
@@ -49,6 +51,13 @@ public abstract class HAAdmin extends Configured implements Tool {
|
|
|
|
|
|
private static final String FORCEFENCE = "forcefence";
|
|
private static final String FORCEFENCE = "forcefence";
|
|
private static final String FORCEACTIVE = "forceactive";
|
|
private static final String FORCEACTIVE = "forceactive";
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Undocumented flag which allows an administrator to use manual failover
|
|
|
|
+ * state transitions even when auto-failover is enabled. This is an unsafe
|
|
|
|
+ * operation, which is why it is not documented in the usage below.
|
|
|
|
+ */
|
|
|
|
+ private static final String FORCEMANUAL = "forcemanual";
|
|
private static final Log LOG = LogFactory.getLog(HAAdmin.class);
|
|
private static final Log LOG = LogFactory.getLog(HAAdmin.class);
|
|
|
|
|
|
private int rpcTimeoutForChecks = -1;
|
|
private int rpcTimeoutForChecks = -1;
|
|
@@ -79,6 +88,7 @@ public abstract class HAAdmin extends Configured implements Tool {
|
|
/** Output stream for errors, for use in tests */
|
|
/** Output stream for errors, for use in tests */
|
|
protected PrintStream errOut = System.err;
|
|
protected PrintStream errOut = System.err;
|
|
PrintStream out = System.out;
|
|
PrintStream out = System.out;
|
|
|
|
+ private RequestSource requestSource = RequestSource.REQUEST_BY_USER;
|
|
|
|
|
|
protected abstract HAServiceTarget resolveTarget(String string);
|
|
protected abstract HAServiceTarget resolveTarget(String string);
|
|
|
|
|
|
@@ -106,63 +116,83 @@ public abstract class HAAdmin extends Configured implements Tool {
|
|
errOut.println("Usage: HAAdmin [" + cmd + " " + usage.args + "]");
|
|
errOut.println("Usage: HAAdmin [" + cmd + " " + usage.args + "]");
|
|
}
|
|
}
|
|
|
|
|
|
- private int transitionToActive(final String[] argv)
|
|
|
|
|
|
+ private int transitionToActive(final CommandLine cmd)
|
|
throws IOException, ServiceFailedException {
|
|
throws IOException, ServiceFailedException {
|
|
- if (argv.length != 2) {
|
|
|
|
|
|
+ String[] argv = cmd.getArgs();
|
|
|
|
+ if (argv.length != 1) {
|
|
errOut.println("transitionToActive: incorrect number of arguments");
|
|
errOut.println("transitionToActive: incorrect number of arguments");
|
|
printUsage(errOut, "-transitionToActive");
|
|
printUsage(errOut, "-transitionToActive");
|
|
return -1;
|
|
return -1;
|
|
}
|
|
}
|
|
-
|
|
|
|
- HAServiceProtocol proto = resolveTarget(argv[1]).getProxy(
|
|
|
|
|
|
+ HAServiceTarget target = resolveTarget(argv[0]);
|
|
|
|
+ if (!checkManualStateManagementOK(target)) {
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+ HAServiceProtocol proto = target.getProxy(
|
|
getConf(), 0);
|
|
getConf(), 0);
|
|
- HAServiceProtocolHelper.transitionToActive(proto);
|
|
|
|
|
|
+ HAServiceProtocolHelper.transitionToActive(proto, createReqInfo());
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
- private int transitionToStandby(final String[] argv)
|
|
|
|
|
|
+ private int transitionToStandby(final CommandLine cmd)
|
|
throws IOException, ServiceFailedException {
|
|
throws IOException, ServiceFailedException {
|
|
- if (argv.length != 2) {
|
|
|
|
|
|
+ String[] argv = cmd.getArgs();
|
|
|
|
+ if (argv.length != 1) {
|
|
errOut.println("transitionToStandby: incorrect number of arguments");
|
|
errOut.println("transitionToStandby: incorrect number of arguments");
|
|
printUsage(errOut, "-transitionToStandby");
|
|
printUsage(errOut, "-transitionToStandby");
|
|
return -1;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
|
|
- HAServiceProtocol proto = resolveTarget(argv[1]).getProxy(
|
|
|
|
|
|
+ HAServiceTarget target = resolveTarget(argv[0]);
|
|
|
|
+ if (!checkManualStateManagementOK(target)) {
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+ HAServiceProtocol proto = target.getProxy(
|
|
getConf(), 0);
|
|
getConf(), 0);
|
|
- HAServiceProtocolHelper.transitionToStandby(proto);
|
|
|
|
|
|
+ HAServiceProtocolHelper.transitionToStandby(proto, createReqInfo());
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
+ /**
|
|
|
|
+ * Ensure that we are allowed to manually manage the HA state of the target
|
|
|
|
+ * service. If automatic failover is configured, then the automatic
|
|
|
|
+ * failover controllers should be doing state management, and it is generally
|
|
|
|
+ * an error to use the HAAdmin command line to do so.
|
|
|
|
+ *
|
|
|
|
+ * @param target the target to check
|
|
|
|
+ * @return true if manual state management is allowed
|
|
|
|
+ */
|
|
|
|
+ private boolean checkManualStateManagementOK(HAServiceTarget target) {
|
|
|
|
+ if (target.isAutoFailoverEnabled()) {
|
|
|
|
+ if (requestSource != RequestSource.REQUEST_BY_USER_FORCED) {
|
|
|
|
+ errOut.println(
|
|
|
|
+ "Automatic failover is enabled for " + target + "\n" +
|
|
|
|
+ "Refusing to manually manage HA state, since it may cause\n" +
|
|
|
|
+ "a split-brain scenario or other incorrect state.\n" +
|
|
|
|
+ "If you are very sure you know what you are doing, please \n" +
|
|
|
|
+ "specify the " + FORCEMANUAL + " flag.");
|
|
|
|
+ return false;
|
|
|
|
+ } else {
|
|
|
|
+ LOG.warn("Proceeding with manual HA state management even though\n" +
|
|
|
|
+ "automatic failover is enabled for " + target);
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
|
|
|
|
- private int failover(final String[] argv)
|
|
|
|
- throws IOException, ServiceFailedException {
|
|
|
|
- boolean forceFence = false;
|
|
|
|
- boolean forceActive = false;
|
|
|
|
-
|
|
|
|
- Options failoverOpts = new Options();
|
|
|
|
- // "-failover" isn't really an option but we need to add
|
|
|
|
- // it to appease CommandLineParser
|
|
|
|
- failoverOpts.addOption("failover", false, "failover");
|
|
|
|
- failoverOpts.addOption(FORCEFENCE, false, "force fencing");
|
|
|
|
- failoverOpts.addOption(FORCEACTIVE, false, "force failover");
|
|
|
|
|
|
+ private StateChangeRequestInfo createReqInfo() {
|
|
|
|
+ return new StateChangeRequestInfo(requestSource);
|
|
|
|
+ }
|
|
|
|
|
|
- CommandLineParser parser = new GnuParser();
|
|
|
|
- CommandLine cmd;
|
|
|
|
|
|
+ private int failover(CommandLine cmd)
|
|
|
|
+ throws IOException, ServiceFailedException {
|
|
|
|
+ boolean forceFence = cmd.hasOption(FORCEFENCE);
|
|
|
|
+ boolean forceActive = cmd.hasOption(FORCEACTIVE);
|
|
|
|
|
|
- try {
|
|
|
|
- cmd = parser.parse(failoverOpts, argv);
|
|
|
|
- forceFence = cmd.hasOption(FORCEFENCE);
|
|
|
|
- forceActive = cmd.hasOption(FORCEACTIVE);
|
|
|
|
- } catch (ParseException pe) {
|
|
|
|
- errOut.println("failover: incorrect arguments");
|
|
|
|
- printUsage(errOut, "-failover");
|
|
|
|
- return -1;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
int numOpts = cmd.getOptions() == null ? 0 : cmd.getOptions().length;
|
|
int numOpts = cmd.getOptions() == null ? 0 : cmd.getOptions().length;
|
|
final String[] args = cmd.getArgs();
|
|
final String[] args = cmd.getArgs();
|
|
|
|
|
|
- if (numOpts > 2 || args.length != 2) {
|
|
|
|
|
|
+ if (numOpts > 3 || args.length != 2) {
|
|
errOut.println("failover: incorrect arguments");
|
|
errOut.println("failover: incorrect arguments");
|
|
printUsage(errOut, "-failover");
|
|
printUsage(errOut, "-failover");
|
|
return -1;
|
|
return -1;
|
|
@@ -171,7 +201,13 @@ public abstract class HAAdmin extends Configured implements Tool {
|
|
HAServiceTarget fromNode = resolveTarget(args[0]);
|
|
HAServiceTarget fromNode = resolveTarget(args[0]);
|
|
HAServiceTarget toNode = resolveTarget(args[1]);
|
|
HAServiceTarget toNode = resolveTarget(args[1]);
|
|
|
|
|
|
- FailoverController fc = new FailoverController(getConf());
|
|
|
|
|
|
+ if (!checkManualStateManagementOK(fromNode) ||
|
|
|
|
+ !checkManualStateManagementOK(toNode)) {
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ FailoverController fc = new FailoverController(getConf(),
|
|
|
|
+ requestSource);
|
|
|
|
|
|
try {
|
|
try {
|
|
fc.failover(fromNode, toNode, forceFence, forceActive);
|
|
fc.failover(fromNode, toNode, forceFence, forceActive);
|
|
@@ -183,18 +219,18 @@ public abstract class HAAdmin extends Configured implements Tool {
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
- private int checkHealth(final String[] argv)
|
|
|
|
|
|
+ private int checkHealth(final CommandLine cmd)
|
|
throws IOException, ServiceFailedException {
|
|
throws IOException, ServiceFailedException {
|
|
- if (argv.length != 2) {
|
|
|
|
|
|
+ String[] argv = cmd.getArgs();
|
|
|
|
+ if (argv.length != 1) {
|
|
errOut.println("checkHealth: incorrect number of arguments");
|
|
errOut.println("checkHealth: incorrect number of arguments");
|
|
printUsage(errOut, "-checkHealth");
|
|
printUsage(errOut, "-checkHealth");
|
|
return -1;
|
|
return -1;
|
|
}
|
|
}
|
|
-
|
|
|
|
- HAServiceProtocol proto = resolveTarget(argv[1]).getProxy(
|
|
|
|
|
|
+ HAServiceProtocol proto = resolveTarget(argv[0]).getProxy(
|
|
getConf(), rpcTimeoutForChecks);
|
|
getConf(), rpcTimeoutForChecks);
|
|
try {
|
|
try {
|
|
- HAServiceProtocolHelper.monitorHealth(proto);
|
|
|
|
|
|
+ HAServiceProtocolHelper.monitorHealth(proto, createReqInfo());
|
|
} catch (HealthCheckFailedException e) {
|
|
} catch (HealthCheckFailedException e) {
|
|
errOut.println("Health check failed: " + e.getLocalizedMessage());
|
|
errOut.println("Health check failed: " + e.getLocalizedMessage());
|
|
return -1;
|
|
return -1;
|
|
@@ -202,15 +238,16 @@ public abstract class HAAdmin extends Configured implements Tool {
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
- private int getServiceState(final String[] argv)
|
|
|
|
|
|
+ private int getServiceState(final CommandLine cmd)
|
|
throws IOException, ServiceFailedException {
|
|
throws IOException, ServiceFailedException {
|
|
- if (argv.length != 2) {
|
|
|
|
|
|
+ String[] argv = cmd.getArgs();
|
|
|
|
+ if (argv.length != 1) {
|
|
errOut.println("getServiceState: incorrect number of arguments");
|
|
errOut.println("getServiceState: incorrect number of arguments");
|
|
printUsage(errOut, "-getServiceState");
|
|
printUsage(errOut, "-getServiceState");
|
|
return -1;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
|
|
- HAServiceProtocol proto = resolveTarget(argv[1]).getProxy(
|
|
|
|
|
|
+ HAServiceProtocol proto = resolveTarget(argv[0]).getProxy(
|
|
getConf(), rpcTimeoutForChecks);
|
|
getConf(), rpcTimeoutForChecks);
|
|
out.println(proto.getServiceStatus().getState());
|
|
out.println(proto.getServiceStatus().getState());
|
|
return 0;
|
|
return 0;
|
|
@@ -263,26 +300,101 @@ public abstract class HAAdmin extends Configured implements Tool {
|
|
printUsage(errOut);
|
|
printUsage(errOut);
|
|
return -1;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ if (!USAGE.containsKey(cmd)) {
|
|
|
|
+ errOut.println(cmd.substring(1) + ": Unknown command");
|
|
|
|
+ printUsage(errOut);
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ Options opts = new Options();
|
|
|
|
+
|
|
|
|
+ // Add command-specific options
|
|
|
|
+ if ("-failover".equals(cmd)) {
|
|
|
|
+ addFailoverCliOpts(opts);
|
|
|
|
+ }
|
|
|
|
+ // Commands that change HA state accept the FORCEMANUAL option
|
|
|
|
+ if ("-transitionToActive".equals(cmd) ||
|
|
|
|
+ "-transitionToStandby".equals(cmd) ||
|
|
|
|
+ "-failover".equals(cmd)) {
|
|
|
|
+ opts.addOption(FORCEMANUAL, false,
|
|
|
|
+ "force manual control even if auto-failover is enabled");
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ CommandLine cmdLine = parseOpts(cmd, opts, argv);
|
|
|
|
+ if (cmdLine == null) {
|
|
|
|
+ // error already printed
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (cmdLine.hasOption(FORCEMANUAL)) {
|
|
|
|
+ if (!confirmForceManual()) {
|
|
|
|
+ LOG.fatal("Aborted");
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+ // Instruct the NNs to honor this request even if they're
|
|
|
|
+ // configured for automatic failover.
|
|
|
|
+ requestSource = RequestSource.REQUEST_BY_USER_FORCED;
|
|
|
|
+ }
|
|
|
|
|
|
if ("-transitionToActive".equals(cmd)) {
|
|
if ("-transitionToActive".equals(cmd)) {
|
|
- return transitionToActive(argv);
|
|
|
|
|
|
+ return transitionToActive(cmdLine);
|
|
} else if ("-transitionToStandby".equals(cmd)) {
|
|
} else if ("-transitionToStandby".equals(cmd)) {
|
|
- return transitionToStandby(argv);
|
|
|
|
|
|
+ return transitionToStandby(cmdLine);
|
|
} else if ("-failover".equals(cmd)) {
|
|
} else if ("-failover".equals(cmd)) {
|
|
- return failover(argv);
|
|
|
|
|
|
+ return failover(cmdLine);
|
|
} else if ("-getServiceState".equals(cmd)) {
|
|
} else if ("-getServiceState".equals(cmd)) {
|
|
- return getServiceState(argv);
|
|
|
|
|
|
+ return getServiceState(cmdLine);
|
|
} else if ("-checkHealth".equals(cmd)) {
|
|
} else if ("-checkHealth".equals(cmd)) {
|
|
- return checkHealth(argv);
|
|
|
|
|
|
+ return checkHealth(cmdLine);
|
|
} else if ("-help".equals(cmd)) {
|
|
} else if ("-help".equals(cmd)) {
|
|
return help(argv);
|
|
return help(argv);
|
|
} else {
|
|
} else {
|
|
- errOut.println(cmd.substring(1) + ": Unknown command");
|
|
|
|
- printUsage(errOut);
|
|
|
|
- return -1;
|
|
|
|
|
|
+ // we already checked command validity above, so getting here
|
|
|
|
+ // would be a coding error
|
|
|
|
+ throw new AssertionError("Should not get here, command: " + cmd);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ private boolean confirmForceManual() throws IOException {
|
|
|
|
+ return ToolRunner.confirmPrompt(
|
|
|
|
+ "You have specified the " + FORCEMANUAL + " flag. This flag is " +
|
|
|
|
+ "dangerous, as it can induce a split-brain scenario that WILL " +
|
|
|
|
+ "CORRUPT your HDFS namespace, possibly irrecoverably.\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "It is recommended not to use this flag, but instead to shut down the " +
|
|
|
|
+ "cluster and disable automatic failover if you prefer to manually " +
|
|
|
|
+ "manage your HA state.\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "You may abort safely by answering 'n' or hitting ^C now.\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "Are you sure you want to continue?");
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Add CLI options which are specific to the failover command and no
|
|
|
|
+ * others.
|
|
|
|
+ */
|
|
|
|
+ private void addFailoverCliOpts(Options failoverOpts) {
|
|
|
|
+ failoverOpts.addOption(FORCEFENCE, false, "force fencing");
|
|
|
|
+ failoverOpts.addOption(FORCEACTIVE, false, "force failover");
|
|
|
|
+ // Don't add FORCEMANUAL, since that's added separately for all commands
|
|
|
|
+ // that change state.
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private CommandLine parseOpts(String cmdName, Options opts, String[] argv) {
|
|
|
|
+ try {
|
|
|
|
+ // Strip off the first arg, since that's just the command name
|
|
|
|
+ argv = Arrays.copyOfRange(argv, 1, argv.length);
|
|
|
|
+ return new GnuParser().parse(opts, argv);
|
|
|
|
+ } catch (ParseException pe) {
|
|
|
|
+ errOut.println(cmdName.substring(1) +
|
|
|
|
+ ": incorrect arguments");
|
|
|
|
+ printUsage(errOut, cmdName);
|
|
|
|
+ return null;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
private int help(String[] argv) {
|
|
private int help(String[] argv) {
|
|
if (argv.length != 2) {
|
|
if (argv.length != 2) {
|
|
printUsage(errOut, "-help");
|
|
printUsage(errOut, "-help");
|