|
@@ -764,15 +764,45 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|
|
}
|
|
|
|
|
|
@Private
|
|
|
- public static class RMFatalEventDispatcher
|
|
|
- implements EventHandler<RMFatalEvent> {
|
|
|
-
|
|
|
+ private class RMFatalEventDispatcher implements EventHandler<RMFatalEvent> {
|
|
|
@Override
|
|
|
public void handle(RMFatalEvent event) {
|
|
|
- LOG.fatal("Received a " + RMFatalEvent.class.getName() + " of type " +
|
|
|
- event.getType().name() + ". Cause:\n" + event.getCause());
|
|
|
+ LOG.error("Received " + event);
|
|
|
|
|
|
- ExitUtil.terminate(1, event.getCause());
|
|
|
+ if (HAUtil.isHAEnabled(getConfig())) {
|
|
|
+ // If we're in an HA config, the right answer is always to go into
|
|
|
+ // standby; the transition itself is started in a separate thread.
|
|
|
+ LOG.warn("Transitioning the resource manager to standby.");
|
|
|
+ handleTransitionToStandByInNewThread();
|
|
|
+ } else {
|
|
|
+ // If we're stand-alone, we probably want to shut down, but whether and
|
|
|
+ // how we do so depends on the event type.
|
|
|
+ switch(event.getType()) {
|
|
|
+ case STATE_STORE_FENCED:
|
|
|
+ LOG.fatal("State store fenced even though the resource manager " +
|
|
|
+ "is not configured for high availability. Shutting down this " +
|
|
|
+ "resource manager to protect the integrity of the state store.");
|
|
|
+ ExitUtil.terminate(1, event.getExplanation());
|
|
|
+ break;
|
|
|
+ case STATE_STORE_OP_FAILED:
|
|
|
+ if (YarnConfiguration.shouldRMFailFast(getConfig())) {
|
|
|
+ LOG.fatal("Shutting down the resource manager because a state " +
|
|
|
+ "store operation failed, and the resource manager is " +
|
|
|
+ "configured to fail fast. See the yarn.fail-fast and " +
|
|
|
+ "yarn.resourcemanager.fail-fast properties.");
|
|
|
+ ExitUtil.terminate(1, event.getExplanation());
|
|
|
+ } else {
|
|
|
+ LOG.warn("Ignoring state store operation failure because the " +
|
|
|
+ "resource manager is not configured to fail fast. See the " +
|
|
|
+ "yarn.fail-fast and yarn.resourcemanager.fail-fast " +
|
|
|
+ "properties.");
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ LOG.fatal("Shutting down the resource manager.");
|
|
|
+ ExitUtil.terminate(1, event.getExplanation());
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -780,7 +810,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|
|
* Transition to standby state in a new thread. The transition operation is
|
|
|
* asynchronous to avoid deadlock caused by cyclic dependency.
|
|
|
*/
|
|
|
- public void handleTransitionToStandByInNewThread() {
|
|
|
+ private void handleTransitionToStandByInNewThread() {
|
|
|
Thread standByTransitionThread =
|
|
|
new Thread(activeServices.standByTransitionRunnable);
|
|
|
standByTransitionThread.setName("StandByTransitionThread");
|