|
@@ -81,6 +81,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
|
|
@@ -1039,6 +1040,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|
|
@Override
|
|
|
public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
|
|
|
RMAppAttemptEvent event) {
|
|
|
+ RMApp rmApp = appAttempt.rmContext.getRMApps().get(
|
|
|
+ appAttempt.getAppAttemptId().getApplicationId());
|
|
|
+
|
|
|
/*
|
|
|
* If last attempt recovered final state is null .. it means attempt was
|
|
|
* started but AM container may or may not have started / finished.
|
|
@@ -1046,8 +1050,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|
|
*/
|
|
|
if (appAttempt.recoveredFinalState != null) {
|
|
|
appAttempt.progress = 1.0f;
|
|
|
- RMApp rmApp =appAttempt.rmContext.getRMApps().get(
|
|
|
- appAttempt.getAppAttemptId().getApplicationId());
|
|
|
// We will replay the final attempt only if last attempt is in final
|
|
|
// state but application is not in final state.
|
|
|
if (rmApp.getCurrentAppAttempt() == appAttempt
|
|
@@ -1060,7 +1062,24 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|
|
appAttempt, event);
|
|
|
}
|
|
|
return appAttempt.recoveredFinalState;
|
|
|
- } else {
|
|
|
+ } else if (RMAppImpl.isAppInFinalState(rmApp)) {
|
|
|
+ // The attempt's final state was not saved even though the app's final state was.
|
|
|
+ // Skip adding the attempt into scheduler
|
|
|
+ RMAppState appState = ((RMAppImpl) rmApp).getRecoveredFinalState();
|
|
|
+ LOG.warn(rmApp.getApplicationId() + " final state (" + appState
|
|
|
+ + ") was recorded, but " + appAttempt.applicationAttemptId
|
|
|
+ + " final state (" + appAttempt.recoveredFinalState
|
|
|
+ + ") was not recorded.");
|
|
|
+ switch (appState) {
|
|
|
+ case FINISHED:
|
|
|
+ return RMAppAttemptState.FINISHED;
|
|
|
+ case FAILED:
|
|
|
+ return RMAppAttemptState.FAILED;
|
|
|
+ case KILLED:
|
|
|
+ return RMAppAttemptState.KILLED;
|
|
|
+ }
|
|
|
+ return RMAppAttemptState.FAILED;
|
|
|
+ } else{
|
|
|
// Add the current attempt to the scheduler.
|
|
|
if (appAttempt.rmContext.isWorkPreservingRecoveryEnabled()) {
|
|
|
// Need to register an app attempt before AM can register
|
|
@@ -1094,6 +1113,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
private void rememberTargetTransitions(RMAppAttemptEvent event,
|
|
|
Object transitionToDo, RMAppAttemptState targetFinalState) {
|
|
|
transitionTodo = transitionToDo;
|