|
@@ -77,6 +77,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
|
|
import org.apache.hadoop.mapreduce.v2.app2.AppContext;
|
|
import org.apache.hadoop.mapreduce.v2.app2.AppContext;
|
|
import org.apache.hadoop.mapreduce.v2.app2.TaskAttemptListener;
|
|
import org.apache.hadoop.mapreduce.v2.app2.TaskAttemptListener;
|
|
import org.apache.hadoop.mapreduce.v2.app2.TaskHeartbeatHandler;
|
|
import org.apache.hadoop.mapreduce.v2.app2.TaskHeartbeatHandler;
|
|
|
|
+import org.apache.hadoop.mapreduce.v2.app2.job.JobStateInternal;
|
|
import org.apache.hadoop.mapreduce.v2.app2.job.Task;
|
|
import org.apache.hadoop.mapreduce.v2.app2.job.Task;
|
|
import org.apache.hadoop.mapreduce.v2.app2.job.TaskAttempt;
|
|
import org.apache.hadoop.mapreduce.v2.app2.job.TaskAttempt;
|
|
import org.apache.hadoop.mapreduce.v2.app2.job.event.JobCounterUpdateEvent;
|
|
import org.apache.hadoop.mapreduce.v2.app2.job.event.JobCounterUpdateEvent;
|
|
@@ -217,123 +218,123 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
new UpdatedNodesTransition();
|
|
new UpdatedNodesTransition();
|
|
|
|
|
|
protected static final
|
|
protected static final
|
|
- StateMachineFactory<JobImpl, JobState, JobEventType, JobEvent>
|
|
|
|
|
|
+ StateMachineFactory<JobImpl, JobStateInternal, JobEventType, JobEvent>
|
|
stateMachineFactory
|
|
stateMachineFactory
|
|
- = new StateMachineFactory<JobImpl, JobState, JobEventType, JobEvent>
|
|
|
|
- (JobState.NEW)
|
|
|
|
|
|
+ = new StateMachineFactory<JobImpl, JobStateInternal, JobEventType, JobEvent>
|
|
|
|
+ (JobStateInternal.NEW)
|
|
|
|
|
|
// Transitions from NEW state
|
|
// Transitions from NEW state
|
|
- .addTransition(JobState.NEW, JobState.NEW,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.NEW, JobStateInternal.NEW,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.NEW, JobState.NEW,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.NEW, JobStateInternal.NEW,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
.addTransition
|
|
.addTransition
|
|
- (JobState.NEW,
|
|
|
|
- EnumSet.of(JobState.INITED, JobState.FAILED),
|
|
|
|
|
|
+ (JobStateInternal.NEW,
|
|
|
|
+ EnumSet.of(JobStateInternal.INITED, JobStateInternal.FAILED),
|
|
JobEventType.JOB_INIT,
|
|
JobEventType.JOB_INIT,
|
|
new InitTransition())
|
|
new InitTransition())
|
|
- .addTransition(JobState.NEW, JobState.KILLED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.NEW, JobStateInternal.KILLED,
|
|
JobEventType.JOB_KILL,
|
|
JobEventType.JOB_KILL,
|
|
new KillNewJobTransition())
|
|
new KillNewJobTransition())
|
|
- .addTransition(JobState.NEW, JobState.ERROR,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.NEW, JobStateInternal.ERROR,
|
|
JobEventType.INTERNAL_ERROR,
|
|
JobEventType.INTERNAL_ERROR,
|
|
INTERNAL_ERROR_TRANSITION)
|
|
INTERNAL_ERROR_TRANSITION)
|
|
// Ignore-able events
|
|
// Ignore-able events
|
|
- .addTransition(JobState.NEW, JobState.NEW,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.NEW, JobStateInternal.NEW,
|
|
JobEventType.JOB_UPDATED_NODES)
|
|
JobEventType.JOB_UPDATED_NODES)
|
|
|
|
|
|
// Transitions from INITED state
|
|
// Transitions from INITED state
|
|
- .addTransition(JobState.INITED, JobState.INITED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.INITED, JobStateInternal.INITED,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.INITED, JobState.INITED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.INITED, JobStateInternal.INITED,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.INITED, JobState.RUNNING,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.INITED, JobStateInternal.RUNNING,
|
|
JobEventType.JOB_START,
|
|
JobEventType.JOB_START,
|
|
new StartTransition())
|
|
new StartTransition())
|
|
- .addTransition(JobState.INITED, JobState.KILLED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.INITED, JobStateInternal.KILLED,
|
|
JobEventType.JOB_KILL,
|
|
JobEventType.JOB_KILL,
|
|
new KillInitedJobTransition())
|
|
new KillInitedJobTransition())
|
|
- .addTransition(JobState.INITED, JobState.ERROR,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.INITED, JobStateInternal.ERROR,
|
|
JobEventType.INTERNAL_ERROR,
|
|
JobEventType.INTERNAL_ERROR,
|
|
INTERNAL_ERROR_TRANSITION)
|
|
INTERNAL_ERROR_TRANSITION)
|
|
// Ignore-able events
|
|
// Ignore-able events
|
|
- .addTransition(JobState.INITED, JobState.INITED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.INITED, JobStateInternal.INITED,
|
|
JobEventType.JOB_UPDATED_NODES)
|
|
JobEventType.JOB_UPDATED_NODES)
|
|
|
|
|
|
// Transitions from RUNNING state
|
|
// Transitions from RUNNING state
|
|
- .addTransition(JobState.RUNNING, JobState.RUNNING,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.RUNNING, JobStateInternal.RUNNING,
|
|
JobEventType.JOB_TASK_ATTEMPT_COMPLETED,
|
|
JobEventType.JOB_TASK_ATTEMPT_COMPLETED,
|
|
TASK_ATTEMPT_COMPLETED_EVENT_TRANSITION)
|
|
TASK_ATTEMPT_COMPLETED_EVENT_TRANSITION)
|
|
.addTransition
|
|
.addTransition
|
|
- (JobState.RUNNING,
|
|
|
|
- EnumSet.of(JobState.RUNNING, JobState.SUCCEEDED, JobState.FAILED),
|
|
|
|
|
|
+ (JobStateInternal.RUNNING,
|
|
|
|
+ EnumSet.of(JobStateInternal.RUNNING, JobStateInternal.SUCCEEDED, JobStateInternal.FAILED),
|
|
JobEventType.JOB_TASK_COMPLETED,
|
|
JobEventType.JOB_TASK_COMPLETED,
|
|
new TaskCompletedTransition())
|
|
new TaskCompletedTransition())
|
|
.addTransition
|
|
.addTransition
|
|
- (JobState.RUNNING,
|
|
|
|
- EnumSet.of(JobState.RUNNING, JobState.SUCCEEDED, JobState.FAILED),
|
|
|
|
|
|
+ (JobStateInternal.RUNNING,
|
|
|
|
+ EnumSet.of(JobStateInternal.RUNNING, JobStateInternal.SUCCEEDED, JobStateInternal.FAILED),
|
|
JobEventType.JOB_COMPLETED,
|
|
JobEventType.JOB_COMPLETED,
|
|
new JobNoTasksCompletedTransition())
|
|
new JobNoTasksCompletedTransition())
|
|
- .addTransition(JobState.RUNNING, JobState.KILL_WAIT,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.RUNNING, JobStateInternal.KILL_WAIT,
|
|
JobEventType.JOB_KILL, new KillTasksTransition())
|
|
JobEventType.JOB_KILL, new KillTasksTransition())
|
|
- .addTransition(JobState.RUNNING, JobState.RUNNING,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.RUNNING, JobStateInternal.RUNNING,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
UPDATED_NODES_TRANSITION)
|
|
UPDATED_NODES_TRANSITION)
|
|
- .addTransition(JobState.RUNNING, JobState.RUNNING,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.RUNNING, JobStateInternal.RUNNING,
|
|
JobEventType.JOB_MAP_TASK_RESCHEDULED,
|
|
JobEventType.JOB_MAP_TASK_RESCHEDULED,
|
|
new MapTaskRescheduledTransition())
|
|
new MapTaskRescheduledTransition())
|
|
- .addTransition(JobState.RUNNING, JobState.RUNNING,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.RUNNING, JobStateInternal.RUNNING,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.RUNNING, JobState.RUNNING,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.RUNNING, JobStateInternal.RUNNING,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.RUNNING, JobState.RUNNING,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.RUNNING, JobStateInternal.RUNNING,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
new TaskAttemptFetchFailureTransition())
|
|
new TaskAttemptFetchFailureTransition())
|
|
.addTransition(
|
|
.addTransition(
|
|
- JobState.RUNNING,
|
|
|
|
- JobState.ERROR, JobEventType.INTERNAL_ERROR,
|
|
|
|
|
|
+ JobStateInternal.RUNNING,
|
|
|
|
+ JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR,
|
|
INTERNAL_ERROR_TRANSITION)
|
|
INTERNAL_ERROR_TRANSITION)
|
|
|
|
|
|
// Transitions from KILL_WAIT state.
|
|
// Transitions from KILL_WAIT state.
|
|
.addTransition
|
|
.addTransition
|
|
- (JobState.KILL_WAIT,
|
|
|
|
- EnumSet.of(JobState.KILL_WAIT, JobState.KILLED),
|
|
|
|
|
|
+ (JobStateInternal.KILL_WAIT,
|
|
|
|
+ EnumSet.of(JobStateInternal.KILL_WAIT, JobStateInternal.KILLED),
|
|
JobEventType.JOB_TASK_COMPLETED,
|
|
JobEventType.JOB_TASK_COMPLETED,
|
|
new KillWaitTaskCompletedTransition())
|
|
new KillWaitTaskCompletedTransition())
|
|
- .addTransition(JobState.KILL_WAIT, JobState.KILL_WAIT,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.KILL_WAIT, JobStateInternal.KILL_WAIT,
|
|
JobEventType.JOB_TASK_ATTEMPT_COMPLETED,
|
|
JobEventType.JOB_TASK_ATTEMPT_COMPLETED,
|
|
TASK_ATTEMPT_COMPLETED_EVENT_TRANSITION)
|
|
TASK_ATTEMPT_COMPLETED_EVENT_TRANSITION)
|
|
- .addTransition(JobState.KILL_WAIT, JobState.KILL_WAIT,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.KILL_WAIT, JobStateInternal.KILL_WAIT,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.KILL_WAIT, JobState.KILL_WAIT,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.KILL_WAIT, JobStateInternal.KILL_WAIT,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
.addTransition(
|
|
.addTransition(
|
|
- JobState.KILL_WAIT,
|
|
|
|
- JobState.ERROR, JobEventType.INTERNAL_ERROR,
|
|
|
|
|
|
+ JobStateInternal.KILL_WAIT,
|
|
|
|
+ JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR,
|
|
INTERNAL_ERROR_TRANSITION)
|
|
INTERNAL_ERROR_TRANSITION)
|
|
// Ignore-able events
|
|
// Ignore-able events
|
|
- .addTransition(JobState.KILL_WAIT, JobState.KILL_WAIT,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.KILL_WAIT, JobStateInternal.KILL_WAIT,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_MAP_TASK_RESCHEDULED,
|
|
JobEventType.JOB_MAP_TASK_RESCHEDULED,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE))
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE))
|
|
|
|
|
|
// Transitions from SUCCEEDED state
|
|
// Transitions from SUCCEEDED state
|
|
- .addTransition(JobState.SUCCEEDED, JobState.SUCCEEDED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.SUCCEEDED, JobStateInternal.SUCCEEDED,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.SUCCEEDED, JobState.SUCCEEDED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.SUCCEEDED, JobStateInternal.SUCCEEDED,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
.addTransition(
|
|
.addTransition(
|
|
- JobState.SUCCEEDED,
|
|
|
|
- JobState.ERROR, JobEventType.INTERNAL_ERROR,
|
|
|
|
|
|
+ JobStateInternal.SUCCEEDED,
|
|
|
|
+ JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR,
|
|
INTERNAL_ERROR_TRANSITION)
|
|
INTERNAL_ERROR_TRANSITION)
|
|
// Ignore-able events
|
|
// Ignore-able events
|
|
- .addTransition(JobState.SUCCEEDED, JobState.SUCCEEDED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.SUCCEEDED, JobStateInternal.SUCCEEDED,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
@@ -341,17 +342,17 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
JobEventType.JOB_TASK_COMPLETED))
|
|
JobEventType.JOB_TASK_COMPLETED))
|
|
|
|
|
|
// Transitions from FAILED state
|
|
// Transitions from FAILED state
|
|
- .addTransition(JobState.FAILED, JobState.FAILED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.FAILED, JobStateInternal.FAILED,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.FAILED, JobState.FAILED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.FAILED, JobStateInternal.FAILED,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
.addTransition(
|
|
.addTransition(
|
|
- JobState.FAILED,
|
|
|
|
- JobState.ERROR, JobEventType.INTERNAL_ERROR,
|
|
|
|
|
|
+ JobStateInternal.FAILED,
|
|
|
|
+ JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR,
|
|
INTERNAL_ERROR_TRANSITION)
|
|
INTERNAL_ERROR_TRANSITION)
|
|
// Ignore-able events
|
|
// Ignore-able events
|
|
- .addTransition(JobState.FAILED, JobState.FAILED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.FAILED, JobStateInternal.FAILED,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
@@ -359,17 +360,17 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
JobEventType.JOB_TASK_COMPLETED))
|
|
JobEventType.JOB_TASK_COMPLETED))
|
|
|
|
|
|
// Transitions from KILLED state
|
|
// Transitions from KILLED state
|
|
- .addTransition(JobState.KILLED, JobState.KILLED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.KILLED, JobStateInternal.KILLED,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
JobEventType.JOB_DIAGNOSTIC_UPDATE,
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
DIAGNOSTIC_UPDATE_TRANSITION)
|
|
- .addTransition(JobState.KILLED, JobState.KILLED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.KILLED, JobStateInternal.KILLED,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
.addTransition(
|
|
.addTransition(
|
|
- JobState.KILLED,
|
|
|
|
- JobState.ERROR, JobEventType.INTERNAL_ERROR,
|
|
|
|
|
|
+ JobStateInternal.KILLED,
|
|
|
|
+ JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR,
|
|
INTERNAL_ERROR_TRANSITION)
|
|
INTERNAL_ERROR_TRANSITION)
|
|
// Ignore-able events
|
|
// Ignore-able events
|
|
- .addTransition(JobState.KILLED, JobState.KILLED,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.KILLED, JobStateInternal.KILLED,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
EnumSet.of(JobEventType.JOB_KILL,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
@@ -378,8 +379,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
|
|
|
|
// No transitions from INTERNAL_ERROR state. Ignore all.
|
|
// No transitions from INTERNAL_ERROR state. Ignore all.
|
|
.addTransition(
|
|
.addTransition(
|
|
- JobState.ERROR,
|
|
|
|
- JobState.ERROR,
|
|
|
|
|
|
+ JobStateInternal.ERROR,
|
|
|
|
+ JobStateInternal.ERROR,
|
|
EnumSet.of(JobEventType.JOB_INIT,
|
|
EnumSet.of(JobEventType.JOB_INIT,
|
|
JobEventType.JOB_KILL,
|
|
JobEventType.JOB_KILL,
|
|
JobEventType.JOB_TASK_COMPLETED,
|
|
JobEventType.JOB_TASK_COMPLETED,
|
|
@@ -389,12 +390,12 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_UPDATED_NODES,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE,
|
|
JobEventType.INTERNAL_ERROR))
|
|
JobEventType.INTERNAL_ERROR))
|
|
- .addTransition(JobState.ERROR, JobState.ERROR,
|
|
|
|
|
|
+ .addTransition(JobStateInternal.ERROR, JobStateInternal.ERROR,
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
|
|
// create the topology tables
|
|
// create the topology tables
|
|
.installTopology();
|
|
.installTopology();
|
|
|
|
|
|
- private final StateMachine<JobState, JobEventType, JobEvent> stateMachine;
|
|
|
|
|
|
+ private final StateMachine<JobStateInternal, JobEventType, JobEvent> stateMachine;
|
|
|
|
|
|
//changing fields while the job is running
|
|
//changing fields while the job is running
|
|
private int numMapTasks;
|
|
private int numMapTasks;
|
|
@@ -461,7 +462,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
stateMachine = stateMachineFactory.make(this);
|
|
stateMachine = stateMachineFactory.make(this);
|
|
}
|
|
}
|
|
|
|
|
|
- protected StateMachine<JobState, JobEventType, JobEvent> getStateMachine() {
|
|
|
|
|
|
+ protected StateMachine<JobStateInternal, JobEventType, JobEvent> getStateMachine() {
|
|
return stateMachine;
|
|
return stateMachine;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -540,9 +541,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
readLock.lock();
|
|
readLock.lock();
|
|
|
|
|
|
try {
|
|
try {
|
|
- JobState state = getState();
|
|
|
|
- if (state == JobState.ERROR || state == JobState.FAILED
|
|
|
|
- || state == JobState.KILLED || state == JobState.SUCCEEDED) {
|
|
|
|
|
|
+ JobStateInternal state = getInternalState();
|
|
|
|
+ if (state == JobStateInternal.ERROR || state == JobStateInternal.FAILED
|
|
|
|
+ || state == JobStateInternal.KILLED || state == JobStateInternal.SUCCEEDED) {
|
|
this.mayBeConstructFinalFullCounters();
|
|
this.mayBeConstructFinalFullCounters();
|
|
return fullCounters;
|
|
return fullCounters;
|
|
}
|
|
}
|
|
@@ -607,7 +608,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
diagsb.append(s).append("\n");
|
|
diagsb.append(s).append("\n");
|
|
}
|
|
}
|
|
|
|
|
|
- if (getState() == JobState.NEW) {
|
|
|
|
|
|
+ if (getInternalState() == JobStateInternal.NEW) {
|
|
return MRBuilderUtils.newJobReport(jobId, jobName, username, state,
|
|
return MRBuilderUtils.newJobReport(jobId, jobName, username, state,
|
|
appSubmitTime, startTime, finishTime, setupProgress, 0.0f, 0.0f,
|
|
appSubmitTime, startTime, finishTime, setupProgress, 0.0f, 0.0f,
|
|
cleanupProgress, jobFile, amInfos, isUber, diagsb.toString());
|
|
cleanupProgress, jobFile, amInfos, isUber, diagsb.toString());
|
|
@@ -693,7 +694,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
public JobState getState() {
|
|
public JobState getState() {
|
|
readLock.lock();
|
|
readLock.lock();
|
|
try {
|
|
try {
|
|
- return getStateMachine().getCurrentState();
|
|
|
|
|
|
+ return getExternalState(getStateMachine().getCurrentState());
|
|
} finally {
|
|
} finally {
|
|
readLock.unlock();
|
|
readLock.unlock();
|
|
}
|
|
}
|
|
@@ -712,10 +713,10 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
*/
|
|
*/
|
|
public void handle(JobEvent event) {
|
|
public void handle(JobEvent event) {
|
|
LOG.debug("Processing " + event.getJobId() + " of type " + event.getType());
|
|
LOG.debug("Processing " + event.getJobId() + " of type " + event.getType());
|
|
- LOG.info("XXX: Processing " + event.getJobId() + " of type " + event.getType() + " while in state: " + getState());
|
|
|
|
|
|
+ LOG.info("XXX: Processing " + event.getJobId() + " of type " + event.getType() + " while in state: " + getInternalState());
|
|
try {
|
|
try {
|
|
writeLock.lock();
|
|
writeLock.lock();
|
|
- JobState oldState = getState();
|
|
|
|
|
|
+ JobStateInternal oldState = getInternalState();
|
|
try {
|
|
try {
|
|
getStateMachine().doTransition(event.getType(), event);
|
|
getStateMachine().doTransition(event.getType(), event);
|
|
} catch (InvalidStateTransitonException e) {
|
|
} catch (InvalidStateTransitonException e) {
|
|
@@ -726,9 +727,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
JobEventType.INTERNAL_ERROR));
|
|
JobEventType.INTERNAL_ERROR));
|
|
}
|
|
}
|
|
//notify the eventhandler of state change
|
|
//notify the eventhandler of state change
|
|
- if (oldState != getState()) {
|
|
|
|
|
|
+ if (oldState != getInternalState()) {
|
|
LOG.info(jobId + "Job Transitioned from " + oldState + " to "
|
|
LOG.info(jobId + "Job Transitioned from " + oldState + " to "
|
|
- + getState());
|
|
|
|
|
|
+ + getInternalState());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -737,6 +738,24 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ @Private
|
|
|
|
+ public JobStateInternal getInternalState() {
|
|
|
|
+ readLock.lock();
|
|
|
|
+ try {
|
|
|
|
+ return getStateMachine().getCurrentState();
|
|
|
|
+ } finally {
|
|
|
|
+ readLock.unlock();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static JobState getExternalState(JobStateInternal smState) {
|
|
|
|
+ if (smState == JobStateInternal.KILL_WAIT) {
|
|
|
|
+ return JobState.KILLED;
|
|
|
|
+ } else {
|
|
|
|
+ return JobState.valueOf(smState.name());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
//helpful in testing
|
|
//helpful in testing
|
|
protected void addTask(Task task) {
|
|
protected void addTask(Task task) {
|
|
synchronized (tasksSyncHandle) {
|
|
synchronized (tasksSyncHandle) {
|
|
@@ -777,7 +796,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
return FileSystem.get(conf);
|
|
return FileSystem.get(conf);
|
|
}
|
|
}
|
|
|
|
|
|
- static JobState checkJobCompleteSuccess(JobImpl job) {
|
|
|
|
|
|
+ static JobStateInternal checkJobCompleteSuccess(JobImpl job) {
|
|
// check for Job success
|
|
// check for Job success
|
|
if (job.completedTaskCount == job.tasks.size()) {
|
|
if (job.completedTaskCount == job.tasks.size()) {
|
|
try {
|
|
try {
|
|
@@ -786,18 +805,18 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
} catch (IOException e) {
|
|
} catch (IOException e) {
|
|
LOG.error("Could not do commit for Job", e);
|
|
LOG.error("Could not do commit for Job", e);
|
|
job.logJobHistoryFinishedEvent();
|
|
job.logJobHistoryFinishedEvent();
|
|
- return job.finished(JobState.FAILED);
|
|
|
|
|
|
+ return job.finished(JobStateInternal.FAILED);
|
|
}
|
|
}
|
|
job.logJobHistoryFinishedEvent();
|
|
job.logJobHistoryFinishedEvent();
|
|
// TODO: Maybe set cleanup progress. Otherwise job progress will
|
|
// TODO: Maybe set cleanup progress. Otherwise job progress will
|
|
// always stay at 0.95 when reported from an AM.
|
|
// always stay at 0.95 when reported from an AM.
|
|
- return job.finished(JobState.SUCCEEDED);
|
|
|
|
|
|
+ return job.finished(JobStateInternal.SUCCEEDED);
|
|
}
|
|
}
|
|
return null;
|
|
return null;
|
|
}
|
|
}
|
|
|
|
|
|
- JobState finished(JobState finalState) {
|
|
|
|
- if (getState() == JobState.RUNNING) {
|
|
|
|
|
|
+ JobStateInternal finished(JobStateInternal finalState) {
|
|
|
|
+ if (getInternalState() == JobStateInternal.RUNNING) {
|
|
metrics.endRunningJob(this);
|
|
metrics.endRunningJob(this);
|
|
}
|
|
}
|
|
if (finishTime == 0) setFinishTime();
|
|
if (finishTime == 0) setFinishTime();
|
|
@@ -1010,7 +1029,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
*/
|
|
*/
|
|
|
|
|
|
public static class InitTransition
|
|
public static class InitTransition
|
|
- implements MultipleArcTransition<JobImpl, JobEvent, JobState> {
|
|
|
|
|
|
+ implements MultipleArcTransition<JobImpl, JobEvent, JobStateInternal> {
|
|
|
|
|
|
/**
|
|
/**
|
|
* Note that this transition method is called directly (and synchronously)
|
|
* Note that this transition method is called directly (and synchronously)
|
|
@@ -1020,7 +1039,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
* way; MR version is).
|
|
* way; MR version is).
|
|
*/
|
|
*/
|
|
@Override
|
|
@Override
|
|
- public JobState transition(JobImpl job, JobEvent event) {
|
|
|
|
|
|
+ public JobStateInternal transition(JobImpl job, JobEvent event) {
|
|
job.metrics.submittedJob(job);
|
|
job.metrics.submittedJob(job);
|
|
job.metrics.preparingJob(job);
|
|
job.metrics.preparingJob(job);
|
|
try {
|
|
try {
|
|
@@ -1086,7 +1105,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
createReduceTasks(job);
|
|
createReduceTasks(job);
|
|
|
|
|
|
job.metrics.endPreparingJob(job);
|
|
job.metrics.endPreparingJob(job);
|
|
- return JobState.INITED;
|
|
|
|
|
|
+ return JobStateInternal.INITED;
|
|
//TODO XXX Should JobInitedEvent be generated here (instead of in StartTransition)
|
|
//TODO XXX Should JobInitedEvent be generated here (instead of in StartTransition)
|
|
|
|
|
|
} catch (IOException e) {
|
|
} catch (IOException e) {
|
|
@@ -1095,7 +1114,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
+ StringUtils.stringifyException(e));
|
|
+ StringUtils.stringifyException(e));
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.FAILED);
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.FAILED);
|
|
job.metrics.endPreparingJob(job);
|
|
job.metrics.endPreparingJob(job);
|
|
- return job.finished(JobState.FAILED);
|
|
|
|
|
|
+ return job.finished(JobStateInternal.FAILED);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1303,9 +1322,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
JobUnsuccessfulCompletionEvent failedEvent =
|
|
JobUnsuccessfulCompletionEvent failedEvent =
|
|
new JobUnsuccessfulCompletionEvent(job.oldJobId,
|
|
new JobUnsuccessfulCompletionEvent(job.oldJobId,
|
|
job.finishTime, 0, 0,
|
|
job.finishTime, 0, 0,
|
|
- JobState.KILLED.toString());
|
|
|
|
|
|
+ JobStateInternal.KILLED.toString());
|
|
job.eventHandler.handle(new JobHistoryEvent(job.jobId, failedEvent));
|
|
job.eventHandler.handle(new JobHistoryEvent(job.jobId, failedEvent));
|
|
- job.finished(JobState.KILLED);
|
|
|
|
|
|
+ job.finished(JobStateInternal.KILLED);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1315,7 +1334,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
public void transition(JobImpl job, JobEvent event) {
|
|
public void transition(JobImpl job, JobEvent event) {
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.KILLED);
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.KILLED);
|
|
job.addDiagnostic("Job received Kill in INITED state.");
|
|
job.addDiagnostic("Job received Kill in INITED state.");
|
|
- job.finished(JobState.KILLED);
|
|
|
|
|
|
+ job.finished(JobStateInternal.KILLED);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1415,10 +1434,10 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
}
|
|
}
|
|
|
|
|
|
private static class TaskCompletedTransition implements
|
|
private static class TaskCompletedTransition implements
|
|
- MultipleArcTransition<JobImpl, JobEvent, JobState> {
|
|
|
|
|
|
+ MultipleArcTransition<JobImpl, JobEvent, JobStateInternal> {
|
|
|
|
|
|
@Override
|
|
@Override
|
|
- public JobState transition(JobImpl job, JobEvent event) {
|
|
|
|
|
|
+ public JobStateInternal transition(JobImpl job, JobEvent event) {
|
|
job.completedTaskCount++;
|
|
job.completedTaskCount++;
|
|
LOG.info("Num completed Tasks: " + job.completedTaskCount);
|
|
LOG.info("Num completed Tasks: " + job.completedTaskCount);
|
|
JobTaskEvent taskEvent = (JobTaskEvent) event;
|
|
JobTaskEvent taskEvent = (JobTaskEvent) event;
|
|
@@ -1434,7 +1453,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
return checkJobForCompletion(job);
|
|
return checkJobForCompletion(job);
|
|
}
|
|
}
|
|
|
|
|
|
- protected JobState checkJobForCompletion(JobImpl job) {
|
|
|
|
|
|
+ protected JobStateInternal checkJobForCompletion(JobImpl job) {
|
|
//check for Job failure
|
|
//check for Job failure
|
|
if (job.failedMapTaskCount*100 >
|
|
if (job.failedMapTaskCount*100 >
|
|
job.allowedMapFailuresPercent*job.numMapTasks ||
|
|
job.allowedMapFailuresPercent*job.numMapTasks ||
|
|
@@ -1448,16 +1467,16 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
LOG.info(diagnosticMsg);
|
|
LOG.info(diagnosticMsg);
|
|
job.addDiagnostic(diagnosticMsg);
|
|
job.addDiagnostic(diagnosticMsg);
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.FAILED);
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.FAILED);
|
|
- return job.finished(JobState.FAILED);
|
|
|
|
|
|
+ return job.finished(JobStateInternal.FAILED);
|
|
}
|
|
}
|
|
|
|
|
|
- JobState jobCompleteSuccess = JobImpl.checkJobCompleteSuccess(job);
|
|
|
|
|
|
+ JobStateInternal jobCompleteSuccess = JobImpl.checkJobCompleteSuccess(job);
|
|
if (jobCompleteSuccess != null) {
|
|
if (jobCompleteSuccess != null) {
|
|
return jobCompleteSuccess;
|
|
return jobCompleteSuccess;
|
|
}
|
|
}
|
|
|
|
|
|
//return the current state, Job not finished yet
|
|
//return the current state, Job not finished yet
|
|
- return job.getState();
|
|
|
|
|
|
+ return job.getInternalState();
|
|
}
|
|
}
|
|
|
|
|
|
private void taskSucceeded(JobImpl job, Task task) {
|
|
private void taskSucceeded(JobImpl job, Task task) {
|
|
@@ -1491,17 +1510,17 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
|
|
|
|
// Transition class for handling jobs with no tasks
|
|
// Transition class for handling jobs with no tasks
|
|
static class JobNoTasksCompletedTransition implements
|
|
static class JobNoTasksCompletedTransition implements
|
|
- MultipleArcTransition<JobImpl, JobEvent, JobState> {
|
|
|
|
|
|
+ MultipleArcTransition<JobImpl, JobEvent, JobStateInternal> {
|
|
|
|
|
|
@Override
|
|
@Override
|
|
- public JobState transition(JobImpl job, JobEvent event) {
|
|
|
|
- JobState jobCompleteSuccess = JobImpl.checkJobCompleteSuccess(job);
|
|
|
|
|
|
+ public JobStateInternal transition(JobImpl job, JobEvent event) {
|
|
|
|
+ JobStateInternal jobCompleteSuccess = JobImpl.checkJobCompleteSuccess(job);
|
|
if (jobCompleteSuccess != null) {
|
|
if (jobCompleteSuccess != null) {
|
|
return jobCompleteSuccess;
|
|
return jobCompleteSuccess;
|
|
}
|
|
}
|
|
|
|
|
|
// Return the current state, Job not finished yet
|
|
// Return the current state, Job not finished yet
|
|
- return job.getState();
|
|
|
|
|
|
+ return job.getInternalState();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1518,14 +1537,14 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
private static class KillWaitTaskCompletedTransition extends
|
|
private static class KillWaitTaskCompletedTransition extends
|
|
TaskCompletedTransition {
|
|
TaskCompletedTransition {
|
|
@Override
|
|
@Override
|
|
- protected JobState checkJobForCompletion(JobImpl job) {
|
|
|
|
|
|
+ protected JobStateInternal checkJobForCompletion(JobImpl job) {
|
|
if (job.completedTaskCount == job.tasks.size()) {
|
|
if (job.completedTaskCount == job.tasks.size()) {
|
|
job.setFinishTime();
|
|
job.setFinishTime();
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.KILLED);
|
|
job.abortJob(org.apache.hadoop.mapreduce.JobStatus.State.KILLED);
|
|
- return job.finished(JobState.KILLED);
|
|
|
|
|
|
+ return job.finished(JobStateInternal.KILLED);
|
|
}
|
|
}
|
|
//return the current state, Job not finished yet
|
|
//return the current state, Job not finished yet
|
|
- return job.getState();
|
|
|
|
|
|
+ return job.getInternalState();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1579,9 +1598,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app2.job.Job,
|
|
JobUnsuccessfulCompletionEvent failedEvent =
|
|
JobUnsuccessfulCompletionEvent failedEvent =
|
|
new JobUnsuccessfulCompletionEvent(job.oldJobId,
|
|
new JobUnsuccessfulCompletionEvent(job.oldJobId,
|
|
job.finishTime, 0, 0,
|
|
job.finishTime, 0, 0,
|
|
- JobState.ERROR.toString());
|
|
|
|
|
|
+ JobStateInternal.ERROR.toString());
|
|
job.eventHandler.handle(new JobHistoryEvent(job.jobId, failedEvent));
|
|
job.eventHandler.handle(new JobHistoryEvent(job.jobId, failedEvent));
|
|
- job.finished(JobState.ERROR);
|
|
|
|
|
|
+ job.finished(JobStateInternal.ERROR);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|