|
@@ -1543,6 +1543,20 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
|
|
SingleArcTransition<JobImpl, JobEvent> {
|
|
SingleArcTransition<JobImpl, JobEvent> {
|
|
@Override
|
|
@Override
|
|
public void transition(JobImpl job, JobEvent event) {
|
|
public void transition(JobImpl job, JobEvent event) {
|
|
|
|
+ //get number of shuffling reduces
|
|
|
|
+ int shufflingReduceTasks = 0;
|
|
|
|
+ for (TaskId taskId : job.reduceTasks) {
|
|
|
|
+ Task task = job.tasks.get(taskId);
|
|
|
|
+ if (TaskState.RUNNING.equals(task.getState())) {
|
|
|
|
+ for(TaskAttempt attempt : task.getAttempts().values()) {
|
|
|
|
+ if(attempt.getPhase() == Phase.SHUFFLE) {
|
|
|
|
+ shufflingReduceTasks++;
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
JobTaskAttemptFetchFailureEvent fetchfailureEvent =
|
|
JobTaskAttemptFetchFailureEvent fetchfailureEvent =
|
|
(JobTaskAttemptFetchFailureEvent) event;
|
|
(JobTaskAttemptFetchFailureEvent) event;
|
|
for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId mapId :
|
|
for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId mapId :
|
|
@@ -1551,20 +1565,6 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
|
|
fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1);
|
|
fetchFailures = (fetchFailures == null) ? 1 : (fetchFailures+1);
|
|
job.fetchFailuresMapping.put(mapId, fetchFailures);
|
|
job.fetchFailuresMapping.put(mapId, fetchFailures);
|
|
|
|
|
|
- //get number of shuffling reduces
|
|
|
|
- int shufflingReduceTasks = 0;
|
|
|
|
- for (TaskId taskId : job.reduceTasks) {
|
|
|
|
- Task task = job.tasks.get(taskId);
|
|
|
|
- if (TaskState.RUNNING.equals(task.getState())) {
|
|
|
|
- for(TaskAttempt attempt : task.getAttempts().values()) {
|
|
|
|
- if(attempt.getReport().getPhase() == Phase.SHUFFLE) {
|
|
|
|
- shufflingReduceTasks++;
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
float failureRate = shufflingReduceTasks == 0 ? 1.0f :
|
|
float failureRate = shufflingReduceTasks == 0 ? 1.0f :
|
|
(float) fetchFailures / shufflingReduceTasks;
|
|
(float) fetchFailures / shufflingReduceTasks;
|
|
// declare faulty if fetch-failures >= max-allowed-failures
|
|
// declare faulty if fetch-failures >= max-allowed-failures
|