|
@@ -26,6 +26,7 @@ import java.util.Set;
|
|
|
import java.util.HashSet;
|
|
|
import java.util.Arrays;
|
|
|
|
|
|
+import com.google.common.annotations.VisibleForTesting;
|
|
|
import org.apache.commons.lang.StringUtils;
|
|
|
import org.apache.commons.logging.Log;
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
@@ -43,6 +44,7 @@ import org.apache.hadoop.mapreduce.Job;
|
|
|
import org.apache.hadoop.mapreduce.JobID;
|
|
|
import org.apache.hadoop.mapreduce.JobPriority;
|
|
|
import org.apache.hadoop.mapreduce.JobStatus;
|
|
|
+import org.apache.hadoop.mapreduce.MRJobConfig;
|
|
|
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
|
|
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
|
|
|
import org.apache.hadoop.mapreduce.TaskReport;
|
|
@@ -268,7 +270,7 @@ public class CLI extends Configured implements Tool {
|
|
|
System.out.println("Created job " + job.getJobID());
|
|
|
exitCode = 0;
|
|
|
} else if (getStatus) {
|
|
|
- Job job = cluster.getJob(JobID.forName(jobid));
|
|
|
+ Job job = getJob(JobID.forName(jobid));
|
|
|
if (job == null) {
|
|
|
System.out.println("Could not find job " + jobid);
|
|
|
} else {
|
|
@@ -283,7 +285,7 @@ public class CLI extends Configured implements Tool {
|
|
|
exitCode = 0;
|
|
|
}
|
|
|
} else if (getCounter) {
|
|
|
- Job job = cluster.getJob(JobID.forName(jobid));
|
|
|
+ Job job = getJob(JobID.forName(jobid));
|
|
|
if (job == null) {
|
|
|
System.out.println("Could not find job " + jobid);
|
|
|
} else {
|
|
@@ -299,7 +301,7 @@ public class CLI extends Configured implements Tool {
|
|
|
}
|
|
|
}
|
|
|
} else if (killJob) {
|
|
|
- Job job = cluster.getJob(JobID.forName(jobid));
|
|
|
+ Job job = getJob(JobID.forName(jobid));
|
|
|
if (job == null) {
|
|
|
System.out.println("Could not find job " + jobid);
|
|
|
} else {
|
|
@@ -323,7 +325,7 @@ public class CLI extends Configured implements Tool {
|
|
|
}
|
|
|
}
|
|
|
} else if (setJobPriority) {
|
|
|
- Job job = cluster.getJob(JobID.forName(jobid));
|
|
|
+ Job job = getJob(JobID.forName(jobid));
|
|
|
if (job == null) {
|
|
|
System.out.println("Could not find job " + jobid);
|
|
|
} else {
|
|
@@ -339,7 +341,7 @@ public class CLI extends Configured implements Tool {
|
|
|
viewHistory(historyFile, viewAllHistory);
|
|
|
exitCode = 0;
|
|
|
} else if (listEvents) {
|
|
|
- listEvents(cluster.getJob(JobID.forName(jobid)), fromEvent, nEvents);
|
|
|
+ listEvents(getJob(JobID.forName(jobid)), fromEvent, nEvents);
|
|
|
exitCode = 0;
|
|
|
} else if (listJobs) {
|
|
|
listJobs(cluster);
|
|
@@ -354,11 +356,11 @@ public class CLI extends Configured implements Tool {
|
|
|
listBlacklistedTrackers(cluster);
|
|
|
exitCode = 0;
|
|
|
} else if (displayTasks) {
|
|
|
- displayTasks(cluster.getJob(JobID.forName(jobid)), taskType, taskState);
|
|
|
+ displayTasks(getJob(JobID.forName(jobid)), taskType, taskState);
|
|
|
exitCode = 0;
|
|
|
} else if(killTask) {
|
|
|
TaskAttemptID taskID = TaskAttemptID.forName(taskid);
|
|
|
- Job job = cluster.getJob(taskID.getJobID());
|
|
|
+ Job job = getJob(taskID.getJobID());
|
|
|
if (job == null) {
|
|
|
System.out.println("Could not find job " + jobid);
|
|
|
} else if (job.killTask(taskID, false)) {
|
|
@@ -370,7 +372,7 @@ public class CLI extends Configured implements Tool {
|
|
|
}
|
|
|
} else if(failTask) {
|
|
|
TaskAttemptID taskID = TaskAttemptID.forName(taskid);
|
|
|
- Job job = cluster.getJob(taskID.getJobID());
|
|
|
+ Job job = getJob(taskID.getJobID());
|
|
|
if (job == null) {
|
|
|
System.out.println("Could not find job " + jobid);
|
|
|
} else if(job.killTask(taskID, true)) {
|
|
@@ -531,6 +533,29 @@ public class CLI extends Configured implements Tool {
|
|
|
protected static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) {
|
|
|
return (baseUrl + "/tasklog?plaintext=true&attemptid=" + taskId);
|
|
|
}
|
|
|
+
|
|
|
+ @VisibleForTesting
|
|
|
+ Job getJob(JobID jobid) throws IOException, InterruptedException {
|
|
|
+
|
|
|
+ int maxRetry = getConf().getInt(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES,
|
|
|
+ MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES);
|
|
|
+ long retryInterval = getConf()
|
|
|
+ .getLong(MRJobConfig.MR_CLIENT_JOB_RETRY_INTERVAL,
|
|
|
+ MRJobConfig.DEFAULT_MR_CLIENT_JOB_RETRY_INTERVAL);
|
|
|
+ Job job = cluster.getJob(jobid);
|
|
|
+
|
|
|
+ for (int i = 0; i < maxRetry; ++i) {
|
|
|
+ if (job != null) {
|
|
|
+ return job;
|
|
|
+ }
|
|
|
+ LOG.info("Could not obtain job info after " + String.valueOf(i + 1)
|
|
|
+ + " attempt(s). Sleeping for " + String.valueOf(retryInterval / 1000)
|
|
|
+ + " seconds and retrying.");
|
|
|
+ Thread.sleep(retryInterval);
|
|
|
+ job = cluster.getJob(jobid);
|
|
|
+ }
|
|
|
+ return job;
|
|
|
+ }
|
|
|
|
|
|
|
|
|
/**
|