Browse Source

MAPREDUCE-5888. Failed job leaves hung AM after it unregisters (Jason Lowe via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1594318 13f79535-47bb-0310-9956-ffa450edef68
Jonathan Turner Eagles 11 years ago
parent
commit
f8dad8a751

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -80,6 +80,9 @@ Release 2.5.0 - UNRELEASED
     MAPREDUCE-5884. History server uses short user name when canceling tokens
     (Mohammad Kamrul Islam via jlowe)
 
 
+    MAPREDUCE-5888. Failed job leaves hung AM after it unregisters (Jason Lowe
+    via jeagles)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 12 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java

@@ -32,6 +32,7 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.Set;
 import java.util.concurrent.ScheduledFuture;
 import java.util.concurrent.ScheduledFuture;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReadWriteLock;
@@ -129,6 +130,8 @@ import org.apache.hadoop.yarn.state.StateMachine;
 import org.apache.hadoop.yarn.state.StateMachineFactory;
 import org.apache.hadoop.yarn.state.StateMachineFactory;
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.Clock;
 
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
 /** Implementation of Job interface. Maintains the state machines of Job.
  * The read and write calls use ReadWriteLock for concurrency.
  */
@@ -644,8 +647,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
   
   
   private JobStateInternal forcedState = null;
   private JobStateInternal forcedState = null;
 
 
-  //Executor used for running future tasks. Setting thread pool size to 1
-  private ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(1);
+  //Executor used for running future tasks.
+  private ScheduledThreadPoolExecutor executor;
   private ScheduledFuture failWaitTriggerScheduledFuture;
   private ScheduledFuture failWaitTriggerScheduledFuture;
 
 
   private JobState lastNonFinalState = JobState.NEW;
   private JobState lastNonFinalState = JobState.NEW;
@@ -687,6 +690,13 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
     this.aclsManager = new JobACLsManager(conf);
     this.aclsManager = new JobACLsManager(conf);
     this.username = System.getProperty("user.name");
     this.username = System.getProperty("user.name");
     this.jobACLs = aclsManager.constructJobACLs(conf);
     this.jobACLs = aclsManager.constructJobACLs(conf);
+
+    ThreadFactory threadFactory = new ThreadFactoryBuilder()
+      .setNameFormat("Job Fail Wait Timeout Monitor #%d")
+      .setDaemon(true)
+      .build();
+    this.executor = new ScheduledThreadPoolExecutor(1, threadFactory);
+
     // This "this leak" is okay because the retained pointer is in an
     //  instance variable.
     stateMachine = stateMachineFactory.make(this);
     stateMachine = stateMachineFactory.make(this);