Browse Source

HADOOP-316. Fix a potential deadlock in the jobtracker. Contributed by Owen.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@416451 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 19 years ago
parent
commit
58afe30c57
2 changed files with 27 additions and 24 deletions
  1. 3 0
      CHANGES.txt
  2. 24 24
      src/java/org/apache/hadoop/mapred/JobTracker.java

+ 3 - 0
CHANGES.txt

@@ -38,6 +38,9 @@ Trunk (unreleased changes)
     "reduce" (generating output).  Long-term, the "sort" phase will
     also be removed.  (omalley via cutting)
 
+ 9. HADOOP-316.  Fix a potential deadlock in the jobtracker.
+    (omalley via cutting)
+
 
 Release 0.3.2 - 2006-06-09
 

+ 24 - 24
src/java/org/apache/hadoop/mapred/JobTracker.java

@@ -113,32 +113,32 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
             Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL/3);
             long now = System.currentTimeMillis();
             LOG.debug("Starting launching task sweep");
-            synchronized (launchingTasks) {
-              Iterator itr = launchingTasks.entrySet().iterator();
-              while (itr.hasNext()) {
-                Map.Entry pair = (Map.Entry) itr.next();
-                String taskId = (String) pair.getKey();
-                long age = now - ((Long) pair.getValue()).longValue();
-                LOG.info(taskId + " is " + age + " ms debug.");
-                if (age > TASKTRACKER_EXPIRY_INTERVAL) {
-                  LOG.info("Launching task " + taskId + " timed out.");
-                  TaskInProgress tip = null;
-                  synchronized (JobTracker.this) {
+            synchronized (JobTracker.this) {
+              synchronized (launchingTasks) {
+                Iterator itr = launchingTasks.entrySet().iterator();
+                while (itr.hasNext()) {
+                  Map.Entry pair = (Map.Entry) itr.next();
+                  String taskId = (String) pair.getKey();
+                  long age = now - ((Long) pair.getValue()).longValue();
+                  LOG.info(taskId + " is " + age + " ms debug.");
+                  if (age > TASKTRACKER_EXPIRY_INTERVAL) {
+                    LOG.info("Launching task " + taskId + " timed out.");
+                    TaskInProgress tip = null;
                     tip = (TaskInProgress) taskidToTIPMap.get(taskId);
+                    if (tip != null) {
+                      JobInProgress job = tip.getJob();
+                      String trackerName = getAssignedTracker(taskId);
+                      TaskTrackerStatus trackerStatus = 
+                        getTaskTracker(trackerName);
+                      job.failedTask(tip, taskId, "Error launching task", 
+                                     trackerStatus.getHost(), trackerName);
+                    }
+                    itr.remove();
+                  } else {
+                    // the tasks are sorted by start time, so once we find
+                    // one that we want to keep, we are done for this cycle.
+                    break;
                   }
-                  if (tip != null) {
-                     JobInProgress job = tip.getJob();
-                     String trackerName = getAssignedTracker(taskId);
-                     TaskTrackerStatus trackerStatus = 
-                       getTaskTracker(trackerName);
-                     job.failedTask(tip, taskId, "Error launching task", 
-                                    trackerStatus.getHost(), trackerName);
-                  }
-                  itr.remove();
-                } else {
-                  // the tasks are sorted by start time, so once we find
-                  // one that we want to keep, we are done for this cycle.
-                  break;
                 }
               }
             }