Kaynağa Gözat

HADOOP-600. Fix a race condition in the JobTracker. Contributed by Arun.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@494172 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 yıl önce
ebeveyn
işleme
31a595da86
2 değiştirilmiş dosya ile 39 ekleme ve 27 silme
  1. 3 0
      CHANGES.txt
  2. 36 27
      src/java/org/apache/hadoop/mapred/JobTracker.java

+ 3 - 0
CHANGES.txt

@@ -11,6 +11,9 @@ Trunk (unreleased changes)
 
 
  3. HADOOP-815.  Fix memory leaks in JobTracker. (Arun C Murthy via cutting)
  3. HADOOP-815.  Fix memory leaks in JobTracker. (Arun C Murthy via cutting)
 
 
+ 4. HADOOP-600.  Fix a race condition in JobTracker.
+    (Arun C Murthy via cutting)
+
 
 
 Release 0.10.0 - 2007-01-05
 Release 0.10.0 - 2007-01-05
 
 

+ 36 - 27
src/java/org/apache/hadoop/mapred/JobTracker.java

@@ -221,36 +221,45 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
                 //
                 //
                 // Loop through all expired items in the queue
                 // Loop through all expired items in the queue
                 //
                 //
-                synchronized (taskTrackers) {
+                // Need to lock the JobTracker here since we are
+                // manipulating it's data-structures via
+                // ExpireTrackers.run -> JobTracker.lostTaskTracker ->
+                // JobInProgress.failedTask -> JobTracker.markCompleteTaskAttempt
+                // Also need to lock JobTracker before locking 'taskTracker' &
+                // 'trackerExpiryQueue' to prevent deadlock:
+                // @see {@link JobTracker.processHeartbeat(TaskTrackerStatus, boolean)} 
+                synchronized (JobTracker.this) {
+                  synchronized (taskTrackers) {
                     synchronized (trackerExpiryQueue) {
                     synchronized (trackerExpiryQueue) {
-                        long now = System.currentTimeMillis();
-                        TaskTrackerStatus leastRecent = null;
-                        while ((trackerExpiryQueue.size() > 0) &&
-                               ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null) &&
-                               (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) {
-
-                            // Remove profile from head of queue
-                            trackerExpiryQueue.remove(leastRecent);
-                            String trackerName = leastRecent.getTrackerName();
-
-                            // Figure out if last-seen time should be updated, or if tracker is dead
-                            TaskTrackerStatus newProfile = (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName());
-                            // Items might leave the taskTracker set through other means; the
-                            // status stored in 'taskTrackers' might be null, which means the
-                            // tracker has already been destroyed.
-                            if (newProfile != null) {
-                                if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) {
-                                    // Remove completely
-                                    updateTaskTrackerStatus(trackerName, null);
-                                    lostTaskTracker(leastRecent.getTrackerName(),
-                                                    leastRecent.getHost());
-                                } else {
-                                    // Update time by inserting latest profile
-                                    trackerExpiryQueue.add(newProfile);
-                                }
-                            }
+                      long now = System.currentTimeMillis();
+                      TaskTrackerStatus leastRecent = null;
+                      while ((trackerExpiryQueue.size() > 0) &&
+                              ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null) &&
+                              (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) {
+                        
+                        // Remove profile from head of queue
+                        trackerExpiryQueue.remove(leastRecent);
+                        String trackerName = leastRecent.getTrackerName();
+                        
+                        // Figure out if last-seen time should be updated, or if tracker is dead
+                        TaskTrackerStatus newProfile = (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName());
+                        // Items might leave the taskTracker set through other means; the
+                        // status stored in 'taskTrackers' might be null, which means the
+                        // tracker has already been destroyed.
+                        if (newProfile != null) {
+                          if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) {
+                            // Remove completely
+                            updateTaskTrackerStatus(trackerName, null);
+                            lostTaskTracker(leastRecent.getTrackerName(),
+                                    leastRecent.getHost());
+                          } else {
+                            // Update time by inserting latest profile
+                            trackerExpiryQueue.add(newProfile);
+                          }
                         }
                         }
+                      }
                     }
                     }
+                  }
                 }
                 }
               } catch (Exception t) {
               } catch (Exception t) {
                 LOG.error("Tracker Expiry Thread got exception: " +
                 LOG.error("Tracker Expiry Thread got exception: " +