Преглед изворног кода

MAPREDUCE-6436. JobHistory cache issue. Contributed by Kai Sasaki

(cherry picked from commit 5b7078d06921893200163a3d29c8901c3c0107cb)
Zhihai Xu пре 9 година
родитељ
комит
2d125f2bee

+ 34 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java

@@ -219,13 +219,21 @@ public class HistoryFileManager extends AbstractService {
         // keeping the cache size exactly at the maximum.
         Iterator<JobId> keys = cache.navigableKeySet().iterator();
         long cutoff = System.currentTimeMillis() - maxAge;
+
+        // MAPREDUCE-6436: Track only the first key and a count per state so
+        // that many move-pending histories do not flood the log.
+        JobId firstInIntermediateKey = null;
+        int inIntermediateCount = 0;
+        JobId firstMoveFailedKey = null;
+        int moveFailedCount = 0;
+
         while(cache.size() > maxSize && keys.hasNext()) {
           JobId key = keys.next();
           HistoryFileInfo firstValue = cache.get(key);
           if(firstValue != null) {
             synchronized(firstValue) {
               if (firstValue.isMovePending()) {
-                if(firstValue.didMoveFail() && 
+                if(firstValue.didMoveFail() &&
                     firstValue.jobIndexInfo.getFinishTime() <= cutoff) {
                   cache.remove(key);
                   //Now lets try to delete it
@@ -236,8 +244,17 @@ public class HistoryFileManager extends AbstractService {
                     		" that could not be moved to done.", e);
                   }
                 } else {
-                  LOG.warn("Waiting to remove " + key
-                      + " from JobListCache because it is not in done yet.");
+                  if (firstValue.didMoveFail()) {
+                    if (moveFailedCount == 0) {
+                      firstMoveFailedKey = key;
+                    }
+                    moveFailedCount += 1;
+                  } else {
+                    if (inIntermediateCount == 0) {
+                      firstInIntermediateKey = key;
+                    }
+                    inIntermediateCount += 1;
+                  }
                 }
               } else {
                 cache.remove(key);
@@ -245,6 +262,20 @@ public class HistoryFileManager extends AbstractService {
             }
           }
         }
+        // Log only the first pending job history of each state to limit
+        // the total number of log messages.
+        if (inIntermediateCount > 0) {
+          LOG.warn("Waiting to remove IN_INTERMEDIATE state histories " +
+                  "(e.g. " + firstInIntermediateKey + ") from JobListCache " +
+                  "because it is not in done yet. Total count is " +
+                  inIntermediateCount + ".");
+        }
+        if (moveFailedCount > 0) {
+          LOG.warn("Waiting to remove MOVE_FAILED state histories " +
+                  "(e.g. " + firstMoveFailedKey + ") from JobListCache " +
+                  "because it is not in done yet. Total count is " +
+                  moveFailedCount + ".");
+        }
       }
       return old;
     }

+ 2 - 0
hadoop-yarn-project/CHANGES.txt

@@ -27,6 +27,8 @@ Release 2.6.3 - UNRELEASED
 
   IMPROVEMENTS
 
+  MAPREDUCE-6436. JobHistory cache issue. (Kai Sasaki via zxu)
+
   OPTIMIZATIONS
 
   BUG FIXES