瀏覽代碼

YARN-6246. Identifying starved apps does not need the scheduler writelock
(Contributed by Karthik Kambatla via Daniel Templeton)

Daniel Templeton 8 年之前
父節點
當前提交
d5b71e4175

+ 4 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java

@@ -198,13 +198,10 @@ public class FSLeafQueue extends FSQueue {
   }
 
   @Override
-  public void updateInternal(boolean checkStarvation) {
+  void updateInternal() {
     readLock.lock();
     try {
       policy.computeShares(runnableApps, getFairShare());
-      if (checkStarvation) {
-        updateStarvedApps();
-      }
     } finally {
       readLock.unlock();
     }
@@ -283,8 +280,10 @@ public class FSLeafQueue extends FSQueue {
    * If this queue is starving due to fairshare, there must be at least
    * one application that is starved. And, even if the queue is not
    * starved due to fairshare, there might still be starved applications.
+   *
+   * Caller does not need read/write lock on the leaf queue.
    */
-  private void updateStarvedApps() {
+  void updateStarvedApps() {
     // Fetch apps with pending demand
     TreeSet<FSAppAttempt> appsWithDemand = fetchAppsWithDemand(false);
 

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java

@@ -79,13 +79,13 @@ public class FSParentQueue extends FSQueue {
   }
 
   @Override
-  public void updateInternal(boolean checkStarvation) {
+  void updateInternal() {
     readLock.lock();
     try {
       policy.computeShares(childQueues, getFairShare());
       for (FSQueue childQueue : childQueues) {
         childQueue.getMetrics().setFairShare(childQueue.getFairShare());
-        childQueue.updateInternal(checkStarvation);
+        childQueue.updateInternal();
       }
     } finally {
       readLock.unlock();

+ 13 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java

@@ -326,16 +326,23 @@ public abstract class FSQueue implements Queue, Schedulable {
 
   /**
    * Recomputes the shares for all child queues and applications based on this
-   * queue's current share, and checks for starvation.
+   * queue's current share.
    *
-   * @param checkStarvation whether to check for fairshare or minshare
-   *                        starvation on update
+   * To be called holding the scheduler writelock.
    */
-  abstract void updateInternal(boolean checkStarvation);
+  abstract void updateInternal();
 
-  public void update(Resource fairShare, boolean checkStarvation) {
+  /**
+   * Set the queue's fairshare and update the demand/fairshare of child
+   * queues/applications.
+   *
+   * To be called holding the scheduler writelock.
+   *
+   * @param fairShare
+   */
+  public void update(Resource fairShare) {
     setFairShare(fairShare);
-    updateInternal(checkStarvation);
+    updateInternal();
   }
 
   /**

+ 19 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java

@@ -366,20 +366,31 @@ public class FairScheduler extends
    */
   @VisibleForTesting
   public void update() {
-    try {
-      writeLock.lock();
-
-      FSQueue rootQueue = queueMgr.getRootQueue();
+    FSQueue rootQueue = queueMgr.getRootQueue();
 
+    // Update demands and fairshares
+    writeLock.lock();
+    try {
       // Recursively update demands for all queues
       rootQueue.updateDemand();
-
-      Resource clusterResource = getClusterResource();
-      rootQueue.update(clusterResource, shouldAttemptPreemption());
+      rootQueue.update(getClusterResource());
 
       // Update metrics
       updateRootQueueMetrics();
+    } finally {
+      writeLock.unlock();
+    }
+
+    readLock.lock();
+    try {
+      // Update starvation stats and identify starved applications
+      if (shouldAttemptPreemption()) {
+        for (FSLeafQueue queue : queueMgr.getLeafQueues()) {
+          queue.updateStarvedApps();
+        }
+      }
 
+      // Log debug information
       if (LOG.isDebugEnabled()) {
         if (--updatesToSkipForDebug < 0) {
           updatesToSkipForDebug = UPDATE_DEBUG_FREQUENCY;
@@ -387,7 +398,7 @@ public class FairScheduler extends
         }
       }
     } finally {
-      writeLock.unlock();
+      readLock.unlock();
     }
   }