Browse Source

YARN-2526. SLS can deadlock when all the threads are taken by AMSimulators. (Wei Yan via kasha)

(cherry picked from commit 28d99db99236ff2a6e4a605802820e2b512225f9)
Karthik Kambatla 10 năm trước
mục cha
commit
fe9d1ee96f

+ 23 - 20
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java

@@ -179,26 +179,8 @@ public class MRAMSimulator extends AMSimulator {
         return rm.getApplicationMasterService().allocate(request);
       }
     });
-
-    // waiting until the AM container is allocated
-    while (true) {
-      if (response != null && ! response.getAllocatedContainers().isEmpty()) {
-        // get AM container
-        Container container = response.getAllocatedContainers().get(0);
-        se.getNmMap().get(container.getNodeId())
-                .addNewContainer(container, -1L);
-        // start AM container
-        amContainer = container;
-        LOG.debug(MessageFormat.format("Application {0} starts its " +
-                "AM container ({1}).", appId, amContainer.getId()));
-        isAMContainerRunning = true;
-        break;
-      }
-      // this sleep time is different from HeartBeat
-      Thread.sleep(1000);
-      // send out empty request
-      sendContainerRequest();
-      response = responseQueue.take();
+    if (response != null) {
+      responseQueue.put(response);
     }
   }
 
@@ -206,6 +188,26 @@ public class MRAMSimulator extends AMSimulator {
   @SuppressWarnings("unchecked")
   protected void processResponseQueue()
           throws InterruptedException, YarnException, IOException {
+    // Check whether receive the am container
+    if (!isAMContainerRunning) {
+      if (!responseQueue.isEmpty()) {
+        AllocateResponse response = responseQueue.take();
+        if (response != null
+            && !response.getAllocatedContainers().isEmpty()) {
+          // Get AM container
+          Container container = response.getAllocatedContainers().get(0);
+          se.getNmMap().get(container.getNodeId())
+              .addNewContainer(container, -1L);
+          // Start AM container
+          amContainer = container;
+          LOG.debug(MessageFormat.format("Application {0} starts its " +
+              "AM container ({1}).", appId, amContainer.getId()));
+          isAMContainerRunning = true;
+        }
+      }
+      return;
+    }
+
     while (! responseQueue.isEmpty()) {
       AllocateResponse response = responseQueue.take();
 
@@ -262,6 +264,7 @@ public class MRAMSimulator extends AMSimulator {
         LOG.debug(MessageFormat.format("Application {0} sends out event " +
                 "to clean up its AM container.", appId));
         isFinished = true;
+        break;
       }
 
       // check allocated containers

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -276,6 +276,9 @@ Release 2.6.0 - UNRELEASED
     YARN-2519. Credential Provider related unit tests failed on Windows.
     (Xiaoyu Yao via cnauroth)
 
+    YARN-2526. SLS can deadlock when all the threads are taken by AMSimulators. 
+    (Wei Yan via kasha)
+
 Release 2.5.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES