Преглед изворни кода

YARN-10649. Fix RMNodeImpl.updateExistContainers leak (#2719). Contributed by Max Xie

Neil пре 4 година
родитељ
комит
d615e2d3bd

+ 7 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java

@@ -1464,6 +1464,11 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
     return nodeUpdateQueue.size();
   }
 
+  // For test only.
+  @VisibleForTesting
+  public Map<ContainerId, ContainerStatus> getUpdatedExistContainers() {
+    return this.updatedExistContainers;
+  }
   // For test only.
   @VisibleForTesting
   public Set<ContainerId> getLaunchedContainers() {
@@ -1582,6 +1587,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
       } else {
         // A finished container
         launchedContainers.remove(containerId);
+        updatedExistContainers.remove(containerId);
         if (completedContainers.add(containerId)) {
           newlyCompletedContainers.add(remoteContainer);
         }
@@ -1595,6 +1601,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
         findLostContainers(numRemoteRunningContainers, containerStatuses);
     for (ContainerStatus remoteContainer : lostContainers) {
       ContainerId containerId = remoteContainer.getContainerId();
+      updatedExistContainers.remove(containerId);
       if (completedContainers.add(containerId)) {
         newlyCompletedContainers.add(remoteContainer);
       }

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java

@@ -1096,6 +1096,12 @@ public class TestRMNodeTransitions {
         node.getLaunchedContainers().contains(cid1));
     Assert.assertTrue("second container not running",
         node.getLaunchedContainers().contains(cid2));
+    assertEquals("unexpected number of running containers",
+        2, node.getUpdatedExistContainers().size());
+    Assert.assertTrue("first container not running",
+        node.getUpdatedExistContainers().containsKey(cid1));
+    Assert.assertTrue("second container not running",
+        node.getUpdatedExistContainers().containsKey(cid2));
     assertEquals("already completed containers",
         0, completedContainers.size());
     containerStats.remove(0);
@@ -1115,6 +1121,10 @@ public class TestRMNodeTransitions {
         1, node.getLaunchedContainers().size());
     Assert.assertTrue("second container not running",
         node.getLaunchedContainers().contains(cid2));
+    assertEquals("unexpected number of running containers",
+        1, node.getUpdatedExistContainers().size());
+    Assert.assertTrue("second container not running",
+        node.getUpdatedExistContainers().containsKey(cid2));
   }
 
   @Test