فهرست منبع

YARN-9126. Fix container clean up for reinitialization.
Contributed by Chandni Singh

(cherry picked from commit e815fd9c49e80b9200dd8852abe74fe219ad9110)

Eric Yang 6 سال پیش
والد
کامیت
29c9c8a893

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java

@@ -111,6 +111,7 @@ public class ContainerLaunch implements Callable<Integer> {
     Shell.appendScriptExtension("launch_container");
 
   public static final String FINAL_CONTAINER_TOKENS_FILE = "container_tokens";
+  public static final String SYSFS_DIR = "sysfs";
 
   private static final String PID_FILE_NAME_FMT = "%s.pid";
   static final String EXIT_CODE_FILE_SUFFIX = ".exitcode";
@@ -1722,6 +1723,8 @@ public class ContainerLaunch implements Callable<Integer> {
     deleteAsUser(new Path(containerWorkDir, CONTAINER_SCRIPT));
     // delete TokensPath
     deleteAsUser(new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE));
+    // delete sysfs dir
+    deleteAsUser(new Path(containerWorkDir, SYSFS_DIR));
 
     // delete symlinks because launch script will create symlinks again
     try {

+ 31 - 19
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java

@@ -46,7 +46,6 @@ import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -152,25 +151,10 @@ public class ContainersLauncher extends AbstractService
         containerLauncher.submit(launch);
         break;
       case CLEANUP_CONTAINER:
+        cleanup(event, containerId, true);
+        break;
       case CLEANUP_CONTAINER_FOR_REINIT:
-        ContainerLaunch existingLaunch = running.remove(containerId);
-        if (existingLaunch == null) {
-          // Container not launched.
-          // triggering KILLING to CONTAINER_CLEANEDUP_AFTER_KILL transition.
-          dispatcher.getEventHandler().handle(
-              new ContainerExitEvent(containerId,
-                  ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
-                  Shell.WINDOWS ? ContainerExecutor.ExitCode.FORCE_KILLED.getExitCode() :
-                  ContainerExecutor.ExitCode.TERMINATED.getExitCode(),
-                  "Container terminated before launch."));
-          return;
-        }
-
-        // Cleanup a container whether it is running/killed/completed, so that
-        // no sub-processes are alive.
-        ContainerCleanup cleanup = new ContainerCleanup(context, getConfig(),
-            dispatcher, exec, event.getContainer(), existingLaunch);
-        containerLauncher.submit(cleanup);
+        cleanup(event, containerId, false);
         break;
       case SIGNAL_CONTAINER:
         SignalContainersLauncherEvent signalEvent =
@@ -221,4 +205,32 @@ public class ContainersLauncher extends AbstractService
         break;
     }
   }
+
+  @VisibleForTesting
+  void cleanup(ContainersLauncherEvent event, ContainerId containerId,
+      boolean async) {
+    ContainerLaunch existingLaunch = running.remove(containerId);
+    if (existingLaunch == null) {
+      // Container not launched.
+      // triggering KILLING to CONTAINER_CLEANEDUP_AFTER_KILL transition.
+      dispatcher.getEventHandler().handle(
+          new ContainerExitEvent(containerId,
+              ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
+              Shell.WINDOWS ?
+                  ContainerExecutor.ExitCode.FORCE_KILLED.getExitCode() :
+                  ContainerExecutor.ExitCode.TERMINATED.getExitCode(),
+              "Container terminated before launch."));
+      return;
+    }
+
+    // Cleanup a container whether it is running/killed/completed, so that
+    // no sub-processes are alive.
+    ContainerCleanup cleanup = new ContainerCleanup(context, getConfig(),
+        dispatcher, exec, event.getContainer(), existingLaunch);
+    if (async) {
+      containerLauncher.submit(cleanup);
+    } else {
+      cleanup.run();
+    }
+  }
 }

+ 10 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainersLauncher.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManag
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.InjectMocks;
@@ -39,8 +40,10 @@ import org.mockito.Mockito;
 import org.mockito.MockitoAnnotations;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
@@ -212,11 +215,14 @@ public class TestContainersLauncher {
 
     when(event.getType())
         .thenReturn(ContainersLauncherEventType.CLEANUP_CONTAINER_FOR_REINIT);
-    assertEquals(1, dummyMap.size());
+    final List<ContainerId> cleanedContainers = new ArrayList<>();
+    doAnswer(invocation -> {
+      cleanedContainers.add((ContainerId)invocation.getArguments()[1]);
+      return null;
+    }).when(spy).cleanup(any(), any(), anyBoolean());
     spy.handle(event);
-    assertEquals(0, dummyMap.size());
-    Mockito.verify(containerLauncher, Mockito.times(1))
-        .submit(Mockito.any(ContainerCleanup.class));
+    Assert.assertEquals("container not cleaned", containerId,
+        cleanedContainers.get(0));
   }
 
   @Test