Browse Source

YARN-8194. Fixed reinitialization error for LinuxContainerExecutor.
Contributed by Chandni Singh

(cherry picked from commit f4d280f02b557885cd5e5cf36abc36eb579ccfb4)

Eric Yang 7 years ago
parent
commit
f729fb0baa

+ 37 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java

@@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher;
 
 
 import static org.apache.hadoop.fs.CreateFlag.CREATE;
 import static org.apache.hadoop.fs.CreateFlag.CREATE;
 import static org.apache.hadoop.fs.CreateFlag.OVERWRITE;
 import static org.apache.hadoop.fs.CreateFlag.OVERWRITE;
+
+import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
 import org.slf4j.Logger;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.LoggerFactory;
 
 
@@ -844,6 +846,7 @@ public class ContainerLaunch implements Callable<Integer> {
       throw new IOException("Reap container failed for container "
       throw new IOException("Reap container failed for container "
           + containerIdStr);
           + containerIdStr);
     }
     }
+    cleanupContainerFiles(getContainerWorkDir());
   }
   }
 
 
   /**
   /**
@@ -1858,4 +1861,38 @@ public class ContainerLaunch implements Callable<Integer> {
       context.getNMStateStore().storeContainerWorkDir(containerId, workDir);
       context.getNMStateStore().storeContainerWorkDir(containerId, workDir);
     }
     }
   }
   }
+
+  /**
+   * Returns the work dir recorded on the container as a {@link Path}.
+   * Protected so that ContainerRelaunch, which previously carried its own
+   * private copy of this method, can share it.
+   *
+   * @return the container's work dir
+   * @throws IOException if the container has no work dir set, or if
+   *         dirsHandler does not report that dir as a good local dir
+   */
+  protected Path getContainerWorkDir() throws IOException {
+    String containerWorkDir = container.getWorkDir();
+    // Reject both a missing work dir and one dirsHandler no longer accepts.
+    if (containerWorkDir == null
+        || !dirsHandler.isGoodLocalDir(containerWorkDir)) {
+      throw new IOException(
+          "Could not find a good work dir " + containerWorkDir
+              + " for container " + container);
+    }
+
+    return new Path(containerWorkDir);
+  }
+
+  /**
+   * Removes the container script (CONTAINER_SCRIPT) and the tokens file
+   * (FINAL_CONTAINER_TOKENS_FILE) from the given container work dir.
+   * Used both when a container is relaunched and when it is cleaned up.
+   *
+   * Each removal goes through deleteAsUser, which logs a warning on failure
+   * instead of throwing, so this method is best-effort and does not signal
+   * whether the files were actually removed.
+   *
+   * @param containerWorkDir work dir of the container whose files are removed
+   */
+  protected void cleanupContainerFiles(Path containerWorkDir) {
+    // The placeholder is filled with the work dir path, not a container id.
+    LOG.debug("cleanup container {} files", containerWorkDir);
+    // delete ContainerScriptPath
+    deleteAsUser(new Path(containerWorkDir, CONTAINER_SCRIPT));
+    // delete TokensPath
+    deleteAsUser(new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE));
+  }
+
+  /**
+   * Asks the container executor to delete the given path, passing the
+   * container's user and the path (as the sub dir) in the deletion context.
+   * Any exception raised by the executor is logged at WARN level and not
+   * propagated.
+   *
+   * @param path the path to delete
+   */
+  private void deleteAsUser(Path path) {
+    try {
+      exec.deleteAsUser(new DeletionAsUserContext.Builder()
+          .setUser(container.getUser())
+          .setSubDir(path)
+          .build());
+    } catch (Exception e) {
+      // Best-effort: a failed delete is recorded but does not fail the caller.
+      LOG.warn("Failed to delete " + path, e);
+    }
+  }
 }
 }

+ 2 - 34
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java

@@ -34,7 +34,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
-import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
 import org.slf4j.Logger;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.LoggerFactory;
 
 
@@ -71,7 +70,8 @@ public class ContainerRelaunch extends ContainerLaunch {
     Path containerLogDir;
     Path containerLogDir;
     try {
     try {
       Path containerWorkDir = getContainerWorkDir();
       Path containerWorkDir = getContainerWorkDir();
-      cleanupPreviousContainerFiles(containerWorkDir);
+      // Clean up container's previous files for container relaunch.
+      cleanupContainerFiles(containerWorkDir);
 
 
       containerLogDir = getContainerLogDir();
       containerLogDir = getContainerLogDir();
 
 
@@ -148,17 +148,6 @@ public class ContainerRelaunch extends ContainerLaunch {
     return ret;
     return ret;
   }
   }
 
 
-  private Path getContainerWorkDir() throws IOException {
-    String containerWorkDir = container.getWorkDir();
-    if (containerWorkDir == null
-        || !dirsHandler.isGoodLocalDir(containerWorkDir)) {
-      throw new IOException(
-          "Could not find a good work dir " + containerWorkDir
-          + " for container " + container);
-    }
-
-    return new Path(containerWorkDir);
-  }
 
 
   private Path getContainerLogDir() throws IOException {
   private Path getContainerLogDir() throws IOException {
     String containerLogDir = container.getLogDir();
     String containerLogDir = container.getLogDir();
@@ -190,25 +179,4 @@ public class ContainerRelaunch extends ContainerLaunch {
     return dirsHandler.getLocalPathForRead(
     return dirsHandler.getLocalPathForRead(
         getPidFileSubpath(appIdStr, containerIdStr));
         getPidFileSubpath(appIdStr, containerIdStr));
   }
   }
-
-  /**
-   * Clean up container's previous files for container relaunch.
-   */
-  private void cleanupPreviousContainerFiles(Path containerWorkDir) {
-    // delete ContainerScriptPath
-    deleteAsUser(new Path(containerWorkDir, CONTAINER_SCRIPT));
-    // delete TokensPath
-    deleteAsUser(new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE));
-  }
-
-  private void deleteAsUser(Path path) {
-    try {
-      exec.deleteAsUser(new DeletionAsUserContext.Builder()
-          .setUser(container.getUser())
-          .setSubDir(path)
-          .build());
-    } catch (Exception e) {
-      LOG.warn("Failed to delete " + path, e);
-    }
-  }
 }
 }