Browse Source

YARN-9419. Log a warning if GPU isolation is enabled but LinuxContainerExecutor is disabled. Contributed by Andras Gyori

Szilard Nemeth 5 years ago
parent
commit
9e0d742025

+ 15 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java

@@ -20,9 +20,12 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugi
 
 import java.util.List;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
@@ -59,6 +62,7 @@ public class GpuResourcePlugin implements ResourcePlugin {
 
   @Override
   public void initialize(Context context) throws YarnException {
+    validateExecutorConfig(context.getConf());
     this.gpuDiscoverer.initialize(context.getConf(),
         new NvidiaBinaryHelper());
     this.dockerCommandPlugin =
@@ -66,6 +70,17 @@ public class GpuResourcePlugin implements ResourcePlugin {
             context.getConf());
   }
 
+  private void validateExecutorConfig(Configuration conf) {
+    Class<? extends ContainerExecutor> executorClass = conf.getClass(
+        YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
+        ContainerExecutor.class);
+
+    if (executorClass.equals(DefaultContainerExecutor.class)) {
+      LOG.warn("Using GPU plugin with disabled LinuxContainerExecutor" +
+          " is considered to be unsafe.");
+    }
+  }
+
   @Override
   public ResourceHandler createResourceHandler(
       Context context, CGroupsHandler cGroupsHandler,