Pārlūkot izejas kodu

YARN-4762. Fixed CgroupHandler's creation and usage to avoid NodeManagers crashing when LinuxContainerExecutor is enabled. (Sidharta Seethana via vinodkv)

Vinod Kumar Vavilapalli 9 gadi atpakaļ
vecāks
revīzija
b2661765a5

+ 20 - 7
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java

@@ -63,7 +63,7 @@ public class ResourceHandlerModule {
   /**
    * Returns an initialized, thread-safe CGroupsHandler instance.
    */
-  public static CGroupsHandler getCGroupsHandler(Configuration conf)
+  private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf)
       throws ResourceHandlerException {
     if (cGroupsHandler == null) {
       synchronized (CGroupsHandler.class) {
@@ -77,7 +77,17 @@ public class ResourceHandlerModule {
     return cGroupsHandler;
   }
 
-  private static CGroupsCpuResourceHandlerImpl getcGroupsCpuResourceHandler(
+  /**
+   * Returns a (possibly null) reference to a cGroupsHandler. This handler is
+   * non-null only if one or more of the known cgroups-based resource
+   * handlers are in use and have been initialized.
+   */
+
+  public static CGroupsHandler getCGroupsHandler() {
+    return cGroupsHandler;
+  }
+
+  private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler(
       Configuration conf) throws ResourceHandlerException {
     boolean cgroupsCpuEnabled =
         conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
@@ -92,7 +102,8 @@ public class ResourceHandlerModule {
           if (cGroupsCpuResourceHandler == null) {
             LOG.debug("Creating new cgroups cpu handler");
             cGroupsCpuResourceHandler =
-                new CGroupsCpuResourceHandlerImpl(getCGroupsHandler(conf));
+                new CGroupsCpuResourceHandlerImpl(
+                    getInitializedCGroupsHandler(conf));
             return cGroupsCpuResourceHandler;
           }
         }
@@ -112,7 +123,7 @@ public class ResourceHandlerModule {
             LOG.debug("Creating new traffic control bandwidth handler");
             trafficControlBandwidthHandler = new
                 TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor
-                .getInstance(conf), getCGroupsHandler(conf),
+                .getInstance(conf), getInitializedCGroupsHandler(conf),
                 new TrafficController(conf, PrivilegedOperationExecutor
                     .getInstance(conf)));
           }
@@ -147,7 +158,8 @@ public class ResourceHandlerModule {
         if (cGroupsBlkioResourceHandler == null) {
           LOG.debug("Creating new cgroups blkio handler");
           cGroupsBlkioResourceHandler =
-              new CGroupsBlkioResourceHandlerImpl(getCGroupsHandler(conf));
+              new CGroupsBlkioResourceHandlerImpl(
+                  getInitializedCGroupsHandler(conf));
         }
       }
     }
@@ -170,7 +182,8 @@ public class ResourceHandlerModule {
       synchronized (MemoryResourceHandler.class) {
         if (cGroupsMemoryResourceHandler == null) {
           cGroupsMemoryResourceHandler =
-              new CGroupsMemoryResourceHandlerImpl(getCGroupsHandler(conf));
+              new CGroupsMemoryResourceHandlerImpl(
+                  getInitializedCGroupsHandler(conf));
         }
       }
     }
@@ -191,7 +204,7 @@ public class ResourceHandlerModule {
     addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
     addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
-    addHandlerIfNotNull(handlerList, getcGroupsCpuResourceHandler(conf));
+    addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
     resourceHandlerChain = new ResourceHandlerChain(handlerList);
   }
 

+ 1 - 12
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java

@@ -27,9 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
 
@@ -48,19 +45,11 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime {
       throws ContainerExecutionException {
     PrivilegedOperationExecutor privilegedOperationExecutor =
         PrivilegedOperationExecutor.getInstance(conf);
-    CGroupsHandler cGroupsHandler;
-    try {
-      cGroupsHandler = ResourceHandlerModule.getCGroupsHandler(conf);
-    } catch (ResourceHandlerException e) {
-      LOG.error("Unable to get cgroups handle.");
-      throw new ContainerExecutionException(e);
-    }
-
     defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime(
         privilegedOperationExecutor);
     defaultLinuxContainerRuntime.initialize(conf);
     dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime(
-        privilegedOperationExecutor, cGroupsHandler);
+        privilegedOperationExecutor);
     dockerLinuxContainerRuntime.initialize(conf);
   }
 

+ 26 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java

@@ -20,6 +20,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -36,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
@@ -88,10 +90,25 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
     return type != null && type.equals("docker");
   }
 
+  public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
+      privilegedOperationExecutor) {
+    this(privilegedOperationExecutor, ResourceHandlerModule
+        .getCGroupsHandler());
+  }
+
+  //A constructor with an injected cGroupsHandler primarily used for testing.
+  @VisibleForTesting
   public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
       privilegedOperationExecutor, CGroupsHandler cGroupsHandler) {
     this.privilegedOperationExecutor = privilegedOperationExecutor;
-    this.cGroupsHandler = cGroupsHandler;
+
+    if (cGroupsHandler == null) {
+      if (LOG.isInfoEnabled()) {
+        LOG.info("cGroupsHandler is null - cgroups not in use.");
+      }
+    } else {
+      this.cGroupsHandler = cGroupsHandler;
+    }
   }
 
   @Override
@@ -113,6 +130,14 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
   public void addCGroupParentIfRequired(String resourcesOptions,
       String containerIdStr, DockerRunCommand runCommand)
       throws ContainerExecutionException {
+    if (cGroupsHandler == null) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("cGroupsHandler is null. cgroups are not in use. nothing to"
+            + " do.");
+      }
+      return;
+    }
+
     if (resourcesOptions.equals(
         (PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation
             .CGROUP_ARG_NO_TASKS))) {

+ 15 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java

@@ -429,5 +429,20 @@ public class TestDockerContainerRuntime {
     //--cgroup-parent should be added for the containerId in question
     String expectedPath = "/" + hierarchy + "/" + containerIdStr;
     Mockito.verify(command).setCGroupParent(expectedPath);
+
+    //create a runtime with a 'null' cgroups handler - i.e. no
+    // cgroup-based resource handlers are in use.
+
+    runtime = new DockerLinuxContainerRuntime
+        (mockExecutor, null);
+    runtime.initialize(conf);
+
+    runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr,
+        command);
+    runtime.addCGroupParentIfRequired(resourceOptionsCpu, containerIdStr,
+        command);
+
+    //no --cgroup-parent should be added in either case
+    Mockito.verifyZeroInteractions(command);
   }
 }