瀏覽代碼

YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal

Szilard Nemeth 6 年之前
父節點
當前提交
c416284bb7

+ 22 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java

@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
 
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
@@ -33,8 +34,14 @@ import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInforma
 import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
 
 import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class GpuResourcePlugin implements ResourcePlugin {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(GpuResourcePlugin.class);
+
   private final GpuNodeResourceUpdateHandler resourceDiscoverHandler;
   private final GpuDiscoverer gpuDiscoverer;
   private GpuResourceHandlerImpl gpuResourceHandler = null;
@@ -84,6 +91,10 @@ public class GpuResourcePlugin implements ResourcePlugin {
   public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
     GpuDeviceInformation gpuDeviceInformation =
         gpuDiscoverer.getGpuDeviceInformation();
+
+    //At this point the gpu plugin is already enabled
+    checkGpuResourceHandler();
+
     GpuResourceAllocator gpuResourceAllocator =
         gpuResourceHandler.getGpuAllocator();
     List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpusCopy();
@@ -94,6 +105,17 @@ public class GpuResourcePlugin implements ResourcePlugin {
         assignedGpuDevices);
   }
 
+  private void checkGpuResourceHandler() throws YarnException {
+    if(gpuResourceHandler == null) {
+      String errorMsg =
+          "Linux Container Executor is not configured for the NodeManager. "
+              + "To fully enable GPU feature on the node also set "
+              + YarnConfiguration.NM_CONTAINER_EXECUTOR + " properly.";
+      LOG.warn(errorMsg);
+      throw new YarnException(errorMsg);
+    }
+  }
+
   @Override
   public String toString() {
     return GpuResourcePlugin.class.getName();

+ 54 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java

@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import static org.mockito.Mockito.mock;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.junit.Test;
+
+public class TestGpuResourcePlugin {
+
+  /**
+   * Builds a {@link GpuResourcePlugin} whose discoverer and node resource
+   * update handler are Mockito mocks. {@code createResourceHandler} is not
+   * called on the returned plugin.
+   */
+  private static GpuResourcePlugin newPluginWithMocks() {
+    GpuDiscoverer discoverer = mock(GpuDiscoverer.class);
+    GpuNodeResourceUpdateHandler updateHandler =
+        mock(GpuNodeResourceUpdateHandler.class);
+    return new GpuResourcePlugin(updateHandler, discoverer);
+  }
+
+  /**
+   * Querying NM resource info on a plugin on which
+   * {@code createResourceHandler} was never called must fail with a
+   * {@link YarnException}.
+   */
+  @Test(expected = YarnException.class)
+  public void testResourceHandlerNotInitialized() throws YarnException {
+    GpuResourcePlugin plugin = newPluginWithMocks();
+
+    plugin.getNMResourceInfo();
+  }
+
+  /**
+   * Once {@code createResourceHandler} has been called, querying NM resource
+   * info is expected to complete without throwing.
+   */
+  @Test
+  public void testResourceHandlerIsInitialized() throws YarnException {
+    GpuResourcePlugin plugin = newPluginWithMocks();
+
+    plugin.createResourceHandler(null, null, null);
+
+    // The test passes as long as this call does not throw.
+    plugin.getNMResourceInfo();
+  }
+}