Selaa lähdekoodia

YARN-7345. GPU Isolation: Incorrect minor device numbers written to devices.deny file. (Jonathan Hung via wangda)

Wangda Tan 7 vuotta sitten
vanhempi
commit
7025333d31

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c

@@ -108,7 +108,7 @@ static int internal_handle_gpu_request(
     char param_value[128];
     memset(param_value, 0, sizeof(param_value));
     snprintf(param_value, sizeof(param_value), "c %d:%d rwm",
-             major_device_number, i);
+             major_device_number, minor_devices[i]);
 
     int rc = update_cgroups_parameters_func_p("devices", "deny",
       container_id, param_value);

+ 13 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc

@@ -165,6 +165,19 @@ TEST_F(TestGpuModule, test_verify_gpu_module_calls_cgroup_parameter) {
 
   // Verify cgroups parameters
   verify_param_updated_to_cgroups(0, NULL);
+
+  /* Test case 3: block 2 non-sequential devices */
+  cgroups_parameters_invoked.clear();
+  char* argv_2[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "1,3",
+                   (char*) "--container_id", container_id };
+  rc = handle_gpu_request(&mock_update_cgroups_parameters,
+     "gpu", 5, argv_2);
+  ASSERT_EQ(0, rc) << "Should success.\n";
+
+  // Verify cgroups parameters
+  const char* expected_cgroups_argv_2[] = { "devices", "deny", container_id, "c 195:1 rwm",
+    "devices", "deny", container_id, "c 195:3 rwm"};
+  verify_param_updated_to_cgroups(8, expected_cgroups_argv_2);
 }
 
 TEST_F(TestGpuModule, test_illegal_cli_parameters) {