瀏覽代碼

YARN-5456. container-executor support for FreeBSD, NetBSD, and others if conf path is absolute. Contributed by Allen Wittenauer.

(cherry picked from commit b913677365ad77ca7daa5741c04c14df1a0313cd)
Chris Nauroth 8 年之前
父節點
當前提交
5251de00fa

+ 13 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt

@@ -19,10 +19,20 @@ cmake_minimum_required(VERSION 2.6 FATAL_ERROR)
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/../../../../../hadoop-common-project/hadoop-common)
 include(HadoopCommon)
 
+# Determine whether container-executor.conf.dir is an absolute path, in
+# case the OS we're compiling on has no hook in get_executable; the
+# resulting define may be used later to throw a compile error. HCD_ONE is
+# quoted so an empty HADOOP_CONF_DIR yields "not absolute", not an error.
+string(REGEX MATCH . HCD_ONE "${HADOOP_CONF_DIR}")
+string(COMPARE EQUAL "${HCD_ONE}" "/" HADOOP_CONF_DIR_IS_ABS)
+
 # Note: can't use -D_FILE_OFFSET_BITS=64, see MAPREDUCE-4258
 string(REPLACE "-D_FILE_OFFSET_BITS=64" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
 string(REPLACE "-D_FILE_OFFSET_BITS=64" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 
+include(CheckIncludeFiles)
+check_include_files("sys/types.h;sys/sysctl.h" HAVE_SYS_SYSCTL_H)
+
 include(CheckFunctionExists)
 check_function_exists(canonicalize_file_name HAVE_CANONICALIZE_FILE_NAME)
 check_function_exists(fcloseall HAVE_FCLOSEALL)
@@ -32,6 +42,9 @@ check_function_exists(fstatat HAVE_FSTATAT)
 check_function_exists(openat HAVE_OPENAT)
 check_function_exists(unlinkat HAVE_UNLINKAT)
 
+include(CheckSymbolExists)
+check_symbol_exists(sysctl "sys/types.h;sys/sysctl.h" HAVE_SYSCTL)
+
 if(APPLE)
   include_directories( /System/Library/Frameworks )
   find_library(COCOA_LIBRARY Cocoa)

+ 12 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/config.h.cmake

@@ -18,14 +18,26 @@
 #ifndef CONFIG_H
 #define CONFIG_H
 
+/* Settings substituted from the CMake variables of the same name. */
+
 #cmakedefine HADOOP_CONF_DIR "@HADOOP_CONF_DIR@"
 
+#cmakedefine HADOOP_CONF_DIR_IS_ABS "@HADOOP_CONF_DIR_IS_ABS@"
+
+/* Defined when the named function or symbol was found at configure time. */
+
 #cmakedefine HAVE_CANONICALIZE_FILE_NAME @HAVE_CANONICALIZE_FILE_NAME@
 #cmakedefine HAVE_FCHMODAT @HAVE_FCHMODAT@
 #cmakedefine HAVE_FCLOSEALL @HAVE_FCLOSEALL@
 #cmakedefine HAVE_FDOPENDIR @HAVE_FDOPENDIR@
 #cmakedefine HAVE_FSTATAT @HAVE_FSTATAT@
 #cmakedefine HAVE_OPENAT @HAVE_OPENAT@
+#cmakedefine HAVE_SYSCTL @HAVE_SYSCTL@
 #cmakedefine HAVE_UNLINKAT @HAVE_UNLINKAT@
 
+
+/* Defined when the named header was found at configure time. */
+
+#cmakedefine HAVE_SYS_SYSCTL_H @HAVE_SYS_SYSCTL_H@
+
 #endif

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h

@@ -16,6 +16,10 @@
  * limitations under the License.
  */
 
+#ifdef __FreeBSD__
+#define _WITH_GETLINE
+#endif
+
 #include <stddef.h>
 
 /** Define a platform-independent constant instead of using PATH_MAX */

+ 21 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c

@@ -846,7 +846,23 @@ int set_user(const char *user) {
  */
 static int change_owner(const char* path, uid_t user, gid_t group) {
   if (geteuid() == user && getegid() == group) {
+
+  /*
+   * On the BSDs, this is not a guaranteed shortcut
+   * since group ownership is inherited from the parent directory
+   */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+    if (chown(path, user, group) != 0) {
+      fprintf(LOGFILE, "Can't chown %s to %d:%d - %s\n", path, user, group,
+              strerror(errno));
+      return -1;
+    }
     return 0;
+#else
+    return 0;
+#endif
+
   } else {
     uid_t old_user = geteuid();
     gid_t old_group = getegid();
@@ -884,14 +900,14 @@ int create_directory_for_user(const char* path) {
   if (ret == 0) {
     if (0 == mkdir(path, permissions) || EEXIST == errno) {
       // need to reassert the group sticky bit
-      if (chmod(path, permissions) != 0) {
-        fprintf(LOGFILE, "Can't chmod %s to add the sticky bit - %s\n",
-                path, strerror(errno));
-        ret = -1;
-      } else if (change_owner(path, user, nm_gid) != 0) {
+      if (change_owner(path, user, nm_gid) != 0) {
         fprintf(LOGFILE, "Failed to chown %s to %d:%d: %s\n", path, user, nm_gid,
             strerror(errno));
         ret = -1;
+      } else if (chmod(path, permissions) != 0) {
+        fprintf(LOGFILE, "Can't chmod %s to add the sticky bit - %s\n",
+                path, strerror(errno));
+        ret = -1;
       }
     } else {
       fprintf(LOGFILE, "Failed to create directory %s - %s\n", path,

+ 7 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h

@@ -15,6 +15,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+/* FreeBSD protects the getline() prototype. See getline(3) for more */
+#ifdef __FreeBSD__
+#define _WITH_GETLINE
+#endif
+
 #include <pwd.h>
 #include <stdio.h>
 #include <sys/types.h>
@@ -101,7 +107,7 @@ extern FILE *LOGFILE;
 extern FILE *ERRORFILE;
 
 // get the executable's filename
-char* get_executable();
+char* get_executable(char *argv0);
 
 //function used to load the configurations present in the secure config
 void read_executor_config(const char* file_name);

+ 103 - 15
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/get_executable.c

@@ -28,23 +28,27 @@
  *
  */
 
+#include "config.h"
+#include "configuration.h"
+#include "container-executor.h"
+
 #include <errno.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include "config.h"
-#include "configuration.h"
-#include "container-executor.h"
+
+#ifdef HAVE_SYS_SYSCTL_H
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#endif
 
 /*
  * A generic function to read a link and return
  * the value for use with System V procfs.
  * With much thanks to Tom Killian, Roger Faulkner,
  * and Ron Gomes, this is pretty generic code.
- *
- * The various BSDs do not have (reliably)
- * have /proc. Custom implementations follow.
  */
 
 char *__get_exec_readproc(char *procfn) {
@@ -53,7 +57,7 @@ char *__get_exec_readproc(char *procfn) {
 
   filename = malloc(EXECUTOR_PATH_MAX);
   if (!filename) {
-    fprintf(ERRORFILE,"cannot allocate memory for filename: %s\n",strerror(errno));
+    fprintf(ERRORFILE,"cannot allocate memory for filename before readlink: %s\n",strerror(errno));
     exit(-1);
   }
   len = readlink(procfn, filename, EXECUTOR_PATH_MAX);
@@ -62,14 +66,45 @@ char *__get_exec_readproc(char *procfn) {
             strerror(errno));
     exit(-1);
   } else if (len >= EXECUTOR_PATH_MAX) {
-    fprintf(ERRORFILE,"Executable name %.*s is longer than %d characters.\n",
-            EXECUTOR_PATH_MAX, filename, EXECUTOR_PATH_MAX);
+    fprintf(ERRORFILE,"Resolved path for %s [%s] is longer than %d characters.\n",
+            procfn, filename, EXECUTOR_PATH_MAX);
     exit(-1);
   }
   filename[len] = '\0';
   return filename;
 }
 
+
+#ifdef HAVE_SYSCTL
+/*
+ * Ask the kernel for the executable's path via sysctl() with the
+ * caller's four-element MIB; used by the open source BSDs, many of
+ * which do not reliably have /proc mounted. Exits on any failure.
+ */
+
+char *__get_exec_sysctl(int *mib)
+{
+  char kernel_path[EXECUTOR_PATH_MAX];
+  size_t kernel_path_len = sizeof(kernel_path);
+  char *result = NULL;
+
+  if (-1 == sysctl(mib, 4, kernel_path, &kernel_path_len, NULL, 0)) {
+    fprintf(ERRORFILE, "Can't get executable name from kernel: %s\n",
+            strerror(errno));
+    exit(-1);
+  }
+  result = malloc(EXECUTOR_PATH_MAX);
+  if (NULL == result) {
+    fprintf(ERRORFILE, "cannot allocate memory for filename after sysctl: %s\n",
+            strerror(errno));
+    exit(-1);
+  }
+  snprintf(result, EXECUTOR_PATH_MAX, "%s", kernel_path);
+  return result;
+}
+
+#endif /* HAVE_SYSCTL */
+
 #ifdef __APPLE__
 
 /*
@@ -80,13 +115,13 @@ char *__get_exec_readproc(char *procfn) {
 
 #include <libproc.h>
 
-char* get_executable() {
+char* get_executable(char *argv0) {
   char *filename;
   pid_t pid;
 
   filename = malloc(PROC_PIDPATHINFO_MAXSIZE);
   if (!filename) {
-    fprintf(ERRORFILE,"cannot allocate memory for filename: %s\n",strerror(errno));
+    fprintf(ERRORFILE,"cannot allocate memory for filename before proc_pidpath: %s\n",strerror(errno));
     exit(-1);
   }
   pid = getpid();
@@ -98,13 +133,33 @@ char* get_executable() {
   return filename;
 }
 
+#elif defined(__FreeBSD__)
+
+/* FreeBSD: use the KERN_PROC_PATHNAME sysctl; argv0 is unused. */
+char* get_executable(char *argv0) {
+  static int pathname_mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+
+  return __get_exec_sysctl(pathname_mib);
+}
+
 #elif defined(__linux)
 
 
-char* get_executable() {
+char* get_executable(char *argv0) {
   return __get_exec_readproc("/proc/self/exe");
 }
 
+#elif defined(__NetBSD__) && defined(KERN_PROC_PATHNAME)
+
+/*
+ * NetBSD: only really new kernels have KERN_PROC_PATHNAME, hence the
+ * extra guard on this branch. argv0 is unused.
+ */
+char* get_executable(char *argv0) {
+  static int pathname_mib[] = { CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME };
+  return __get_exec_sysctl(pathname_mib);
+}
+
 #elif defined(__sun)
 
 /*
@@ -116,12 +171,45 @@ char* get_executable() {
  * doesn't exist on Solaris hasn't read the proc(4) man page.)
  */
 
-char* get_executable() {
+char* get_executable(char *argv0) {
   return __get_exec_readproc("/proc/self/path/a.out");
 }
 
-#else
+#elif defined(HADOOP_CONF_DIR_IS_ABS)
 
-#error Cannot safely determine executable path on this operating system.
+/*
+ * Fallback for operating systems where we don't know how to ask
+ * the kernel where the executable is located: canonicalize argv0
+ * instead. For security reasons this branch is only compiled when
+ * the maven property container-executor.conf.dir is set to an
+ * absolute path (HADOOP_CONF_DIR_IS_ABS). Exits on failure.
+ */
+
+char* get_executable (char *argv0) {
+  char *filename;
 
+#ifdef HAVE_CANONICALIZE_FILE_NAME
+  filename=canonicalize_file_name(argv0);
+#else
+  filename=realpath(argv0,NULL);
 #endif
+
+  if (!filename) {
+    fprintf(ERRORFILE,"realpath of executable: %s\n",strerror(errno));
+    exit(-1);
+  }
+  return filename;
+}
+
+#else
+
+/*
+ * If we ended up here, we're on an operating system that doesn't
+ * match any of the above. This means either support for this OS
+ * needs to be added here or the container-executor.conf.dir maven
+ * property should be set to an absolute path.
+ */
+
+#error Cannot safely determine executable path with a relative HADOOP_CONF_DIR on this operating system.
+
+#endif /* platform checks */

+ 3 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c

@@ -139,9 +139,9 @@ in case of validation failures. Also sets up configuration / group information e
 This function is to be called in every invocation of container-executor, irrespective
 of whether an explicit checksetup operation is requested. */
 
-static void assert_valid_setup() {
+static void assert_valid_setup(char *argv0) {
   int ret;
-  char *executable_file = get_executable();
+  char *executable_file = get_executable(argv0);
   if (!executable_file) {
     fprintf(ERRORFILE,"realpath of executable: %s\n",strerror(errno));
     flush_and_close_log_files();
@@ -518,7 +518,7 @@ static int validate_run_as_user_commands(int argc, char **argv, int *operation)
 
 int main(int argc, char **argv) {
   open_log_files();
-  assert_valid_setup();
+  assert_valid_setup(argv[0]);
 
   int operation;
   int ret = validate_arguments(argc, argv, &operation);

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c

@@ -1031,6 +1031,7 @@ int main(int argc, char **argv) {
   LOGFILE = stdout;
   ERRORFILE = stderr;
 
+  printf("Attempting to clean up from any previous runs\n");
   // clean up any junk from previous run
   if (system("chmod -R u=rwx " TEST_ROOT "; rm -fr " TEST_ROOT)) {
     exit(1);
@@ -1043,6 +1044,9 @@ int main(int argc, char **argv) {
   if (write_config_file(TEST_ROOT "/test.cfg", 1) != 0) {
     exit(1);
   }
+
+  printf("\nOur executable is %s\n",get_executable(argv[0]));
+
   read_executor_config(TEST_ROOT "/test.cfg");
 
   local_dirs = extract_values(strdup(NM_LOCAL_DIRS));