task-controller.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "task-controller.h"
  /*
   * Password-database entry of the user the controller is acting for.
   * Starts out NULL and is filled in by get_user_details() via getpwnam().
   */
  struct passwd *user_detail = NULL;
  /* Stream every diagnostic message in this file is written to. */
  FILE *LOGFILE;
  /*
   * Global cleanup hook. At present its only job is to release the
   * configuration data by delegating to free_configurations().
   */
  void cleanup() {
    free_configurations();
  }
  27. //change the user to passed user for executing/killing tasks
  28. int change_user(const char * user) {
  29. if (get_user_details(user) < 0) {
  30. return -1;
  31. }
  32. if(initgroups(user_detail->pw_name, user_detail->pw_gid) != 0) {
  33. cleanup();
  34. return SETUID_OPER_FAILED;
  35. }
  36. errno = 0;
  37. setgid(user_detail->pw_gid);
  38. if (errno != 0) {
  39. fprintf(LOGFILE, "unable to setgid : %s\n", strerror(errno));
  40. cleanup();
  41. return SETUID_OPER_FAILED;
  42. }
  43. setegid(user_detail->pw_gid);
  44. if (errno != 0) {
  45. fprintf(LOGFILE, "unable to setegid : %s\n", strerror(errno));
  46. cleanup();
  47. return SETUID_OPER_FAILED;
  48. }
  49. setuid(user_detail->pw_uid);
  50. if (errno != 0) {
  51. fprintf(LOGFILE, "unable to setuid : %s\n", strerror(errno));
  52. cleanup();
  53. return SETUID_OPER_FAILED;
  54. }
  55. seteuid(user_detail->pw_uid);
  56. if (errno != 0) {
  57. fprintf(LOGFILE, "unable to seteuid : %s\n", strerror(errno));
  58. cleanup();
  59. return SETUID_OPER_FAILED;
  60. }
  61. return 0;
  62. }
  63. /**
  64. * Checks the passed value for the variable config_key against the values in
  65. * the configuration.
  66. * Returns 0 if the passed value is found in the configuration,
  67. * -1 otherwise
  68. */
  69. int check_variable_against_config(const char *config_key,
  70. const char *passed_value) {
  71. if (config_key == NULL || passed_value == NULL) {
  72. return -1;
  73. }
  74. int found = -1;
  75. const char **config_value = get_values(config_key);
  76. if (config_value == NULL) {
  77. fprintf(LOGFILE, "%s is not configured.\n", config_key);
  78. return -1;
  79. }
  80. char *full_config_value = (char *)get_value(config_key);
  81. char **config_val_ptr = (char **) config_value;
  82. while (*config_val_ptr != NULL) {
  83. if (strcmp(*config_val_ptr, passed_value) == 0) {
  84. found = 0;
  85. break;
  86. }
  87. config_val_ptr++;
  88. }
  89. if (found != 0) {
  90. fprintf(
  91. LOGFILE,
  92. "Invalid value passed: \
  93. Configured value of %s is %s. \
  94. Passed value is %s.\n",
  95. config_key, full_config_value, passed_value);
  96. }
  97. free(full_config_value);
  98. free(config_value);
  99. return found;
  100. }
  101. /**
  102. * Utility function to concatenate argB to argA using the concat_pattern
  103. */
  104. char *concatenate(char *concat_pattern, char *return_path_name, int numArgs,
  105. ...) {
  106. va_list ap;
  107. va_start(ap, numArgs);
  108. int strlen_args = 0;
  109. char *arg = NULL;
  110. int j;
  111. for (j = 0; j < numArgs; j++) {
  112. arg = va_arg(ap, char*);
  113. if (arg == NULL) {
  114. fprintf(LOGFILE, "One of the arguments passed for %s in null.\n",
  115. return_path_name);
  116. return NULL;
  117. }
  118. strlen_args += strlen(arg);
  119. }
  120. va_end(ap);
  121. char *return_path = NULL;
  122. int str_len = strlen(concat_pattern) + strlen_args;
  123. return_path = (char *) malloc(sizeof(char) * (str_len + 1));
  124. if (return_path == NULL) {
  125. fprintf(LOGFILE, "Unable to allocate memory for %s.\n", return_path_name);
  126. return NULL;
  127. }
  128. memset(return_path, '\0', str_len + 1);
  129. va_start(ap, numArgs);
  130. vsnprintf(return_path, str_len, concat_pattern, ap);
  131. va_end(ap);
  132. return return_path;
  133. }
  134. /**
  135. * Get the job-directory path from tt_root, user name and job-id
  136. */
  137. char *get_job_directory(const char * tt_root, const char *user,
  138. const char *jobid) {
  139. return concatenate(TT_JOB_DIR_PATTERN, "job_dir_path", 3, tt_root, user,
  140. jobid);
  141. }
  142. /**
  143. * Get the user directory of a particular user
  144. */
  145. char *get_user_directory(const char *tt_root, const char *user) {
  146. return concatenate(USER_DIR_PATTERN, "user_dir_path", 2, tt_root, user);
  147. }
  148. /**
  149. * Get the distributed cache directory for a particular user
  150. */
  151. char *get_distributed_cache_directory(const char *tt_root, const char *user,
  152. const char* unique_string) {
  153. return concatenate(USER_DISTRIBUTED_CACHE_DIR_PATTERN,
  154. "dist_cache_unique_path", 3, tt_root, user, unique_string);
  155. }
  156. char *get_job_work_directory(const char *job_dir) {
  157. return concatenate(JOB_DIR_TO_JOB_WORK_PATTERN, "job_work_dir_path", 2,
  158. job_dir, "");
  159. }
  160. /**
  161. * Get the attempt directory for the given attempt_id
  162. */
  163. char *get_attempt_directory(const char *job_dir, const char *attempt_id) {
  164. return concatenate(JOB_DIR_TO_ATTEMPT_DIR_PATTERN, "attempt_dir_path", 2,
  165. job_dir, attempt_id);
  166. }
  167. /*
  168. * Get the path to the task launcher file which is created by the TT
  169. */
  170. char *get_task_launcher_file(const char *job_dir, const char *attempt_dir) {
  171. return concatenate(TASK_SCRIPT_PATTERN, "task_script_path", 2, job_dir,
  172. attempt_dir);
  173. }
  174. /*
  175. * Builds the full path of the dir(localTaskDir or localWorkDir)
  176. * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
  177. * dir_to_be_deleted : is either taskDir($taskId) OR taskWorkDir($taskId/work)
  178. */
  179. char *get_task_dir_path(const char *tt_root, const char *user,
  180. const char *jobid, const char *dir_to_be_deleted) {
  181. return concatenate(TT_LOCAL_TASK_DIR_PATTERN, "task_dir_full_path", 4,
  182. tt_root, user, jobid, dir_to_be_deleted);
  183. }
  184. /**
  185. * Get the log directory for the given attempt.
  186. */
  187. char *get_task_log_dir(const char *log_dir, const char *attempt_id) {
  188. return concatenate(ATTEMPT_LOG_DIR_PATTERN, "task_log_dir", 2, log_dir,
  189. attempt_id);
  190. }
  191. /**
  192. * Function to check if the passed tt_root is present in mapred.local.dir
  193. * the task-controller is configured with.
  194. */
  195. int check_tt_root(const char *tt_root) {
  196. return check_variable_against_config(TT_SYS_DIR_KEY, tt_root);
  197. }
  198. /**
  199. * Function to check if the constructed path and absolute path of the task
  200. * launcher file resolve to one and same. This is done so as to avoid
  201. * security pitfalls because of relative path components in the file name.
  202. */
  203. int check_path_for_relative_components(char *path) {
  204. char * resolved_path = (char *) canonicalize_file_name(path);
  205. if (resolved_path == NULL) {
  206. fprintf(LOGFILE,
  207. "Error resolving the path: %s. Passed path: %s\n",
  208. strerror(errno), path);
  209. return ERROR_RESOLVING_FILE_PATH;
  210. }
  211. if (strcmp(resolved_path, path) != 0) {
  212. fprintf(LOGFILE,
  213. "Relative path components in the path: %s. Resolved path: %s\n",
  214. path, resolved_path);
  215. free(resolved_path);
  216. return RELATIVE_PATH_COMPONENTS_IN_FILE_PATH;
  217. }
  218. free(resolved_path);
  219. return 0;
  220. }
  221. /**
  222. * Function to change the owner/group of a given path.
  223. */
  224. static int change_owner(const char *path, uid_t uid, gid_t gid) {
  225. int exit_code = chown(path, uid, gid);
  226. if (exit_code != 0) {
  227. fprintf(LOGFILE, "chown %d:%d for path %s failed: %s.\n", uid, gid, path,
  228. strerror(errno));
  229. }
  230. return exit_code;
  231. }
  232. /**
  233. * Function to change the mode of a given path.
  234. */
  235. static int change_mode(const char *path, mode_t mode) {
  236. int exit_code = chmod(path, mode);
  237. if (exit_code != 0) {
  238. fprintf(LOGFILE, "chmod %d of path %s failed: %s.\n", mode, path,
  239. strerror(errno));
  240. }
  241. return exit_code;
  242. }
  243. /**
  244. * Function to change permissions of the given path. It does the following
  245. * recursively:
  246. * 1) changes the owner/group of the paths to the passed owner/group
  247. * 2) changes the file permission to the passed file_mode and directory
  248. * permission to the passed dir_mode
  249. *
  250. * should_check_ownership : boolean to enable checking of ownership of each path
  251. */
  252. static int secure_path(const char *path, uid_t uid, gid_t gid,
  253. mode_t file_mode, mode_t dir_mode, int should_check_ownership) {
  254. FTS *tree = NULL; // the file hierarchy
  255. FTSENT *entry = NULL; // a file in the hierarchy
  256. char *paths[] = { (char *) path, NULL };//array needs to be NULL-terminated
  257. int process_path = 0;
  258. int dir = 0;
  259. int error_code = 0;
  260. int done = 0;
  261. // Get physical locations and don't resolve the symlinks.
  262. // Don't change directory while walking the directory.
  263. int ftsoptions = FTS_PHYSICAL | FTS_NOCHDIR;
  264. tree = fts_open(paths, ftsoptions, NULL);
  265. if (tree == NULL) {
  266. fprintf(LOGFILE,
  267. "Cannot open file traversal structure for the path %s:%s.\n", path,
  268. strerror(errno));
  269. return -1;
  270. }
  271. while (((entry = fts_read(tree)) != NULL) && !done) {
  272. dir = 0;
  273. switch (entry->fts_info) {
  274. case FTS_D:
  275. // A directory being visited in pre-order.
  276. // We change ownership of directories in post-order.
  277. // so ignore the pre-order visit.
  278. process_path = 0;
  279. break;
  280. case FTS_DC:
  281. // A directory that causes a cycle in the tree
  282. // We don't expect cycles, ignore.
  283. process_path = 0;
  284. break;
  285. case FTS_DNR:
  286. // A directory which cannot be read
  287. // Ignore and set error code.
  288. process_path = 0;
  289. error_code = -1;
  290. break;
  291. case FTS_DOT:
  292. // "." or ".."
  293. process_path = 0;
  294. break;
  295. case FTS_F:
  296. // A regular file
  297. process_path = 1;
  298. break;
  299. case FTS_DP:
  300. // A directory being visited in post-order
  301. if (entry->fts_level == 0) {
  302. // root directory. Done with traversing.
  303. done = 1;
  304. }
  305. process_path = 1;
  306. dir = 1;
  307. break;
  308. case FTS_SL:
  309. // A symbolic link
  310. // We don't want to change-ownership(and set-permissions) for the file/dir
  311. // pointed to by any symlink.
  312. process_path = 0;
  313. break;
  314. case FTS_SLNONE:
  315. // A symbolic link with a nonexistent target
  316. process_path = 0;
  317. break;
  318. case FTS_NS:
  319. // A file for which no stat(2) information was available
  320. // Ignore and set error code
  321. process_path = 0;
  322. error_code = -1;
  323. break;
  324. case FTS_ERR:
  325. // An error return. Ignore and set error code.
  326. process_path = 0;
  327. error_code = -1;
  328. break;
  329. case FTS_DEFAULT:
  330. // File that doesn't belong to any of the above type. Ignore.
  331. process_path = 0;
  332. break;
  333. default:
  334. // None of the above. Ignore and set error code
  335. process_path = 0;
  336. error_code = -1;
  337. }
  338. if (error_code != 0) {
  339. break;
  340. }
  341. if (!process_path) {
  342. continue;
  343. }
  344. if (should_check_ownership && (check_ownership(entry->fts_path) != 0)) {
  345. fprintf(LOGFILE,
  346. "Invalid file path. %s not user/group owned by the tasktracker.\n",
  347. entry->fts_path);
  348. error_code = -1;
  349. } else if (change_owner(entry->fts_path, uid, gid) != 0) {
  350. fprintf(LOGFILE, "couldn't change the ownership of %s\n",
  351. entry->fts_path);
  352. error_code = -3;
  353. } else if (change_mode(entry->fts_path, (dir ? dir_mode : file_mode)) != 0) {
  354. fprintf(LOGFILE, "couldn't change the permissions of %s\n",
  355. entry->fts_path);
  356. error_code = -3;
  357. }
  358. }
  359. if (fts_close(tree) != 0) {
  360. fprintf(LOGFILE, "couldn't close file traversal structure:%s.\n",
  361. strerror(errno));
  362. }
  363. return error_code;
  364. }
  365. /**
  366. * Function to prepare the attempt directories for the task JVM.
  367. * This is done by changing the ownership of the attempt directory recursively
  368. * to the job owner. We do the following:
  369. * * sudo chown user:mapred -R taskTracker/$user/jobcache/$jobid/$attemptid/
  370. * * sudo chmod 2770 -R taskTracker/$user/jobcache/$jobid/$attemptid/
  371. */
  372. int prepare_attempt_directories(const char *job_id, const char *attempt_id,
  373. const char *user) {
  374. if (job_id == NULL || attempt_id == NULL || user == NULL) {
  375. fprintf(LOGFILE, "Either attempt_id is null or the user passed is null.\n");
  376. return INVALID_ARGUMENT_NUMBER;
  377. }
  378. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  379. if (get_user_details(user) < 0) {
  380. fprintf(LOGFILE, "Couldn't get the user details of %s.\n", user);
  381. return INVALID_USER_NAME;
  382. }
  383. char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
  384. if (local_dir == NULL) {
  385. fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
  386. cleanup();
  387. return PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  388. }
  389. char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
  390. #ifdef DEBUG
  391. fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
  392. full_local_dir_str);
  393. #endif
  394. char *job_dir;
  395. char *attempt_dir;
  396. char **local_dir_ptr = local_dir;
  397. int failed = 0;
  398. while (*local_dir_ptr != NULL) {
  399. job_dir = get_job_directory(*local_dir_ptr, user, job_id);
  400. if (job_dir == NULL) {
  401. fprintf(LOGFILE, "Couldn't get job directory for %s.\n", job_id);
  402. failed = 1;
  403. break;
  404. }
  405. // prepare attempt-dir in each of the mapred_local_dir
  406. attempt_dir = get_attempt_directory(job_dir, attempt_id);
  407. if (attempt_dir == NULL) {
  408. fprintf(LOGFILE, "Couldn't get attempt directory for %s.\n", attempt_id);
  409. failed = 1;
  410. free(job_dir);
  411. break;
  412. }
  413. struct stat filestat;
  414. if (stat(attempt_dir, &filestat) != 0) {
  415. if (errno == ENOENT) {
  416. #ifdef DEBUG
  417. fprintf(LOGFILE,
  418. "attempt_dir %s doesn't exist. Not doing anything.\n", attempt_dir);
  419. #endif
  420. } else {
  421. // stat failed because of something else!
  422. fprintf(LOGFILE, "Failed to stat the attempt_dir %s\n", attempt_dir);
  423. failed = 1;
  424. free(attempt_dir);
  425. free(job_dir);
  426. break;
  427. }
  428. } else if (secure_path(attempt_dir, user_detail->pw_uid,
  429. tasktracker_gid, S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG,
  430. 1) != 0) {
  431. // No setgid on files and setgid on dirs, 770
  432. fprintf(LOGFILE, "Failed to secure the attempt_dir %s\n", attempt_dir);
  433. failed = 1;
  434. free(attempt_dir);
  435. free(job_dir);
  436. break;
  437. }
  438. local_dir_ptr++;
  439. free(attempt_dir);
  440. free(job_dir);
  441. }
  442. free(local_dir);
  443. free(full_local_dir_str);
  444. cleanup();
  445. if (failed) {
  446. return PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  447. }
  448. return 0;
  449. }
  450. /**
  451. * Function to prepare the task logs for the child. It gives the user
  452. * ownership of the attempt's log-dir to the user and group ownership to the
  453. * user running tasktracker.
  454. * * sudo chown user:mapred log-dir/userlogs/$attemptid
  455. * * sudo chmod -R 2770 log-dir/userlogs/$attemptid
  456. */
  457. int prepare_task_logs(const char *log_dir, const char *task_id) {
  458. char *task_log_dir = get_task_log_dir(log_dir, task_id);
  459. if (task_log_dir == NULL) {
  460. fprintf(LOGFILE, "Couldn't get task_log directory %s.\n", task_log_dir);
  461. return -1;
  462. }
  463. struct stat filestat;
  464. if (stat(task_log_dir, &filestat) != 0) {
  465. if (errno == ENOENT) {
  466. // See TaskRunner.java to see that an absent log-dir doesn't fail the task.
  467. // Task log dir for cleanup tasks will not have the name
  468. // task-attempt-id.cleanup. Instead a log.index.cleanup is created in
  469. // task-attempt log dir. We check if the directory exists and return if
  470. // it doesn't. So the following will work for cleanup attempts too.
  471. #ifdef DEBUG
  472. fprintf(LOGFILE, "task_log_dir %s doesn't exist. Not doing anything.\n",
  473. task_log_dir);
  474. #endif
  475. return 0;
  476. } else {
  477. // stat failed because of something else!
  478. fprintf(LOGFILE, "Failed to stat the task_log_dir %s\n", task_log_dir);
  479. return -1;
  480. }
  481. }
  482. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  483. if (secure_path(task_log_dir, user_detail->pw_uid, tasktracker_gid,
  484. S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG, 1) != 0) {
  485. // setgid on dirs but not files, 770. As of now, there are no files though
  486. fprintf(LOGFILE, "Failed to secure the log_dir %s\n", task_log_dir);
  487. return -1;
  488. }
  489. return 0;
  490. }
  491. //function used to populate and user_details structure.
  492. int get_user_details(const char *user) {
  493. if (user_detail == NULL) {
  494. user_detail = getpwnam(user);
  495. if (user_detail == NULL) {
  496. fprintf(LOGFILE, "Invalid user\n");
  497. return -1;
  498. }
  499. }
  500. return 0;
  501. }
  502. /*
  503. * Function to check if the TaskTracker actually owns the file.
  504. */
  505. int check_ownership(char *path) {
  506. struct stat filestat;
  507. if (stat(path, &filestat) != 0) {
  508. return UNABLE_TO_STAT_FILE;
  509. }
  510. // check user/group. User should be TaskTracker user, group can either be
  511. // TaskTracker's primary group or the special group to which binary's
  512. // permissions are set.
  513. if (getuid() != filestat.st_uid || (getgid() != filestat.st_gid && getegid()
  514. != filestat.st_gid)) {
  515. return FILE_NOT_OWNED_BY_TASKTRACKER;
  516. }
  517. return 0;
  518. }
  519. /**
  520. * Function to initialize the user directories of a user.
  521. * It does the following:
  522. * * sudo chown user:mapred -R taskTracker/$user
  523. * * if user is not $tt_user,
  524. * * sudo chmod 2570 -R taskTracker/$user
  525. * * else // user is tt_user
  526. * * sudo chmod 2770 -R taskTracker/$user
  527. * This is done once per every user on the TaskTracker.
  528. */
  529. int initialize_user(const char *user) {
  530. if (user == NULL) {
  531. fprintf(LOGFILE, "user passed is null.\n");
  532. return INVALID_ARGUMENT_NUMBER;
  533. }
  534. if (get_user_details(user) < 0) {
  535. fprintf(LOGFILE, "Couldn't get the user details of %s", user);
  536. return INVALID_USER_NAME;
  537. }
  538. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  539. char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
  540. if (local_dir == NULL) {
  541. fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
  542. cleanup();
  543. return INVALID_TT_ROOT;
  544. }
  545. char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
  546. #ifdef DEBUG
  547. fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
  548. full_local_dir_str);
  549. #endif
  550. int is_tt_user = (user_detail->pw_uid == getuid());
  551. // for tt_user, set 770 permissions; otherwise set 570
  552. mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
  553. : (S_IRUSR | S_IXUSR | S_IRWXG);
  554. char *user_dir;
  555. char **local_dir_ptr = local_dir;
  556. int failed = 0;
  557. while (*local_dir_ptr != NULL) {
  558. user_dir = get_user_directory(*local_dir_ptr, user);
  559. if (user_dir == NULL) {
  560. fprintf(LOGFILE, "Couldn't get userdir directory for %s.\n", user);
  561. failed = 1;
  562. break;
  563. }
  564. struct stat filestat;
  565. if (stat(user_dir, &filestat) != 0) {
  566. if (errno == ENOENT) {
  567. #ifdef DEBUG
  568. fprintf(LOGFILE, "user_dir %s doesn't exist. Not doing anything.\n",
  569. user_dir);
  570. #endif
  571. } else {
  572. // stat failed because of something else!
  573. fprintf(LOGFILE, "Failed to stat the user_dir %s\n",
  574. user_dir);
  575. failed = 1;
  576. free(user_dir);
  577. break;
  578. }
  579. } else if (secure_path(user_dir, user_detail->pw_uid,
  580. tasktracker_gid, permissions, S_ISGID | permissions, 1) != 0) {
  581. // No setgid on files and setgid on dirs,
  582. // 770 for tt_user and 570 for any other user
  583. fprintf(LOGFILE, "Failed to secure the user_dir %s\n",
  584. user_dir);
  585. failed = 1;
  586. free(user_dir);
  587. break;
  588. }
  589. local_dir_ptr++;
  590. free(user_dir);
  591. }
  592. free(local_dir);
  593. free(full_local_dir_str);
  594. cleanup();
  595. if (failed) {
  596. return INITIALIZE_USER_FAILED;
  597. }
  598. return 0;
  599. }
  600. /**
  601. * Function to prepare the job directories for the task JVM.
  602. * We do the following:
  603. * * sudo chown user:mapred -R taskTracker/$user/jobcache/$jobid
  604. * * if user is not $tt_user,
  605. * * sudo chmod 2570 -R taskTracker/$user/jobcache/$jobid
  606. * * else // user is tt_user
  607. * * sudo chmod 2770 -R taskTracker/$user/jobcache/$jobid
  608. * *
  609. * * For any user, sudo chmod 2770 taskTracker/$user/jobcache/$jobid/work
  610. */
  611. int initialize_job(const char *jobid, const char *user) {
  612. if (jobid == NULL || user == NULL) {
  613. fprintf(LOGFILE, "Either jobid is null or the user passed is null.\n");
  614. return INVALID_ARGUMENT_NUMBER;
  615. }
  616. if (get_user_details(user) < 0) {
  617. fprintf(LOGFILE, "Couldn't get the user details of %s", user);
  618. return INVALID_USER_NAME;
  619. }
  620. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  621. char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
  622. if (local_dir == NULL) {
  623. fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
  624. cleanup();
  625. return INVALID_TT_ROOT;
  626. }
  627. char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
  628. #ifdef DEBUG
  629. fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
  630. full_local_dir_str);
  631. #endif
  632. int is_tt_user = (user_detail->pw_uid == getuid());
  633. // for tt_user, set 770 permissions; for any other user, set 570 for job-dir
  634. mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
  635. : (S_IRUSR | S_IXUSR | S_IRWXG);
  636. char *job_dir, *job_work_dir;
  637. char **local_dir_ptr = local_dir;
  638. int failed = 0;
  639. while (*local_dir_ptr != NULL) {
  640. job_dir = get_job_directory(*local_dir_ptr, user, jobid);
  641. if (job_dir == NULL) {
  642. fprintf(LOGFILE, "Couldn't get job directory for %s.\n", jobid);
  643. failed = 1;
  644. break;
  645. }
  646. struct stat filestat;
  647. if (stat(job_dir, &filestat) != 0) {
  648. if (errno == ENOENT) {
  649. #ifdef DEBUG
  650. fprintf(LOGFILE, "job_dir %s doesn't exist. Not doing anything.\n",
  651. job_dir);
  652. #endif
  653. } else {
  654. // stat failed because of something else!
  655. fprintf(LOGFILE, "Failed to stat the job_dir %s\n", job_dir);
  656. failed = 1;
  657. free(job_dir);
  658. break;
  659. }
  660. } else if (secure_path(job_dir, user_detail->pw_uid, tasktracker_gid,
  661. permissions, S_ISGID | permissions, 1) != 0) {
  662. // No setgid on files and setgid on dirs,
  663. // 770 for tt_user and 570 for any other user
  664. fprintf(LOGFILE, "Failed to secure the job_dir %s\n", job_dir);
  665. failed = 1;
  666. free(job_dir);
  667. break;
  668. } else if (!is_tt_user) {
  669. // For tt_user, we don't need this as we already set 2770 for
  670. // job-work-dir because of "chmod -R" done above
  671. job_work_dir = get_job_work_directory(job_dir);
  672. if (job_work_dir == NULL) {
  673. fprintf(LOGFILE, "Couldn't get job-work directory for %s.\n", jobid);
  674. failed = 1;
  675. break;
  676. }
  677. // Set 2770 on the job-work directory
  678. if (stat(job_work_dir, &filestat) != 0) {
  679. if (errno == ENOENT) {
  680. #ifdef DEBUG
  681. fprintf(LOGFILE,
  682. "job_work_dir %s doesn't exist. Not doing anything.\n",
  683. job_work_dir);
  684. #endif
  685. free(job_work_dir);
  686. } else {
  687. // stat failed because of something else!
  688. fprintf(LOGFILE, "Failed to stat the job_work_dir %s\n",
  689. job_work_dir);
  690. failed = 1;
  691. free(job_work_dir);
  692. free(job_dir);
  693. break;
  694. }
  695. } else if (change_mode(job_work_dir, S_ISGID | S_IRWXU | S_IRWXG) != 0) {
  696. fprintf(LOGFILE,
  697. "couldn't change the permissions of job_work_dir %s\n",
  698. job_work_dir);
  699. failed = 1;
  700. free(job_work_dir);
  701. free(job_dir);
  702. break;
  703. }
  704. }
  705. local_dir_ptr++;
  706. free(job_dir);
  707. }
  708. free(local_dir);
  709. free(full_local_dir_str);
  710. cleanup();
  711. if (failed) {
  712. return INITIALIZE_JOB_FAILED;
  713. }
  714. return 0;
  715. }
  716. /**
  717. * Function to initialize the distributed cache file for a user.
  718. * It does the following:
  719. * * sudo chown user:mapred -R taskTracker/$user/distcache/<randomdir>
  720. * * if user is not $tt_user,
  721. * * sudo chmod 2570 -R taskTracker/$user/distcache/<randomdir>
  722. * * else // user is tt_user
  723. * * sudo chmod 2770 -R taskTracker/$user/distcache/<randomdir>
  724. * This is done once per localization. Tasks reusing JVMs just create
  725. * symbolic links themselves and so there isn't anything specific to do in
  726. * that case.
  727. */
  728. int initialize_distributed_cache_file(const char *tt_root,
  729. const char *unique_string, const char *user) {
  730. if (tt_root == NULL) {
  731. fprintf(LOGFILE, "tt_root passed is null.\n");
  732. return INVALID_ARGUMENT_NUMBER;
  733. }
  734. if (unique_string == NULL) {
  735. fprintf(LOGFILE, "unique_string passed is null.\n");
  736. return INVALID_ARGUMENT_NUMBER;
  737. }
  738. if (user == NULL) {
  739. fprintf(LOGFILE, "user passed is null.\n");
  740. return INVALID_ARGUMENT_NUMBER;
  741. }
  742. if (get_user_details(user) < 0) {
  743. fprintf(LOGFILE, "Couldn't get the user details of %s", user);
  744. return INVALID_USER_NAME;
  745. }
  746. //Check tt_root
  747. if (check_tt_root(tt_root) < 0) {
  748. fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
  749. cleanup();
  750. return INVALID_TT_ROOT;
  751. }
  752. // set permission on the unique directory
  753. char *localized_unique_dir = get_distributed_cache_directory(tt_root, user,
  754. unique_string);
  755. if (localized_unique_dir == NULL) {
  756. fprintf(LOGFILE, "Couldn't get unique distcache directory for %s.\n", user);
  757. cleanup();
  758. return INITIALIZE_DISTCACHEFILE_FAILED;
  759. }
  760. gid_t binary_gid = getegid(); // the group permissions of the binary.
  761. int is_tt_user = (user_detail->pw_uid == getuid());
  762. // for tt_user, set 770 permissions; for any other user, set 570
  763. mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
  764. : (S_IRUSR | S_IXUSR | S_IRWXG);
  765. int failed = 0;
  766. struct stat filestat;
  767. if (stat(localized_unique_dir, &filestat) != 0) {
  768. // stat on distcache failed because of something
  769. fprintf(LOGFILE, "Failed to stat the localized_unique_dir %s\n",
  770. localized_unique_dir);
  771. failed = INITIALIZE_DISTCACHEFILE_FAILED;
  772. } else if (secure_path(localized_unique_dir, user_detail->pw_uid,
  773. binary_gid, permissions, S_ISGID | permissions, 1) != 0) {
  774. // No setgid on files and setgid on dirs,
  775. // 770 for tt_user and 570 for any other user
  776. fprintf(LOGFILE, "Failed to secure the localized_unique_dir %s\n",
  777. localized_unique_dir);
  778. failed = INITIALIZE_DISTCACHEFILE_FAILED;
  779. }
  780. free(localized_unique_dir);
  781. cleanup();
  782. return failed;
  783. }
  784. /**
  785. * Function used to initialize task. Prepares attempt_dir, jars_dir and
  786. * log_dir to be accessible by the child
  787. */
  788. int initialize_task(const char *jobid, const char *taskid, const char *user) {
  789. int exit_code = 0;
  790. #ifdef DEBUG
  791. fprintf(LOGFILE, "job-id passed to initialize_task : %s.\n", jobid);
  792. fprintf(LOGFILE, "task-d passed to initialize_task : %s.\n", taskid);
  793. #endif
  794. if (prepare_attempt_directories(jobid, taskid, user) != 0) {
  795. fprintf(LOGFILE,
  796. "Couldn't prepare the attempt directories for %s of user %s.\n",
  797. taskid, user);
  798. exit_code = PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  799. goto cleanup;
  800. }
  801. char *log_dir = (char *) get_value(TT_LOG_DIR_KEY);
  802. if (log_dir == NULL) {
  803. fprintf(LOGFILE, "Log directory is not configured.\n");
  804. exit_code = INVALID_TT_LOG_DIR;
  805. goto cleanup;
  806. }
  807. if (prepare_task_logs(log_dir, taskid) != 0) {
  808. fprintf(LOGFILE, "Couldn't prepare task logs directory %s for %s.\n",
  809. log_dir, taskid);
  810. exit_code = PREPARE_TASK_LOGS_FAILED;
  811. }
  812. cleanup:
  813. // free configurations
  814. cleanup();
  815. if (log_dir != NULL) {
  816. free(log_dir);
  817. }
  818. return exit_code;
  819. }
  820. /*
  821. * Function used to launch a task as the provided user. It does the following :
  822. * 1) Checks if the tt_root passed is found in mapred.local.dir
  823. * 2) Prepares attempt_dir and log_dir to be accessible by the child
  824. * 3) Uses get_task_launcher_file to fetch the task script file path
  825. * 4) Does an execlp on the same in order to replace the current image with
  826. * task image.
  827. */
  828. int run_task_as_user(const char * user, const char *jobid, const char *taskid,
  829. const char *tt_root) {
  830. int exit_code = 0;
  831. if (jobid == NULL || taskid == NULL || tt_root == NULL) {
  832. return INVALID_ARGUMENT_NUMBER;
  833. }
  834. #ifdef DEBUG
  835. fprintf(LOGFILE, "Job-id passed to run_task_as_user : %s.\n", jobid);
  836. fprintf(LOGFILE, "task-d passed to run_task_as_user : %s.\n", taskid);
  837. fprintf(LOGFILE, "tt_root passed to run_task_as_user : %s.\n", tt_root);
  838. #endif
  839. //Check tt_root before switching the user, as reading configuration
  840. //file requires privileged access.
  841. if (check_tt_root(tt_root) < 0) {
  842. fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
  843. cleanup();
  844. return INVALID_TT_ROOT;
  845. }
  846. char *job_dir = NULL, *task_script_path = NULL;
  847. if ((exit_code = initialize_task(jobid, taskid, user)) != 0) {
  848. fprintf(LOGFILE, "Couldn't initialise the task %s of user %s.\n", taskid,
  849. user);
  850. goto cleanup;
  851. }
  852. job_dir = get_job_directory(tt_root, user, jobid);
  853. if (job_dir == NULL) {
  854. fprintf(LOGFILE, "Couldn't obtain job_dir for %s in %s.\n", jobid, tt_root);
  855. exit_code = OUT_OF_MEMORY;
  856. goto cleanup;
  857. }
  858. task_script_path = get_task_launcher_file(job_dir, taskid);
  859. if (task_script_path == NULL) {
  860. fprintf(LOGFILE, "Couldn't obtain task_script_path in %s.\n", job_dir);
  861. exit_code = OUT_OF_MEMORY;
  862. goto cleanup;
  863. }
  864. errno = 0;
  865. exit_code = check_path_for_relative_components(task_script_path);
  866. if(exit_code != 0) {
  867. goto cleanup;
  868. }
  869. //change the user
  870. fcloseall();
  871. free(job_dir);
  872. umask(0007);
  873. if (change_user(user) != 0) {
  874. exit_code = SETUID_OPER_FAILED;
  875. goto cleanup;
  876. }
  877. errno = 0;
  878. cleanup();
  879. execlp(task_script_path, task_script_path, NULL);
  880. if (errno != 0) {
  881. fprintf(LOGFILE, "Couldn't execute the task jvm file: %s", strerror(errno));
  882. free(task_script_path);
  883. exit_code = UNABLE_TO_EXECUTE_TASK_SCRIPT;
  884. }
  885. return exit_code;
  886. cleanup:
  887. if (job_dir != NULL) {
  888. free(job_dir);
  889. }
  890. if (task_script_path != NULL) {
  891. free(task_script_path);
  892. }
  893. // free configurations
  894. cleanup();
  895. return exit_code;
  896. }
  897. /**
  898. * Function used to terminate/kill a task launched by the user.
  899. * The function sends appropriate signal to the process group
  900. * specified by the task_pid.
  901. */
  902. int kill_user_task(const char *user, const char *task_pid, int sig) {
  903. int pid = 0;
  904. if(task_pid == NULL) {
  905. return INVALID_ARGUMENT_NUMBER;
  906. }
  907. #ifdef DEBUG
  908. fprintf(LOGFILE, "user passed to kill_user_task : %s.\n", user);
  909. fprintf(LOGFILE, "task-pid passed to kill_user_task : %s.\n", task_pid);
  910. fprintf(LOGFILE, "signal passed to kill_user_task : %d.\n", sig);
  911. #endif
  912. pid = atoi(task_pid);
  913. if(pid <= 0) {
  914. return INVALID_TASK_PID;
  915. }
  916. fcloseall();
  917. if (change_user(user) != 0) {
  918. cleanup();
  919. return SETUID_OPER_FAILED;
  920. }
  921. //Don't continue if the process-group is not alive anymore.
  922. if(kill(-pid,0) < 0) {
  923. errno = 0;
  924. cleanup();
  925. return 0;
  926. }
  927. if (kill(-pid, sig) < 0) {
  928. if(errno != ESRCH) {
  929. fprintf(LOGFILE, "Error is %s\n", strerror(errno));
  930. cleanup();
  931. return UNABLE_TO_KILL_TASK;
  932. }
  933. errno = 0;
  934. }
  935. cleanup();
  936. return 0;
  937. }
  938. /**
  939. * Enables the path for deletion by changing the owner, group and permissions
  940. * of the specified path and all the files/directories in the path recursively.
  941. * * sudo chown user:mapred -R full_path
  942. * * sudo chmod 2770 -R full_path
  943. * Before changing permissions, makes sure that the given path doesn't contain
  944. * any relative components.
  945. * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
  946. * full_path : is either jobLocalDir, taskDir OR taskWorkDir that is to be
  947. * deleted
  948. */
  949. static int enable_path_for_cleanup(const char *tt_root, const char *user,
  950. char *full_path) {
  951. int exit_code = 0;
  952. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  953. if (check_tt_root(tt_root) < 0) {
  954. fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
  955. cleanup();
  956. return INVALID_TT_ROOT;
  957. }
  958. if (full_path == NULL) {
  959. fprintf(LOGFILE,
  960. "Could not build the full path. Not deleting the dir %s\n",
  961. full_path);
  962. exit_code = UNABLE_TO_BUILD_PATH; // may be malloc failed
  963. }
  964. // Make sure that the path given is not having any relative components
  965. else if ((exit_code = check_path_for_relative_components(full_path)) != 0) {
  966. fprintf(LOGFILE,
  967. "Not changing permissions. Path may contain relative components.\n",
  968. full_path);
  969. }
  970. else if (get_user_details(user) < 0) {
  971. fprintf(LOGFILE, "Couldn't get the user details of %s.\n", user);
  972. exit_code = INVALID_USER_NAME;
  973. }
  974. else if (exit_code = secure_path(full_path, user_detail->pw_uid,
  975. tasktracker_gid,
  976. S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG, 0) != 0) {
  977. // No setgid on files and setgid on dirs, 770.
  978. // set 770 permissions for user, TTgroup for all files/directories in
  979. // 'full_path' recursively sothat deletion of path by TaskTracker succeeds.
  980. fprintf(LOGFILE, "Failed to set permissions for %s\n", full_path);
  981. }
  982. if (full_path != NULL) {
  983. free(full_path);
  984. }
  985. // free configurations
  986. cleanup();
  987. return exit_code;
  988. }
  989. /**
  990. * Enables the task work-dir/local-dir path for deletion.
  991. * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
  992. * dir_to_be_deleted : is either taskDir OR taskWorkDir that is to be deleted
  993. */
  994. int enable_task_for_cleanup(const char *tt_root, const char *user,
  995. const char *jobid, const char *dir_to_be_deleted) {
  996. char *full_path = get_task_dir_path(tt_root, user, jobid, dir_to_be_deleted);
  997. return enable_path_for_cleanup(tt_root, user, full_path);
  998. }
  999. /**
  1000. * Enables the jobLocalDir for deletion.
  1001. * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
  1002. * user : owner of the job
  1003. * jobid : id of the job for which the cleanup is needed.
  1004. */
  1005. int enable_job_for_cleanup(const char *tt_root, const char *user,
  1006. const char *jobid) {
  1007. char *full_path = get_job_directory(tt_root, user, jobid);
  1008. return enable_path_for_cleanup(tt_root, user, full_path);
  1009. }