task-controller.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "task-controller.h"
  /* Password-database record of the user being acted on; NULL until looked up. */
  struct passwd *user_detail = NULL;

  /* Stream that every diagnostic in this file is written to. */
  FILE *LOGFILE;
  /*
   * Global cleanup hook. At present it only releases the loaded
   * configuration by delegating to free_configurations().
   */
  void cleanup() {
    free_configurations();
  }
  27. //change the user to passed user for executing/killing tasks
  28. int change_user(const char * user) {
  29. if (get_user_details(user) < 0) {
  30. return -1;
  31. }
  32. if(initgroups(user_detail->pw_name, user_detail->pw_gid) != 0) {
  33. fprintf(LOGFILE, "unable to initgroups : %s\n", strerror(errno));
  34. cleanup();
  35. return SETUID_OPER_FAILED;
  36. }
  37. errno = 0;
  38. setgid(user_detail->pw_gid);
  39. if (errno != 0) {
  40. fprintf(LOGFILE, "unable to setgid : %s\n", strerror(errno));
  41. cleanup();
  42. return SETUID_OPER_FAILED;
  43. }
  44. setegid(user_detail->pw_gid);
  45. if (errno != 0) {
  46. fprintf(LOGFILE, "unable to setegid : %s\n", strerror(errno));
  47. cleanup();
  48. return SETUID_OPER_FAILED;
  49. }
  50. setuid(user_detail->pw_uid);
  51. if (errno != 0) {
  52. fprintf(LOGFILE, "unable to setuid : %s\n", strerror(errno));
  53. cleanup();
  54. return SETUID_OPER_FAILED;
  55. }
  56. seteuid(user_detail->pw_uid);
  57. if (errno != 0) {
  58. fprintf(LOGFILE, "unable to seteuid : %s\n", strerror(errno));
  59. cleanup();
  60. return SETUID_OPER_FAILED;
  61. }
  62. return 0;
  63. }
  64. /**
  65. * Checks the passed value for the variable config_key against the values in
  66. * the configuration.
  67. * Returns 0 if the passed value is found in the configuration,
  68. * -1 otherwise
  69. */
  70. int check_variable_against_config(const char *config_key,
  71. const char *passed_value) {
  72. if (config_key == NULL || passed_value == NULL) {
  73. return -1;
  74. }
  75. int found = -1;
  76. const char **config_value = get_values(config_key);
  77. if (config_value == NULL) {
  78. fprintf(LOGFILE, "%s is not configured.\n", config_key);
  79. return -1;
  80. }
  81. char *full_config_value = (char *)get_value(config_key);
  82. char **config_val_ptr = (char **) config_value;
  83. while (*config_val_ptr != NULL) {
  84. if (strcmp(*config_val_ptr, passed_value) == 0) {
  85. found = 0;
  86. break;
  87. }
  88. config_val_ptr++;
  89. }
  90. if (found != 0) {
  91. fprintf(
  92. LOGFILE,
  93. "Invalid value passed: \
  94. Configured value of %s is %s. \
  95. Passed value is %s.\n",
  96. config_key, full_config_value, passed_value);
  97. }
  98. free(full_config_value);
  99. free(config_value);
  100. return found;
  101. }
  102. /**
  103. * Utility function to concatenate argB to argA using the concat_pattern
  104. */
  105. char *concatenate(char *concat_pattern, char *return_path_name, int numArgs,
  106. ...) {
  107. va_list ap;
  108. va_start(ap, numArgs);
  109. int strlen_args = 0;
  110. char *arg = NULL;
  111. int j;
  112. for (j = 0; j < numArgs; j++) {
  113. arg = va_arg(ap, char*);
  114. if (arg == NULL) {
  115. fprintf(LOGFILE, "One of the arguments passed for %s in null.\n",
  116. return_path_name);
  117. return NULL;
  118. }
  119. strlen_args += strlen(arg);
  120. }
  121. va_end(ap);
  122. char *return_path = NULL;
  123. int str_len = strlen(concat_pattern) + strlen_args;
  124. return_path = (char *) malloc(sizeof(char) * (str_len + 1));
  125. if (return_path == NULL) {
  126. fprintf(LOGFILE, "Unable to allocate memory for %s.\n", return_path_name);
  127. return NULL;
  128. }
  129. memset(return_path, '\0', str_len + 1);
  130. va_start(ap, numArgs);
  131. vsnprintf(return_path, str_len, concat_pattern, ap);
  132. va_end(ap);
  133. return return_path;
  134. }
  135. /**
  136. * Get the job-directory path from tt_root, user name and job-id
  137. */
  138. char *get_job_directory(const char * tt_root, const char *user,
  139. const char *jobid) {
  140. return concatenate(TT_JOB_DIR_PATTERN, "job_dir_path", 3, tt_root, user,
  141. jobid);
  142. }
  143. /**
  144. * Get the user directory of a particular user
  145. */
  146. char *get_user_directory(const char *tt_root, const char *user) {
  147. return concatenate(USER_DIR_PATTERN, "user_dir_path", 2, tt_root, user);
  148. }
  149. /**
  150. * Get the distributed cache directory for a particular user
  151. */
  152. char *get_distributed_cache_directory(const char *tt_root, const char *user,
  153. const char* unique_string) {
  154. return concatenate(USER_DISTRIBUTED_CACHE_DIR_PATTERN,
  155. "dist_cache_unique_path", 3, tt_root, user, unique_string);
  156. }
  157. char *get_job_work_directory(const char *job_dir) {
  158. return concatenate(JOB_DIR_TO_JOB_WORK_PATTERN, "job_work_dir_path", 2,
  159. job_dir, "");
  160. }
  161. /**
  162. * Get the attempt directory for the given attempt_id
  163. */
  164. char *get_attempt_directory(const char *job_dir, const char *attempt_id) {
  165. return concatenate(JOB_DIR_TO_ATTEMPT_DIR_PATTERN, "attempt_dir_path", 2,
  166. job_dir, attempt_id);
  167. }
  168. /*
  169. * Get the path to the task launcher file which is created by the TT
  170. */
  171. char *get_task_launcher_file(const char *job_dir, const char *attempt_dir) {
  172. return concatenate(TASK_SCRIPT_PATTERN, "task_script_path", 2, job_dir,
  173. attempt_dir);
  174. }
  175. /*
  176. * Builds the full path of the dir(localTaskDir or localWorkDir)
  177. * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
  178. * dir_to_be_deleted : is either taskDir($taskId) OR taskWorkDir($taskId/work)
  179. */
  180. char *get_task_dir_path(const char *tt_root, const char *user,
  181. const char *jobid, const char *dir_to_be_deleted) {
  182. return concatenate(TT_LOCAL_TASK_DIR_PATTERN, "task_dir_full_path", 4,
  183. tt_root, user, jobid, dir_to_be_deleted);
  184. }
  185. /**
  186. * Get the log directory for the given attempt.
  187. */
  188. char *get_task_log_dir(const char *log_dir, const char *job_id,
  189. const char *attempt_id) {
  190. return concatenate(ATTEMPT_LOG_DIR_PATTERN, "task_log_dir", 3, log_dir,
  191. job_id, attempt_id);
  192. }
  193. /**
  194. * Get the log directory for the given job.
  195. */
  196. char *get_job_log_dir(const char *log_dir, const char *job_id) {
  197. return concatenate(JOB_LOG_DIR_PATTERN, "job_log_dir", 2, log_dir, job_id);
  198. }
  199. /**
  200. * Get the job ACLs file for the given job log dir.
  201. */
  202. char *get_job_acls_file(const char *log_dir) {
  203. return concatenate(JOB_LOG_DIR_TO_JOB_ACLS_FILE_PATTERN, "job_acls_file",
  204. 1, log_dir);
  205. }
  206. /**
  207. * Function to check if the passed tt_root is present in mapreduce.cluster.local.dir
  208. * the task-controller is configured with.
  209. */
  210. int check_tt_root(const char *tt_root) {
  211. return check_variable_against_config(TT_SYS_DIR_KEY, tt_root);
  212. }
  213. /**
  214. * Function to check if the constructed path and absolute path of the task
  215. * launcher file resolve to one and same. This is done so as to avoid
  216. * security pitfalls because of relative path components in the file name.
  217. */
  218. int check_path_for_relative_components(char *path) {
  219. char * resolved_path = (char *) canonicalize_file_name(path);
  220. if (resolved_path == NULL) {
  221. fprintf(LOGFILE,
  222. "Error resolving the path: %s. Passed path: %s\n",
  223. strerror(errno), path);
  224. return ERROR_RESOLVING_FILE_PATH;
  225. }
  226. if (strcmp(resolved_path, path) != 0) {
  227. fprintf(LOGFILE,
  228. "Relative path components in the path: %s. Resolved path: %s\n",
  229. path, resolved_path);
  230. free(resolved_path);
  231. return RELATIVE_PATH_COMPONENTS_IN_FILE_PATH;
  232. }
  233. free(resolved_path);
  234. return 0;
  235. }
  236. /**
  237. * Function to change the owner/group of a given path.
  238. */
  239. static int change_owner(const char *path, uid_t uid, gid_t gid) {
  240. int exit_code = chown(path, uid, gid);
  241. if (exit_code != 0) {
  242. fprintf(LOGFILE, "chown %d:%d for path %s failed: %s.\n", uid, gid, path,
  243. strerror(errno));
  244. }
  245. return exit_code;
  246. }
  247. /**
  248. * Function to change the mode of a given path.
  249. */
  250. static int change_mode(const char *path, mode_t mode) {
  251. int exit_code = chmod(path, mode);
  252. if (exit_code != 0) {
  253. fprintf(LOGFILE, "chmod %d of path %s failed: %s.\n", mode, path,
  254. strerror(errno));
  255. }
  256. return exit_code;
  257. }
  258. /**
  259. * Function to change permissions of the given path. It does the following
  260. * recursively:
  261. * 1) changes the owner/group of the paths to the passed owner/group
  262. * 2) changes the file permission to the passed file_mode and directory
  263. * permission to the passed dir_mode
  264. *
  265. * should_check_ownership : boolean to enable checking of ownership of each path
  266. */
  267. static int secure_path(const char *path, uid_t uid, gid_t gid,
  268. mode_t file_mode, mode_t dir_mode, int should_check_ownership) {
  269. FTS *tree = NULL; // the file hierarchy
  270. FTSENT *entry = NULL; // a file in the hierarchy
  271. char *paths[] = { (char *) path, NULL };//array needs to be NULL-terminated
  272. int process_path = 0;
  273. int dir = 0;
  274. int error_code = 0;
  275. int done = 0;
  276. // Get physical locations and don't resolve the symlinks.
  277. // Don't change directory while walking the directory.
  278. int ftsoptions = FTS_PHYSICAL | FTS_NOCHDIR;
  279. tree = fts_open(paths, ftsoptions, NULL);
  280. if (tree == NULL) {
  281. fprintf(LOGFILE,
  282. "Cannot open file traversal structure for the path %s:%s.\n", path,
  283. strerror(errno));
  284. return -1;
  285. }
  286. while (((entry = fts_read(tree)) != NULL) && !done) {
  287. dir = 0;
  288. switch (entry->fts_info) {
  289. case FTS_D:
  290. // A directory being visited in pre-order.
  291. // We change ownership of directories in post-order.
  292. // so ignore the pre-order visit.
  293. process_path = 0;
  294. break;
  295. case FTS_DC:
  296. // A directory that causes a cycle in the tree
  297. // We don't expect cycles, ignore.
  298. process_path = 0;
  299. break;
  300. case FTS_DNR:
  301. // A directory which cannot be read
  302. // Ignore and set error code.
  303. process_path = 0;
  304. error_code = -1;
  305. break;
  306. case FTS_DOT:
  307. // "." or ".."
  308. process_path = 0;
  309. break;
  310. case FTS_F:
  311. // A regular file
  312. process_path = 1;
  313. break;
  314. case FTS_DP:
  315. // A directory being visited in post-order
  316. if (entry->fts_level == 0) {
  317. // root directory. Done with traversing.
  318. done = 1;
  319. }
  320. process_path = 1;
  321. dir = 1;
  322. break;
  323. case FTS_SL:
  324. // A symbolic link
  325. // We don't want to change-ownership(and set-permissions) for the file/dir
  326. // pointed to by any symlink.
  327. process_path = 0;
  328. break;
  329. case FTS_SLNONE:
  330. // A symbolic link with a nonexistent target
  331. process_path = 0;
  332. break;
  333. case FTS_NS:
  334. // A file for which no stat(2) information was available
  335. // Ignore and set error code
  336. process_path = 0;
  337. error_code = -1;
  338. break;
  339. case FTS_ERR:
  340. // An error return. Ignore and set error code.
  341. process_path = 0;
  342. error_code = -1;
  343. break;
  344. case FTS_DEFAULT:
  345. // File that doesn't belong to any of the above type. Ignore.
  346. process_path = 0;
  347. break;
  348. default:
  349. // None of the above. Ignore and set error code
  350. process_path = 0;
  351. error_code = -1;
  352. }
  353. if (error_code != 0) {
  354. break;
  355. }
  356. if (!process_path) {
  357. continue;
  358. }
  359. error_code = secure_single_path(entry->fts_path, uid, gid,
  360. (dir ? dir_mode : file_mode), should_check_ownership);
  361. }
  362. if (fts_close(tree) != 0) {
  363. fprintf(LOGFILE, "couldn't close file traversal structure:%s.\n",
  364. strerror(errno));
  365. }
  366. return error_code;
  367. }
  368. /**
  369. * Function to change ownership and permissions of the given path.
  370. * This call sets ownership and permissions just for the path, not recursive.
  371. */
  372. int secure_single_path(char *path, uid_t uid, gid_t gid,
  373. mode_t perm, int should_check_ownership) {
  374. int error_code = 0;
  375. if (should_check_ownership &&
  376. (check_ownership(path, uid, gid) != 0)) {
  377. fprintf(LOGFILE,
  378. "Invalid file path. %s not user/group owned by the tasktracker.\n", path);
  379. error_code = -1;
  380. } else if (change_owner(path, uid, gid) != 0) {
  381. fprintf(LOGFILE, "couldn't change the ownership of %s\n", path);
  382. error_code = -3;
  383. } else if (change_mode(path, perm) != 0) {
  384. fprintf(LOGFILE, "couldn't change the permissions of %s\n", path);
  385. error_code = -3;
  386. }
  387. return error_code;
  388. }
  389. /**
  390. * Function to prepare the attempt directories for the task JVM.
  391. * This is done by changing the ownership of the attempt directory recursively
  392. * to the job owner. We do the following:
  393. * * sudo chown user:mapred -R taskTracker/$user/jobcache/$jobid/$attemptid/
  394. * * sudo chmod 2770 -R taskTracker/$user/jobcache/$jobid/$attemptid/
  395. */
  396. int prepare_attempt_directories(const char *job_id, const char *attempt_id,
  397. const char *user) {
  398. if (job_id == NULL || attempt_id == NULL || user == NULL) {
  399. fprintf(LOGFILE, "Either attempt_id is null or the user passed is null.\n");
  400. return INVALID_ARGUMENT_NUMBER;
  401. }
  402. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  403. if (get_user_details(user) < 0) {
  404. fprintf(LOGFILE, "Couldn't get the user details of %s.\n", user);
  405. return INVALID_USER_NAME;
  406. }
  407. char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
  408. if (local_dir == NULL) {
  409. fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
  410. cleanup();
  411. return PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  412. }
  413. char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
  414. #ifdef DEBUG
  415. fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
  416. full_local_dir_str);
  417. #endif
  418. char *job_dir;
  419. char *attempt_dir;
  420. char **local_dir_ptr = local_dir;
  421. int failed = 0;
  422. while (*local_dir_ptr != NULL) {
  423. job_dir = get_job_directory(*local_dir_ptr, user, job_id);
  424. if (job_dir == NULL) {
  425. fprintf(LOGFILE, "Couldn't get job directory for %s.\n", job_id);
  426. failed = 1;
  427. break;
  428. }
  429. // prepare attempt-dir in each of the mapreduce.cluster.local.dir
  430. attempt_dir = get_attempt_directory(job_dir, attempt_id);
  431. if (attempt_dir == NULL) {
  432. fprintf(LOGFILE, "Couldn't get attempt directory for %s.\n", attempt_id);
  433. failed = 1;
  434. free(job_dir);
  435. break;
  436. }
  437. struct stat filestat;
  438. if (stat(attempt_dir, &filestat) != 0) {
  439. if (errno == ENOENT) {
  440. #ifdef DEBUG
  441. fprintf(LOGFILE,
  442. "attempt_dir %s doesn't exist. Not doing anything.\n", attempt_dir);
  443. #endif
  444. } else {
  445. // stat failed because of something else!
  446. fprintf(LOGFILE, "Failed to stat the attempt_dir %s\n", attempt_dir);
  447. failed = 1;
  448. free(attempt_dir);
  449. free(job_dir);
  450. break;
  451. }
  452. } else if (secure_path(attempt_dir, user_detail->pw_uid,
  453. tasktracker_gid, S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG,
  454. 1) != 0) {
  455. // No setgid on files and setgid on dirs, 770
  456. fprintf(LOGFILE, "Failed to secure the attempt_dir %s\n", attempt_dir);
  457. failed = 1;
  458. free(attempt_dir);
  459. free(job_dir);
  460. break;
  461. }
  462. local_dir_ptr++;
  463. free(attempt_dir);
  464. free(job_dir);
  465. }
  466. free(local_dir);
  467. free(full_local_dir_str);
  468. cleanup();
  469. if (failed) {
  470. return PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  471. }
  472. return 0;
  473. }
  474. /**
  475. * Function to prepare the job log dir(and job acls file in it) for the child.
  476. * It gives the user ownership of the job's log-dir to the user and
  477. * group ownership to the user running tasktracker(i.e. tt_user).
  478. *
  479. * * sudo chown user:mapred log-dir/userlogs/$jobid
  480. * * if user is not $tt_user,
  481. * * sudo chmod 2570 log-dir/userlogs/$jobid
  482. * * else
  483. * * sudo chmod 2770 log-dir/userlogs/$jobid
  484. * * sudo chown user:mapred log-dir/userlogs/$jobid/job-acls.xml
  485. * * if user is not $tt_user,
  486. * * sudo chmod 2570 log-dir/userlogs/$jobid/job-acls.xml
  487. * * else
  488. * * sudo chmod 2770 log-dir/userlogs/$jobid/job-acls.xml
  489. */
  490. int prepare_job_logs(const char *log_dir, const char *job_id,
  491. mode_t permissions) {
  492. char *job_log_dir = get_job_log_dir(log_dir, job_id);
  493. if (job_log_dir == NULL) {
  494. fprintf(LOGFILE, "Couldn't get job log directory %s.\n", job_log_dir);
  495. return -1;
  496. }
  497. struct stat filestat;
  498. if (stat(job_log_dir, &filestat) != 0) {
  499. if (errno == ENOENT) {
  500. #ifdef DEBUG
  501. fprintf(LOGFILE, "job_log_dir %s doesn't exist. Not doing anything.\n",
  502. job_log_dir);
  503. #endif
  504. free(job_log_dir);
  505. return 0;
  506. } else {
  507. // stat failed because of something else!
  508. fprintf(LOGFILE, "Failed to stat the job log dir %s\n", job_log_dir);
  509. free(job_log_dir);
  510. return -1;
  511. }
  512. }
  513. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  514. // job log directory should not be set permissions recursively
  515. // because, on tt restart/reinit, it would contain directories of earlier run
  516. if (secure_single_path(job_log_dir, user_detail->pw_uid, tasktracker_gid,
  517. S_ISGID | permissions, 1) != 0) {
  518. fprintf(LOGFILE, "Failed to secure the log_dir %s\n", job_log_dir);
  519. free(job_log_dir);
  520. return -1;
  521. }
  522. //set ownership and permissions for job_log_dir/job-acls.xml, if exists.
  523. char *job_acls_file = get_job_acls_file(job_log_dir);
  524. if (job_acls_file == NULL) {
  525. fprintf(LOGFILE, "Couldn't get job acls file %s.\n", job_acls_file);
  526. free(job_log_dir);
  527. return -1;
  528. }
  529. struct stat filestat1;
  530. if (stat(job_acls_file, &filestat1) != 0) {
  531. if (errno == ENOENT) {
  532. #ifdef DEBUG
  533. fprintf(LOGFILE, "job_acls_file %s doesn't exist. Not doing anything.\n",
  534. job_acls_file);
  535. #endif
  536. free(job_acls_file);
  537. free(job_log_dir);
  538. return 0;
  539. } else {
  540. // stat failed because of something else!
  541. fprintf(LOGFILE, "Failed to stat the job_acls_file %s\n", job_acls_file);
  542. free(job_acls_file);
  543. free(job_log_dir);
  544. return -1;
  545. }
  546. }
  547. if (secure_single_path(job_acls_file, user_detail->pw_uid, tasktracker_gid,
  548. permissions, 1) != 0) {
  549. fprintf(LOGFILE, "Failed to secure the job acls file %s\n", job_acls_file);
  550. free(job_acls_file);
  551. free(job_log_dir);
  552. return -1;
  553. }
  554. free(job_acls_file);
  555. free(job_log_dir);
  556. return 0;
  557. }
  558. /**
  559. * Function to prepare the task logs for the child. It gives the user
  560. * ownership of the attempt's log-dir to the user and group ownership to the
  561. * user running tasktracker.
  562. * * sudo chown user:mapred log-dir/userlogs/$jobid/$attemptid
  563. * * sudo chmod -R 2770 log-dir/userlogs/$jobid/$attemptid
  564. */
  565. int prepare_task_logs(const char *log_dir, const char *job_id,
  566. const char *task_id) {
  567. char *task_log_dir = get_task_log_dir(log_dir, job_id, task_id);
  568. if (task_log_dir == NULL) {
  569. fprintf(LOGFILE, "Couldn't get task_log directory %s.\n", task_log_dir);
  570. return -1;
  571. }
  572. struct stat filestat;
  573. if (stat(task_log_dir, &filestat) != 0) {
  574. if (errno == ENOENT) {
  575. // See TaskRunner.java to see that an absent log-dir doesn't fail the task.
  576. #ifdef DEBUG
  577. fprintf(LOGFILE, "task_log_dir %s doesn't exist. Not doing anything.\n",
  578. task_log_dir);
  579. #endif
  580. free(task_log_dir);
  581. return 0;
  582. } else {
  583. // stat failed because of something else!
  584. fprintf(LOGFILE, "Failed to stat the task_log_dir %s\n", task_log_dir);
  585. free(task_log_dir);
  586. return -1;
  587. }
  588. }
  589. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  590. if (secure_path(task_log_dir, user_detail->pw_uid, tasktracker_gid,
  591. S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG, 1) != 0) {
  592. // setgid on dirs but not files, 770. As of now, there are no files though
  593. fprintf(LOGFILE, "Failed to secure the log_dir %s\n", task_log_dir);
  594. free(task_log_dir);
  595. return -1;
  596. }
  597. free(task_log_dir);
  598. return 0;
  599. }
  600. //function used to populate and user_details structure.
  601. int get_user_details(const char *user) {
  602. if (user_detail == NULL) {
  603. user_detail = getpwnam(user);
  604. if (user_detail == NULL) {
  605. fprintf(LOGFILE, "Invalid user\n");
  606. return -1;
  607. }
  608. }
  609. return 0;
  610. }
  611. /*
  612. * Function to check if the TaskTracker actually owns the file.
  613. * Or it has right ownership already.
  614. */
  615. int check_ownership(char *path, uid_t uid, gid_t gid) {
  616. struct stat filestat;
  617. if (stat(path, &filestat) != 0) {
  618. return UNABLE_TO_STAT_FILE;
  619. }
  620. // check user/group. User should be TaskTracker user, group can either be
  621. // TaskTracker's primary group or the special group to which binary's
  622. // permissions are set.
  623. // Or it can be the user/group owned by uid and gid passed.
  624. if ((getuid() != filestat.st_uid || (getgid() != filestat.st_gid && getegid()
  625. != filestat.st_gid)) &&
  626. ((uid != filestat.st_uid) || (gid != filestat.st_gid))) {
  627. return FILE_NOT_OWNED_BY_TASKTRACKER;
  628. }
  629. return 0;
  630. }
  631. /**
  632. * Function to initialize the user directories of a user.
  633. * It does the following:
  634. * * sudo chown user:mapred -R taskTracker/$user
  635. * * if user is not $tt_user,
  636. * * sudo chmod 2570 -R taskTracker/$user
  637. * * else // user is tt_user
  638. * * sudo chmod 2770 -R taskTracker/$user
  639. * This is done once per every user on the TaskTracker.
  640. */
  641. int initialize_user(const char *user) {
  642. if (user == NULL) {
  643. fprintf(LOGFILE, "user passed is null.\n");
  644. return INVALID_ARGUMENT_NUMBER;
  645. }
  646. if (get_user_details(user) < 0) {
  647. fprintf(LOGFILE, "Couldn't get the user details of %s", user);
  648. return INVALID_USER_NAME;
  649. }
  650. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  651. char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
  652. if (local_dir == NULL) {
  653. fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
  654. cleanup();
  655. return INVALID_TT_ROOT;
  656. }
  657. char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
  658. #ifdef DEBUG
  659. fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
  660. full_local_dir_str);
  661. #endif
  662. int is_tt_user = (user_detail->pw_uid == getuid());
  663. // for tt_user, set 770 permissions; otherwise set 570
  664. mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
  665. : (S_IRUSR | S_IXUSR | S_IRWXG);
  666. char *user_dir;
  667. char **local_dir_ptr = local_dir;
  668. int failed = 0;
  669. while (*local_dir_ptr != NULL) {
  670. user_dir = get_user_directory(*local_dir_ptr, user);
  671. if (user_dir == NULL) {
  672. fprintf(LOGFILE, "Couldn't get userdir directory for %s.\n", user);
  673. failed = 1;
  674. break;
  675. }
  676. struct stat filestat;
  677. if (stat(user_dir, &filestat) != 0) {
  678. if (errno == ENOENT) {
  679. #ifdef DEBUG
  680. fprintf(LOGFILE, "user_dir %s doesn't exist. Not doing anything.\n",
  681. user_dir);
  682. #endif
  683. } else {
  684. // stat failed because of something else!
  685. fprintf(LOGFILE, "Failed to stat the user_dir %s\n",
  686. user_dir);
  687. failed = 1;
  688. free(user_dir);
  689. break;
  690. }
  691. } else if (secure_path(user_dir, user_detail->pw_uid,
  692. tasktracker_gid, permissions, S_ISGID | permissions, 1) != 0) {
  693. // No setgid on files and setgid on dirs,
  694. // 770 for tt_user and 570 for any other user
  695. fprintf(LOGFILE, "Failed to secure the user_dir %s\n",
  696. user_dir);
  697. failed = 1;
  698. free(user_dir);
  699. break;
  700. }
  701. local_dir_ptr++;
  702. free(user_dir);
  703. }
  704. free(local_dir);
  705. free(full_local_dir_str);
  706. cleanup();
  707. if (failed) {
  708. return INITIALIZE_USER_FAILED;
  709. }
  710. return 0;
  711. }
  712. /**
  713. * Function to prepare the job directories for the task JVM.
  714. * We do the following:
  715. * * sudo chown user:mapred -R taskTracker/$user/jobcache/$jobid
  716. * * sudo chown user:mapred -R logs/userlogs/$jobid
  717. * * if user is not $tt_user,
  718. * * sudo chmod 2570 -R taskTracker/$user/jobcache/$jobid
  719. * * sudo chmod 2570 -R logs/userlogs/$jobid
  720. * * else // user is tt_user
  721. * * sudo chmod 2770 -R taskTracker/$user/jobcache/$jobid
  722. * * sudo chmod 2770 -R logs/userlogs/$jobid
  723. * *
  724. * * For any user, sudo chmod 2770 taskTracker/$user/jobcache/$jobid/work
  725. */
  726. int initialize_job(const char *jobid, const char *user) {
  727. if (jobid == NULL || user == NULL) {
  728. fprintf(LOGFILE, "Either jobid is null or the user passed is null.\n");
  729. return INVALID_ARGUMENT_NUMBER;
  730. }
  731. if (get_user_details(user) < 0) {
  732. fprintf(LOGFILE, "Couldn't get the user details of %s", user);
  733. return INVALID_USER_NAME;
  734. }
  735. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  736. char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
  737. if (local_dir == NULL) {
  738. fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
  739. cleanup();
  740. return INVALID_TT_ROOT;
  741. }
  742. char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
  743. #ifdef DEBUG
  744. fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
  745. full_local_dir_str);
  746. #endif
  747. int is_tt_user = (user_detail->pw_uid == getuid());
  748. // for tt_user, set 770 permissions; for any other user, set 570 for job-dir
  749. mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
  750. : (S_IRUSR | S_IXUSR | S_IRWXG);
  751. char *job_dir, *job_work_dir;
  752. char **local_dir_ptr = local_dir;
  753. int failed = 0;
  754. while (*local_dir_ptr != NULL) {
  755. job_dir = get_job_directory(*local_dir_ptr, user, jobid);
  756. if (job_dir == NULL) {
  757. fprintf(LOGFILE, "Couldn't get job directory for %s.\n", jobid);
  758. failed = 1;
  759. break;
  760. }
  761. struct stat filestat;
  762. if (stat(job_dir, &filestat) != 0) {
  763. if (errno == ENOENT) {
  764. #ifdef DEBUG
  765. fprintf(LOGFILE, "job_dir %s doesn't exist. Not doing anything.\n",
  766. job_dir);
  767. #endif
  768. } else {
  769. // stat failed because of something else!
  770. fprintf(LOGFILE, "Failed to stat the job_dir %s\n", job_dir);
  771. failed = 1;
  772. free(job_dir);
  773. break;
  774. }
  775. } else if (secure_path(job_dir, user_detail->pw_uid, tasktracker_gid,
  776. permissions, S_ISGID | permissions, 1) != 0) {
  777. // No setgid on files and setgid on dirs,
  778. // 770 for tt_user and 570 for any other user
  779. fprintf(LOGFILE, "Failed to secure the job_dir %s\n", job_dir);
  780. failed = 1;
  781. free(job_dir);
  782. break;
  783. } else if (!is_tt_user) {
  784. // For tt_user, we don't need this as we already set 2770 for
  785. // job-work-dir because of "chmod -R" done above
  786. job_work_dir = get_job_work_directory(job_dir);
  787. if (job_work_dir == NULL) {
  788. fprintf(LOGFILE, "Couldn't get job-work directory for %s.\n", jobid);
  789. failed = 1;
  790. break;
  791. }
  792. // Set 2770 on the job-work directory
  793. if (stat(job_work_dir, &filestat) != 0) {
  794. if (errno == ENOENT) {
  795. #ifdef DEBUG
  796. fprintf(LOGFILE,
  797. "job_work_dir %s doesn't exist. Not doing anything.\n",
  798. job_work_dir);
  799. #endif
  800. free(job_work_dir);
  801. } else {
  802. // stat failed because of something else!
  803. fprintf(LOGFILE, "Failed to stat the job_work_dir %s\n",
  804. job_work_dir);
  805. failed = 1;
  806. free(job_work_dir);
  807. free(job_dir);
  808. break;
  809. }
  810. } else if (change_mode(job_work_dir, S_ISGID | S_IRWXU | S_IRWXG) != 0) {
  811. fprintf(LOGFILE,
  812. "couldn't change the permissions of job_work_dir %s\n",
  813. job_work_dir);
  814. failed = 1;
  815. free(job_work_dir);
  816. free(job_dir);
  817. break;
  818. }
  819. }
  820. local_dir_ptr++;
  821. free(job_dir);
  822. }
  823. free(local_dir);
  824. free(full_local_dir_str);
  825. int exit_code = 0;
  826. if (failed) {
  827. exit_code = INITIALIZE_JOB_FAILED;
  828. goto cleanup;
  829. }
  830. char *log_dir = (char *) get_value(TT_LOG_DIR_KEY);
  831. if (log_dir == NULL) {
  832. fprintf(LOGFILE, "Log directory is not configured.\n");
  833. exit_code = INVALID_TT_LOG_DIR;
  834. goto cleanup;
  835. }
  836. if (prepare_job_logs(log_dir, jobid, permissions) != 0) {
  837. fprintf(LOGFILE, "Couldn't prepare job logs directory %s for %s.\n",
  838. log_dir, jobid);
  839. exit_code = PREPARE_JOB_LOGS_FAILED;
  840. }
  841. cleanup:
  842. // free configurations
  843. cleanup();
  844. if (log_dir != NULL) {
  845. free(log_dir);
  846. }
  847. return exit_code;
  848. }
  849. /**
  850. * Function to initialize the distributed cache file for a user.
  851. * It does the following:
  852. * * sudo chown user:mapred -R taskTracker/$user/distcache/<randomdir>
  853. * * if user is not $tt_user,
  854. * * sudo chmod 2570 -R taskTracker/$user/distcache/<randomdir>
  855. * * else // user is tt_user
  856. * * sudo chmod 2770 -R taskTracker/$user/distcache/<randomdir>
  857. * This is done once per localization. Tasks reusing JVMs just create
  858. * symbolic links themselves and so there isn't anything specific to do in
  859. * that case.
  860. */
  861. int initialize_distributed_cache_file(const char *tt_root,
  862. const char *unique_string, const char *user) {
  863. if (tt_root == NULL) {
  864. fprintf(LOGFILE, "tt_root passed is null.\n");
  865. return INVALID_ARGUMENT_NUMBER;
  866. }
  867. if (unique_string == NULL) {
  868. fprintf(LOGFILE, "unique_string passed is null.\n");
  869. return INVALID_ARGUMENT_NUMBER;
  870. }
  871. if (user == NULL) {
  872. fprintf(LOGFILE, "user passed is null.\n");
  873. return INVALID_ARGUMENT_NUMBER;
  874. }
  875. if (get_user_details(user) < 0) {
  876. fprintf(LOGFILE, "Couldn't get the user details of %s", user);
  877. return INVALID_USER_NAME;
  878. }
  879. //Check tt_root
  880. if (check_tt_root(tt_root) < 0) {
  881. fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
  882. cleanup();
  883. return INVALID_TT_ROOT;
  884. }
  885. // set permission on the unique directory
  886. char *localized_unique_dir = get_distributed_cache_directory(tt_root, user,
  887. unique_string);
  888. if (localized_unique_dir == NULL) {
  889. fprintf(LOGFILE, "Couldn't get unique distcache directory for %s.\n", user);
  890. cleanup();
  891. return INITIALIZE_DISTCACHEFILE_FAILED;
  892. }
  893. gid_t binary_gid = getegid(); // the group permissions of the binary.
  894. int is_tt_user = (user_detail->pw_uid == getuid());
  895. // for tt_user, set 770 permissions; for any other user, set 570
  896. mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
  897. : (S_IRUSR | S_IXUSR | S_IRWXG);
  898. int failed = 0;
  899. struct stat filestat;
  900. if (stat(localized_unique_dir, &filestat) != 0) {
  901. // stat on distcache failed because of something
  902. fprintf(LOGFILE, "Failed to stat the localized_unique_dir %s\n",
  903. localized_unique_dir);
  904. failed = INITIALIZE_DISTCACHEFILE_FAILED;
  905. } else if (secure_path(localized_unique_dir, user_detail->pw_uid,
  906. binary_gid, permissions, S_ISGID | permissions, 1) != 0) {
  907. // No setgid on files and setgid on dirs,
  908. // 770 for tt_user and 570 for any other user
  909. fprintf(LOGFILE, "Failed to secure the localized_unique_dir %s\n",
  910. localized_unique_dir);
  911. failed = INITIALIZE_DISTCACHEFILE_FAILED;
  912. }
  913. free(localized_unique_dir);
  914. cleanup();
  915. return failed;
  916. }
  917. /**
  918. * Function used to initialize task. Prepares attempt_dir, jars_dir and
  919. * log_dir to be accessible by the child
  920. */
  921. int initialize_task(const char *jobid, const char *taskid, const char *user) {
  922. int exit_code = 0;
  923. #ifdef DEBUG
  924. fprintf(LOGFILE, "job-id passed to initialize_task : %s.\n", jobid);
  925. fprintf(LOGFILE, "task-d passed to initialize_task : %s.\n", taskid);
  926. #endif
  927. if (prepare_attempt_directories(jobid, taskid, user) != 0) {
  928. fprintf(LOGFILE,
  929. "Couldn't prepare the attempt directories for %s of user %s.\n",
  930. taskid, user);
  931. exit_code = PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  932. goto cleanup;
  933. }
  934. char *log_dir = (char *) get_value(TT_LOG_DIR_KEY);
  935. if (log_dir == NULL) {
  936. fprintf(LOGFILE, "Log directory is not configured.\n");
  937. exit_code = INVALID_TT_LOG_DIR;
  938. goto cleanup;
  939. }
  940. if (prepare_task_logs(log_dir, jobid, taskid) != 0) {
  941. fprintf(LOGFILE, "Couldn't prepare task logs directory %s for %s.\n",
  942. log_dir, taskid);
  943. exit_code = PREPARE_TASK_LOGS_FAILED;
  944. }
  945. cleanup:
  946. // free configurations
  947. cleanup();
  948. if (log_dir != NULL) {
  949. free(log_dir);
  950. }
  951. return exit_code;
  952. }
  953. /*
  954. * Function used to launch a task as the provided user.
  955. */
  956. int run_task_as_user(const char * user, const char *jobid, const char *taskid,
  957. const char *tt_root) {
  958. return run_process_as_user(user, jobid, taskid, tt_root, LAUNCH_TASK_JVM);
  959. }
  960. /*
  961. * Function that is used as a helper to launch task JVMs and debug scripts.
  962. * Not meant for launching any other process. It does the following :
  963. * 1) Checks if the tt_root passed is found in mapreduce.cluster.local.dir
  964. * 2) Prepares attempt_dir and log_dir to be accessible by the task JVMs
  965. * 3) Uses get_task_launcher_file to fetch the task script file path
  966. * 4) Does an execlp on the same in order to replace the current image with
  967. * task image.
  968. */
  969. int run_process_as_user(const char * user, const char * jobid,
  970. const char *taskid, const char *tt_root, int command) {
  971. if (command != LAUNCH_TASK_JVM && command != RUN_DEBUG_SCRIPT) {
  972. return INVALID_COMMAND_PROVIDED;
  973. }
  974. if (jobid == NULL || taskid == NULL || tt_root == NULL) {
  975. return INVALID_ARGUMENT_NUMBER;
  976. }
  977. if (command == LAUNCH_TASK_JVM) {
  978. fprintf(LOGFILE, "run_process_as_user launching a JVM for task :%s.\n", taskid);
  979. } else if (command == RUN_DEBUG_SCRIPT) {
  980. fprintf(LOGFILE, "run_process_as_user launching a debug script for task :%s.\n", taskid);
  981. }
  982. #ifdef DEBUG
  983. fprintf(LOGFILE, "Job-id passed to run_process_as_user : %s.\n", jobid);
  984. fprintf(LOGFILE, "task-d passed to run_process_as_user : %s.\n", taskid);
  985. fprintf(LOGFILE, "tt_root passed to run_process_as_user : %s.\n", tt_root);
  986. #endif
  987. //Check tt_root before switching the user, as reading configuration
  988. //file requires privileged access.
  989. if (check_tt_root(tt_root) < 0) {
  990. fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
  991. cleanup();
  992. return INVALID_TT_ROOT;
  993. }
  994. int exit_code = 0;
  995. char *job_dir = NULL, *task_script_path = NULL;
  996. if (command == LAUNCH_TASK_JVM &&
  997. (exit_code = initialize_task(jobid, taskid, user)) != 0) {
  998. fprintf(LOGFILE, "Couldn't initialise the task %s of user %s.\n", taskid,
  999. user);
  1000. goto cleanup;
  1001. }
  1002. job_dir = get_job_directory(tt_root, user, jobid);
  1003. if (job_dir == NULL) {
  1004. fprintf(LOGFILE, "Couldn't obtain job_dir for %s in %s.\n", jobid, tt_root);
  1005. exit_code = OUT_OF_MEMORY;
  1006. goto cleanup;
  1007. }
  1008. task_script_path = get_task_launcher_file(job_dir, taskid);
  1009. if (task_script_path == NULL) {
  1010. fprintf(LOGFILE, "Couldn't obtain task_script_path in %s.\n", job_dir);
  1011. exit_code = OUT_OF_MEMORY;
  1012. goto cleanup;
  1013. }
  1014. errno = 0;
  1015. exit_code = check_path_for_relative_components(task_script_path);
  1016. if(exit_code != 0) {
  1017. goto cleanup;
  1018. }
  1019. //change the user
  1020. fcloseall();
  1021. free(job_dir);
  1022. umask(0007);
  1023. if (change_user(user) != 0) {
  1024. exit_code = SETUID_OPER_FAILED;
  1025. goto cleanup;
  1026. }
  1027. errno = 0;
  1028. cleanup();
  1029. execlp(task_script_path, task_script_path, NULL);
  1030. if (errno != 0) {
  1031. free(task_script_path);
  1032. if (command == LAUNCH_TASK_JVM) {
  1033. fprintf(LOGFILE, "Couldn't execute the task jvm file: %s", strerror(errno));
  1034. exit_code = UNABLE_TO_EXECUTE_TASK_SCRIPT;
  1035. } else if (command == RUN_DEBUG_SCRIPT) {
  1036. fprintf(LOGFILE, "Couldn't execute the task debug script file: %s", strerror(errno));
  1037. exit_code = UNABLE_TO_EXECUTE_DEBUG_SCRIPT;
  1038. }
  1039. }
  1040. return exit_code;
  1041. cleanup:
  1042. if (job_dir != NULL) {
  1043. free(job_dir);
  1044. }
  1045. if (task_script_path != NULL) {
  1046. free(task_script_path);
  1047. }
  1048. // free configurations
  1049. cleanup();
  1050. return exit_code;
  1051. }
  1052. /*
  1053. * Function used to launch a debug script as the provided user.
  1054. */
  1055. int run_debug_script_as_user(const char * user, const char *jobid, const char *taskid,
  1056. const char *tt_root) {
  1057. return run_process_as_user(user, jobid, taskid, tt_root, RUN_DEBUG_SCRIPT);
  1058. }
  1059. /**
  1060. * Function used to terminate/kill a task launched by the user,
  1061. * or dump the process' stack (by sending SIGQUIT).
  1062. * The function sends appropriate signal to the process group
  1063. * specified by the task_pid.
  1064. */
  1065. int kill_user_task(const char *user, const char *task_pid, int sig) {
  1066. int pid = 0;
  1067. if(task_pid == NULL) {
  1068. return INVALID_ARGUMENT_NUMBER;
  1069. }
  1070. #ifdef DEBUG
  1071. fprintf(LOGFILE, "user passed to kill_user_task : %s.\n", user);
  1072. fprintf(LOGFILE, "task-pid passed to kill_user_task : %s.\n", task_pid);
  1073. fprintf(LOGFILE, "signal passed to kill_user_task : %d.\n", sig);
  1074. #endif
  1075. pid = atoi(task_pid);
  1076. if(pid <= 0) {
  1077. return INVALID_TASK_PID;
  1078. }
  1079. fcloseall();
  1080. if (change_user(user) != 0) {
  1081. cleanup();
  1082. return SETUID_OPER_FAILED;
  1083. }
  1084. //Don't continue if the process-group is not alive anymore.
  1085. if(kill(-pid,0) < 0) {
  1086. errno = 0;
  1087. cleanup();
  1088. return 0;
  1089. }
  1090. if (kill(-pid, sig) < 0) {
  1091. if(errno != ESRCH) {
  1092. fprintf(LOGFILE, "Error is %s\n", strerror(errno));
  1093. cleanup();
  1094. return UNABLE_TO_KILL_TASK;
  1095. }
  1096. errno = 0;
  1097. }
  1098. cleanup();
  1099. return 0;
  1100. }
  1101. /**
  1102. * Enables the path for deletion by changing the owner, group and permissions
  1103. * of the specified path and all the files/directories in the path recursively.
  1104. * * sudo chown user:mapred -R full_path
  1105. * * sudo chmod 2770 -R full_path
  1106. * Before changing permissions, makes sure that the given path doesn't contain
  1107. * any relative components.
  1108. * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
  1109. * full_path : is either jobLocalDir, taskDir OR taskWorkDir that is to be
  1110. * deleted
  1111. */
  1112. static int enable_path_for_cleanup(const char *tt_root, const char *user,
  1113. char *full_path) {
  1114. int exit_code = 0;
  1115. gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
  1116. if (check_tt_root(tt_root) < 0) {
  1117. fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
  1118. cleanup();
  1119. return INVALID_TT_ROOT;
  1120. }
  1121. if (full_path == NULL) {
  1122. fprintf(LOGFILE,
  1123. "Could not build the full path. Not deleting the dir %s\n",
  1124. full_path);
  1125. exit_code = UNABLE_TO_BUILD_PATH; // may be malloc failed
  1126. }
  1127. // Make sure that the path given is not having any relative components
  1128. else if ((exit_code = check_path_for_relative_components(full_path)) != 0) {
  1129. fprintf(LOGFILE,
  1130. "Not changing permissions. Path may contain relative components.\n",
  1131. full_path);
  1132. }
  1133. else if (get_user_details(user) < 0) {
  1134. fprintf(LOGFILE, "Couldn't get the user details of %s.\n", user);
  1135. exit_code = INVALID_USER_NAME;
  1136. }
  1137. else if (exit_code = secure_path(full_path, user_detail->pw_uid,
  1138. tasktracker_gid,
  1139. S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG, 0) != 0) {
  1140. // No setgid on files and setgid on dirs, 770.
  1141. // set 770 permissions for user, TTgroup for all files/directories in
  1142. // 'full_path' recursively sothat deletion of path by TaskTracker succeeds.
  1143. fprintf(LOGFILE, "Failed to set permissions for %s\n", full_path);
  1144. }
  1145. if (full_path != NULL) {
  1146. free(full_path);
  1147. }
  1148. // free configurations
  1149. cleanup();
  1150. return exit_code;
  1151. }
  /**
   * Enables the task work-dir/local-dir path for deletion.
   * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
   * user : owner of the job
   * jobid : id of the job the task belongs to
   * dir_to_be_deleted : is either taskDir OR taskWorkDir that is to be deleted
   *
   * Returns whatever enable_path_for_cleanup returns for the built path.
   */
  int enable_task_for_cleanup(const char *tt_root, const char *user,
      const char *jobid, const char *dir_to_be_deleted) {
    // The path built here is handed straight to enable_path_for_cleanup.
    return enable_path_for_cleanup(tt_root, user,
        get_task_dir_path(tt_root, user, jobid, dir_to_be_deleted));
  }
  /**
   * Enables the jobLocalDir for deletion.
   * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
   * user : owner of the job
   * jobid : id of the job for which the cleanup is needed.
   *
   * Returns whatever enable_path_for_cleanup returns for the job directory.
   */
  int enable_job_for_cleanup(const char *tt_root, const char *user,
      const char *jobid) {
    // The path built here is handed straight to enable_path_for_cleanup.
    return enable_path_for_cleanup(tt_root, user,
        get_job_directory(tt_root, user, jobid));
  }