hadoop-setup-conf.sh
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
this="${BASH_SOURCE-$0}"
export HADOOP_PREFIX=`dirname "$this"`/..
usage() {
  echo "
usage: $0 <parameters>

Optional parameters:
  --auto                                          Setup path and configuration automatically
  --default                                       Setup configuration as default
  --conf-dir=/etc/hadoop                          Set configuration directory
  --datanode-dir=/var/lib/hadoop/hdfs/datanode    Set datanode directory
  --group=hadoop                                  Set Hadoop group name
  -h                                              Display this message
  --hdfs-user=hdfs                                Set HDFS user
  --jobtracker-host=hostname                      Set jobtracker host
  --namenode-host=hostname                        Set namenode host
  --secondarynamenode-host=hostname               Set secondary namenode host
  --kerberos-realm=KERBEROS.EXAMPLE.COM           Set Kerberos realm
  --kinit-location=/usr/kerberos/bin/kinit        Set kinit location
  --keytab-dir=/etc/security/keytabs              Set keytab directory
  --log-dir=/var/log/hadoop                       Set log directory
  --pid-dir=/var/run/hadoop                       Set pid directory
  --hdfs-dir=/var/lib/hadoop/hdfs                 Set HDFS directory
  --hdfs-user-keytab=/home/hdfs/hdfs.keytab       Set HDFS user keytab
  --mapred-dir=/var/lib/hadoop/mapred             Set mapreduce directory
  --mapreduce-user=mr                             Set mapreduce user
  --mapreduce-user-keytab=/home/mr/mr.keytab      Set mapreduce user keytab
  --namenode-dir=/var/lib/hadoop/hdfs/namenode    Set namenode directory
  --replication=3                                 Set replication factor
  --taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler  Set task scheduler
  --datanodes=hostname1,hostname2,...             Set the datanodes
  --tasktrackers=hostname1,hostname2,...          Set the tasktrackers
  --dfs-webhdfs-enabled=false|true                Enable webhdfs
  --dfs-support-append=false|true                 Enable append
  --hadoop-proxy-users='user1:groups:hosts;user2:groups:hosts'  Setup proxy users for hadoop
  --hbase-user=hbase                              User which hbase is running as. Defaults to hbase
  --mapred-cluster-map-memory-mb=memory           Virtual memory of a map slot for the MR framework. Defaults to -1
  --mapred-cluster-reduce-memory-mb=memory        Virtual memory of a reduce slot for the MR framework. Defaults to -1
  --mapred-cluster-max-map-memory-mb=memory       Maximum virtual memory of a single map task. Defaults to -1.
                                                  This value should be set to (mapred.tasktracker.map.tasks.maximum * mapred.cluster.map.memory.mb)
  --mapred-cluster-max-reduce-memory-mb=memory    Maximum virtual memory of a single reduce task. Defaults to -1.
                                                  This value should be set to (mapred.tasktracker.reduce.tasks.maximum * mapred.cluster.reduce.memory.mb)
  --mapred-job-map-memory-mb=memory               Virtual memory of a single map slot for a job. Defaults to -1.
                                                  This value should be <= mapred.cluster.max.map.memory.mb
  --mapred-job-reduce-memory-mb=memory            Virtual memory of a single reduce slot for a job. Defaults to -1.
                                                  This value should be <= mapred.cluster.max.reduce.memory.mb
"
  exit 1
}
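
# A minimal invocation sketch (the hostnames below are hypothetical
# placeholders, not defaults of this script):
#
#   hadoop-setup-conf.sh --auto \
#     --conf-dir=/etc/hadoop \
#     --namenode-host=nn.example.com \
#     --jobtracker-host=jt.example.com \
#     --datanodes=dn1.example.com,dn2.example.com \
#     --tasktrackers=dn1.example.com,dn2.example.com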
# Walk up from $1 to / and verify that every path component is owned by root.
# Returns 0 when the whole chain is root-owned, 1 otherwise.
check_permission() {
  TARGET=$1
  OWNER="0"
  RESULT=0
  while [ "$TARGET" != "/" ]; do
    if [ "`uname`" = "Darwin" ]; then
      OWNER=`stat -f %u "$TARGET"`
    else
      OWNER=`stat -c %u "$TARGET"`
    fi
    if [ "$OWNER" != "0" ]; then
      RESULT=1
      break
    fi
    TARGET=`dirname "$TARGET"`
  done
  return $RESULT
}
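
# Usage sketch (mirrors how this script calls it later on): callers test
# the return code, e.g.
#
#   check_permission ${HADOOP_CONF_DIR}
#   if [ $? -eq 1 ]; then
#     echo "Full path to ${HADOOP_CONF_DIR} should be owned by root."
#   fi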
# Expand ${VAR}-style placeholders in template $1 using the values of the
# corresponding environment variables, writing the result to $2. An existing
# output file is preserved as $2.bak.
template_generator() {
  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
  if [ -e "$2" ]; then
    mv -f "$2" "$2.bak"
  fi
  cat "$1" |
  while read line ; do
    while [[ "$line" =~ $REGEX ]] ; do
      LHS=${BASH_REMATCH[1]}
      RHS="$(eval echo "\"$LHS\"")"
      line=${line//$LHS/$RHS}
    done
    echo "$line" >> "$2"
  done
}
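
# Substitution example (the template line is illustrative): with
# HADOOP_NN_HOST=nn.example.com in the environment, a template line such as
#
#   <value>hdfs://${HADOOP_NN_HOST}:8020</value>
#
# is emitted as
#
#   <value>hdfs://nn.example.com:8020</value>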
#########################################
# Function to add a property to an xml configuration file
# Params: $1 is the file with full path; $2 is the property name;
# $3 is the new value; $4 (optional) is the description;
# $5 (optional) is the value for the final tag
#########################################
function addPropertyToXMLConf
{
  #read the file name with full path
  local file=$1
  #get the property name
  local property=$2
  #get what value should be set for that
  local propValue=$3
  #get the description
  local desc=$4
  #get the value for the final tag
  local finalVal=$5

  #create the property text, make sure the / are escaped
  propText="<property>\n<name>$property<\/name>\n<value>$propValue<\/value>\n"
  #if description is not empty add it
  if [ ! -z "$desc" ]
  then
    propText="${propText}<description>$desc<\/description>\n"
  fi
  #if final is not empty add it
  if [ ! -z "$finalVal" ]
  then
    propText="${propText}<final>$finalVal<\/final>\n"
  fi
  #add the ending tag
  propText="${propText}<\/property>\n"
  #add the property to the file
  endText="<\/configuration>"
  #add the text using sed at the end of the file
  sed -i "s|$endText|$propText$endText|" "$file"
}
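
# Usage sketch (the property name and value are illustrative only, not
# something this script sets):
#
#   addPropertyToXMLConf "${HADOOP_CONF_DIR}/hdfs-site.xml" \
#     "dfs.block.size" "134217728" "HDFS block size in bytes" "true"
#
# appends the corresponding <property> element just before </configuration>.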
##########################################
# Function to set up the proxy user settings
#########################################
function setupProxyUsers
{
  local conf_file="${HADOOP_CONF_DIR}/core-site.xml"
  #if hadoop proxy users are set, setup hadoop proxy
  if [ ! -z "$HADOOP_PROXY_USERS" ]
  then
    oldIFS=$IFS
    IFS=';'
    #process each proxy config
    for proxy in $HADOOP_PROXY_USERS
    do
      #get the user, group and hosts information for each proxy
      IFS=':'
      arr=($proxy)
      user="${arr[0]}"
      groups="${arr[1]}"
      hosts="${arr[2]}"
      #determine the property names and values
      proxy_groups_property="hadoop.proxyuser.${user}.groups"
      proxy_groups_val="$groups"
      addPropertyToXMLConf "$conf_file" "$proxy_groups_property" "$proxy_groups_val"
      proxy_hosts_property="hadoop.proxyuser.${user}.hosts"
      proxy_hosts_val="$hosts"
      addPropertyToXMLConf "$conf_file" "$proxy_hosts_property" "$proxy_hosts_val"
      IFS=';'
    done
    IFS=$oldIFS
  fi
}
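
# Format example (the users, groups and hosts are hypothetical):
#
#   HADOOP_PROXY_USERS='oozie:users:host1.example.com;hue:users,admin:host2.example.com'
#
# yields hadoop.proxyuser.oozie.groups=users,
# hadoop.proxyuser.oozie.hosts=host1.example.com, and the matching
# hue properties in core-site.xml.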
OPTS=$(getopt \
  -n $0 \
  -o '' \
  -l 'auto' \
  -l 'java-home:' \
  -l 'conf-dir:' \
  -l 'default' \
  -l 'group:' \
  -l 'hdfs-dir:' \
  -l 'namenode-dir:' \
  -l 'datanode-dir:' \
  -l 'mapred-dir:' \
  -l 'namenode-host:' \
  -l 'secondarynamenode-host:' \
  -l 'jobtracker-host:' \
  -l 'log-dir:' \
  -l 'pid-dir:' \
  -l 'replication:' \
  -l 'taskscheduler:' \
  -l 'hdfs-user:' \
  -l 'hdfs-user-keytab:' \
  -l 'mapreduce-user:' \
  -l 'mapreduce-user-keytab:' \
  -l 'keytab-dir:' \
  -l 'kerberos-realm:' \
  -l 'kinit-location:' \
  -l 'datanodes:' \
  -l 'tasktrackers:' \
  -l 'dfs-webhdfs-enabled:' \
  -l 'hadoop-proxy-users:' \
  -l 'dfs-support-append:' \
  -l 'hbase-user:' \
  -l 'mapred-cluster-map-memory-mb:' \
  -l 'mapred-cluster-reduce-memory-mb:' \
  -l 'mapred-cluster-max-map-memory-mb:' \
  -l 'mapred-cluster-max-reduce-memory-mb:' \
  -l 'mapred-job-map-memory-mb:' \
  -l 'mapred-job-reduce-memory-mb:' \
  -o 'h' \
  -- "$@")
if [ $? != 0 ] ; then
  usage
fi

# Make sure the HADOOP_LOG_DIR is not picked up from user environment.
unset HADOOP_LOG_DIR

eval set -- "${OPTS}"
while true ; do
  case "$1" in
    --auto)
      AUTOSETUP=1
      AUTOMATED=1
      shift
      ;;
    --java-home)
      JAVA_HOME=$2; shift 2
      AUTOMATED=1
      ;;
    --conf-dir)
      HADOOP_CONF_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --default)
      AUTOMATED=1; shift
      ;;
    --group)
      HADOOP_GROUP=$2; shift 2
      AUTOMATED=1
      ;;
    -h)
      usage
      ;;
    --hdfs-dir)
      HADOOP_HDFS_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-dir)
      HADOOP_NN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --datanode-dir)
      HADOOP_DN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-dir)
      HADOOP_MAPRED_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-host)
      HADOOP_NN_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --secondarynamenode-host)
      HADOOP_SNN_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --jobtracker-host)
      HADOOP_JT_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --log-dir)
      HADOOP_LOG_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --pid-dir)
      HADOOP_PID_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --replication)
      HADOOP_REPLICATION=$2; shift 2
      AUTOMATED=1
      ;;
    --taskscheduler)
      HADOOP_TASK_SCHEDULER=$2; shift 2
      AUTOMATED=1
      ;;
    --hdfs-user)
      HADOOP_HDFS_USER=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-user)
      HADOOP_MR_USER=$2; shift 2
      AUTOMATED=1
      ;;
    --keytab-dir)
      KEYTAB_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --hdfs-user-keytab)
      HDFS_KEYTAB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-user-keytab)
      MR_KEYTAB=$2; shift 2
      AUTOMATED=1
      ;;
    --kerberos-realm)
      KERBEROS_REALM=$2; shift 2
      SECURITY_TYPE="kerberos"
      AUTOMATED=1
      ;;
    --kinit-location)
      KINIT=$2; shift 2
      AUTOMATED=1
      ;;
    --datanodes)
      DATANODES=$2; shift 2
      AUTOMATED=1
      DATANODES=$(echo $DATANODES | tr ',' ' ')
      ;;
    --tasktrackers)
      TASKTRACKERS=$2; shift 2
      AUTOMATED=1
      TASKTRACKERS=$(echo $TASKTRACKERS | tr ',' ' ')
      ;;
    --dfs-webhdfs-enabled)
      DFS_WEBHDFS_ENABLED=$2; shift 2
      AUTOMATED=1
      ;;
    --hadoop-proxy-users)
      HADOOP_PROXY_USERS=$2; shift 2
      AUTOMATED=1
      ;;
    --dfs-support-append)
      DFS_SUPPORT_APPEND=$2; shift 2
      AUTOMATED=1
      ;;
    --hbase-user)
      HBASE_USER=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-cluster-map-memory-mb)
      MAPRED_CLUSTER_MAP_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-cluster-reduce-memory-mb)
      MAPRED_CLUSTER_REDUCE_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-cluster-max-map-memory-mb)
      MAPRED_CLUSTER_MAX_MAP_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-cluster-max-reduce-memory-mb)
      MAPRED_CLUSTER_MAX_REDUCE_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-job-map-memory-mb)
      MAPRED_JOB_MAP_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-job-reduce-memory-mb)
      MAPRED_JOB_REDUCE_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --)
      shift ; break
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done
AUTOSETUP=${AUTOSETUP:-1}
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_GROUP=${HADOOP_GROUP:-hadoop}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-`hostname`}
HADOOP_SNN_HOST=${HADOOP_SNN_HOST:-`hostname`}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/hadoop}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs}
HADOOP_MR_USER=${HADOOP_MR_USER:-mr}
DFS_WEBHDFS_ENABLED=${DFS_WEBHDFS_ENABLED:-false}
DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false}
HBASE_USER=${HBASE_USER:-hbase}
MAPRED_CLUSTER_MAP_MEMORY_MB=${MAPRED_CLUSTER_MAP_MEMORY_MB:--1}
MAPRED_CLUSTER_REDUCE_MEMORY_MB=${MAPRED_CLUSTER_REDUCE_MEMORY_MB:--1}
MAPRED_CLUSTER_MAX_MAP_MEMORY_MB=${MAPRED_CLUSTER_MAX_MAP_MEMORY_MB:--1}
MAPRED_CLUSTER_MAX_REDUCE_MEMORY_MB=${MAPRED_CLUSTER_MAX_REDUCE_MEMORY_MB:--1}
MAPRED_JOB_MAP_MEMORY_MB=${MAPRED_JOB_MAP_MEMORY_MB:--1}
MAPRED_JOB_REDUCE_MEMORY_MB=${MAPRED_JOB_REDUCE_MEMORY_MB:--1}
KEYTAB_DIR=${KEYTAB_DIR:-/etc/security/keytabs}
HDFS_KEYTAB=${HDFS_KEYTAB:-/home/hdfs/hdfs.keytab}
MR_KEYTAB=${MR_KEYTAB:-/home/mr/mr.keytab}
KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM}
SECURITY_TYPE=${SECURITY_TYPE:-simple}
KINIT=${KINIT:-/usr/kerberos/bin/kinit}
if [ "${SECURITY_TYPE}" = "kerberos" ]; then
  TASK_CONTROLLER="org.apache.hadoop.mapred.LinuxTaskController"
  HADOOP_DN_ADDR="0.0.0.0:1019"
  HADOOP_DN_HTTP_ADDR="0.0.0.0:1022"
  SECURITY="true"
  HADOOP_SECURE_DN_USER=${HADOOP_HDFS_USER}
else
  TASK_CONTROLLER="org.apache.hadoop.mapred.DefaultTaskController"
  HADOOP_DN_ADDR="0.0.0.0:50010"
  HADOOP_DN_HTTP_ADDR="0.0.0.0:50075"
  SECURITY="false"
  HADOOP_SECURE_DN_USER=""
fi

#unset env vars
unset HADOOP_CLIENT_OPTS HADOOP_NAMENODE_OPTS HADOOP_JOBTRACKER_OPTS HADOOP_TASKTRACKER_OPTS HADOOP_DATANODE_OPTS HADOOP_SECONDARYNAMENODE_OPTS HADOOP_JAVA_PLATFORM_OPTS
if [ "${AUTOMATED}" != "1" ]; then
  echo "Setup Hadoop Configuration"
  echo
  echo -n "Where would you like to put config directory? (${HADOOP_CONF_DIR}) "
  read USER_HADOOP_CONF_DIR
  echo -n "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
  read USER_HADOOP_LOG_DIR
  echo -n "Where would you like to put pid directory? (${HADOOP_PID_DIR}) "
  read USER_HADOOP_PID_DIR
  echo -n "What is the host of the namenode? (${HADOOP_NN_HOST}) "
  read USER_HADOOP_NN_HOST
  echo -n "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
  read USER_HADOOP_NN_DIR
  echo -n "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
  read USER_HADOOP_DN_DIR
  echo -n "What is the host of the jobtracker? (${HADOOP_JT_HOST}) "
  read USER_HADOOP_JT_HOST
  echo -n "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
  read USER_HADOOP_MAPRED_DIR
  echo -n "Where is JAVA_HOME directory? (${JAVA_HOME}) "
  read USER_JAVA_HOME
  echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
  read USER_AUTOSETUP
  echo
  JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
  HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
  HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
  HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
  HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
  HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
  HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
  HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
  HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
  HADOOP_PID_DIR=${USER_HADOOP_PID_DIR:-$HADOOP_PID_DIR}
  HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
  AUTOSETUP=${USER_AUTOSETUP:-y}
  echo "Review your choices:"
  echo
  echo "Config directory            : ${HADOOP_CONF_DIR}"
  echo "Log directory               : ${HADOOP_LOG_DIR}"
  echo "PID directory               : ${HADOOP_PID_DIR}"
  echo "Namenode host               : ${HADOOP_NN_HOST}"
  echo "Namenode directory          : ${HADOOP_NN_DIR}"
  echo "Datanode directory          : ${HADOOP_DN_DIR}"
  echo "Jobtracker host             : ${HADOOP_JT_HOST}"
  echo "Mapreduce directory         : ${HADOOP_MAPRED_DIR}"
  echo "Task scheduler              : ${HADOOP_TASK_SCHEDULER}"
  echo "JAVA_HOME directory         : ${JAVA_HOME}"
  echo "Create dirs/copy conf files : ${AUTOSETUP}"
  echo
  echo -n "Proceed with generating configuration? (y/N) "
  read CONFIRM
  if [ "${CONFIRM}" != "y" ]; then
    echo "User aborted setup, exiting..."
    exit 1
  fi
fi
if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
  if [ -d ${KEYTAB_DIR} ]; then
    chmod 700 ${KEYTAB_DIR}/*
    chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[jt]t.service.keytab
    chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[dns]n.service.keytab
  fi
  chmod -R 755 ${HADOOP_PREFIX}/sbin/*hadoop*
  chmod -R 755 ${HADOOP_PREFIX}/bin/hadoop
  chmod -R 755 ${HADOOP_PREFIX}/libexec/hadoop-config.sh
  mkdir -p /home/${HADOOP_MR_USER}
  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} /home/${HADOOP_MR_USER}
  HDFS_DIR=`echo ${HADOOP_HDFS_DIR} | sed -e 's/,/ /g'`
  mkdir -p ${HDFS_DIR}
  if [ -e ${HADOOP_NN_DIR} ]; then
    rm -rf ${HADOOP_NN_DIR}
  fi
  DATANODE_DIR=`echo ${HADOOP_DN_DIR} | sed -e 's/,/ /g'`
  mkdir -p ${DATANODE_DIR}
  MAPRED_DIR=`echo ${HADOOP_MAPRED_DIR} | sed -e 's/,/ /g'`
  mkdir -p ${MAPRED_DIR}
  mkdir -p ${HADOOP_CONF_DIR}
  check_permission ${HADOOP_CONF_DIR}
  if [ $? == 1 ]; then
    echo "Full path to ${HADOOP_CONF_DIR} should be owned by root."
    exit 1
  fi
  mkdir -p ${HADOOP_LOG_DIR}
  #create the log sub dir for diff users
  mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
  mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
  mkdir -p ${HADOOP_PID_DIR}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HDFS_DIR}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${DATANODE_DIR}
  chmod -R 700 ${DATANODE_DIR}
  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${MAPRED_DIR}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}
  chmod 775 ${HADOOP_LOG_DIR}
  chmod 775 ${HADOOP_PID_DIR}
  chown root:${HADOOP_GROUP} ${HADOOP_PID_DIR}
  #change the permission and the owner
  chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
  chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties
  #set up the proxy users
  setupProxyUsers
  #set the owner of the hadoop dir to root
  chown root ${HADOOP_PREFIX}
  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
  chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
  #set taskcontroller
  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
  chmod 6050 ${HADOOP_PREFIX}/bin/task-controller
  #generate the slaves file and include and exclude files for hdfs and mapred
  echo '' > ${HADOOP_CONF_DIR}/slaves
  echo '' > ${HADOOP_CONF_DIR}/dfs.include
  echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
  echo '' > ${HADOOP_CONF_DIR}/mapred.include
  echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
  for dn in $DATANODES
  do
    echo $dn >> ${HADOOP_CONF_DIR}/slaves
    echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
  done
  for tt in $TASKTRACKERS
  do
    echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
  done
  echo "Configuration setup is completed."
  if [[ "$HADOOP_NN_HOST" =~ "`hostname`" ]]; then
    echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
  fi
else
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties
  #set up the proxy users
  setupProxyUsers
  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
  chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
  #set taskcontroller
  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
  chmod 6050 ${HADOOP_PREFIX}/bin/task-controller
  #generate the slaves file and include and exclude files for hdfs and mapred
  echo '' > ${HADOOP_CONF_DIR}/slaves
  echo '' > ${HADOOP_CONF_DIR}/dfs.include
  echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
  echo '' > ${HADOOP_CONF_DIR}/mapred.include
  echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
  for dn in $DATANODES
  do
    echo $dn >> ${HADOOP_CONF_DIR}/slaves
    echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
  done
  for tt in $TASKTRACKERS
  do
    echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
  done
  echo
  echo "Configuration files have been generated in:"
  echo
  echo "${HADOOP_CONF_DIR}/core-site.xml"
  echo "${HADOOP_CONF_DIR}/hdfs-site.xml"
  echo "${HADOOP_CONF_DIR}/mapred-site.xml"
  echo "${HADOOP_CONF_DIR}/hadoop-env.sh"
  echo "${HADOOP_CONF_DIR}/hadoop-policy.xml"
  echo "${HADOOP_CONF_DIR}/commons-logging.properties"
  echo "${HADOOP_CONF_DIR}/mapred-queue-acls.xml"
  echo "${HADOOP_CONF_DIR}/taskcontroller.cfg"
  echo "${HADOOP_CONF_DIR}/capacity-scheduler.xml"
  echo "${HADOOP_CONF_DIR}/log4j.properties"
  echo "${HADOOP_CONF_DIR}/hadoop-metrics2.properties"
  echo
  echo "Copy these files to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on the namenode."
fi