hadoop-setup-conf.sh

#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
this="${BASH_SOURCE-$0}"
HADOOP_DEFAULT_PREFIX=`dirname "$this"`/..
HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
export HADOOP_PREFIX

usage() {
  echo "
usage: $0 <parameters>

Optional parameters:
  --auto                                                    Set up paths and configuration automatically
  --default                                                 Set up the configuration with default values
  --conf-dir=/etc/hadoop                                    Set configuration directory
  --datanode-dir=/var/lib/hadoop/hdfs/datanode              Set datanode directory
  --group=hadoop                                            Set Hadoop group name
  -h                                                        Display this message
  --hdfs-user=hdfs                                          Set HDFS user
  --jobtracker-host=hostname                                Set jobtracker host
  --namenode-host=hostname                                  Set namenode host
  --secondarynamenode-host=hostname                         Set secondary namenode host
  --kerberos-realm=KERBEROS.EXAMPLE.COM                     Set Kerberos realm
  --kinit-location=/usr/kerberos/bin/kinit                  Set kinit location
  --keytab-dir=/etc/security/keytabs                        Set keytab directory
  --log-dir=/var/log/hadoop                                 Set log directory
  --pid-dir=/var/run/hadoop                                 Set pid directory
  --hdfs-dir=/var/lib/hadoop/hdfs                           Set HDFS directory
  --hdfs-user-keytab=/home/hdfs/hdfs.keytab                 Set HDFS user keytab
  --mapred-dir=/var/lib/hadoop/mapred                       Set mapreduce directory
  --mapreduce-user=mr                                       Set mapreduce user
  --mapreduce-user-keytab=/home/mr/mr.keytab                Set mapreduce user keytab
  --namenode-dir=/var/lib/hadoop/hdfs/namenode              Set namenode directory
  --replication=3                                           Set replication factor
  --taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler  Set task scheduler
  --datanodes=hostname1,hostname2,...                       Set the datanodes
  --tasktrackers=hostname1,hostname2,...                    Set the tasktrackers
  --dfs-webhdfs-enabled=false|true                          Enable webhdfs
  --dfs-support-append=false|true                           Enable append
  --hadoop-proxy-users='user1:groups:hosts;user2:groups:hosts'  Set up proxy users for hadoop
  --hbase-user=hbase                                        User which hbase is running as. Defaults to hbase
  --mapreduce-cluster-mapmemory-mb=memory                   Virtual memory of a map slot for the MR framework. Defaults to -1
  --mapreduce-cluster-reducememory-mb=memory                Virtual memory of a reduce slot for the MR framework. Defaults to -1
  --mapreduce-jobtracker-maxmapmemory-mb=memory             Maximum virtual memory of a single map task. Defaults to -1
                                                            This value should be set to
                                                            (mapreduce.cluster.mapmemory.mb * mapreduce.tasktracker.map.tasks.maximum)
  --mapreduce-jobtracker-maxreducememory-mb=memory          Maximum virtual memory of a single reduce task. Defaults to -1
                                                            This value should be set to
                                                            (mapreduce.cluster.reducememory.mb * mapreduce.tasktracker.reduce.tasks.maximum)
  --mapreduce-map-memory-mb=memory                          Virtual memory of a single map slot for a job. Defaults to -1
                                                            This value should be <= mapred.cluster.max.map.memory.mb
  --mapreduce-reduce-memory-mb=memory                       Virtual memory of a single reduce slot for a job. Defaults to -1
                                                            This value should be <= mapred.cluster.max.reduce.memory.mb
  --dfs-datanode-dir-perm=700                               Set the permission of the datanode data directories. Defaults to 700
  --dfs-block-local-path-access-user=user                   User for which short-circuit read should be enabled
  --dfs-client-read-shortcircuit=true|false                 Enable short-circuit read for the client. Defaults to false
  --dfs-client-read-shortcircuit-skip-checksum=false|true   Skip checksum verification when a short-circuit read takes place. Defaults to false
  "
  exit 1
}
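
# Example invocation (all hostnames below are placeholders; substitute your
# own cluster's values):
#
#   ./hadoop-setup-conf.sh --auto \
#     --conf-dir=/etc/hadoop \
#     --namenode-host=nn.example.com \
#     --secondarynamenode-host=snn.example.com \
#     --jobtracker-host=jt.example.com \
#     --datanodes=dn1.example.com,dn2.example.com \
#     --tasktrackers=dn1.example.com,dn2.example.com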

# Walk up from $1 to /, verifying that every directory on the path is owned
# by root; returns non-zero at the first non-root owner.
check_permission() {
  TARGET=$1
  OWNER="0"
  RESULT=0
  while [ "$TARGET" != "/" ]; do
    if [ "`uname`" = "Darwin" ]; then
      OWNER=`stat -f %u "$TARGET"`
    else
      OWNER=`stat -c %u "$TARGET"`
    fi
    if [ "$OWNER" != "0" ]; then
      RESULT=1
      break
    fi
    TARGET=`dirname "$TARGET"`
  done
  return $RESULT
}

# Expand ${VAR} references in template file $1 against the current
# environment and write the result to $2, backing up any existing $2.
template_generator() {
  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
  if [ -e "$2" ]; then
    mv -f "$2" "$2.bak"
  fi
  cat "$1" |
  while IFS= read -r line ; do
    while [[ "$line" =~ $REGEX ]] ; do
      LHS=${BASH_REMATCH[1]}
      RHS="$(eval echo "\"$LHS\"")"
      line=${line//$LHS/$RHS}
    done
    echo "$line" >> "$2"
  done
}
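
# Illustration: given a template line such as
#   <value>hdfs://${HADOOP_NN_HOST}:8020</value>
# and HADOOP_NN_HOST=nn.example.com in the environment, template_generator
# writes
#   <value>hdfs://nn.example.com:8020</value>
# (the line and hostname are made up for illustration; the real templates
# live under ${HADOOP_PREFIX}/share/hadoop/common/templates/conf).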

#########################################
# Add a property to an XML configuration file.
# Params: $1 is the file with full path; $2 is the property name;
#         $3 is the new value; $4 (optional) is the description;
#         $5 (optional) is the value for the <final> tag.
#########################################
function addPropertyToXMLConf
{
  # read the file name with full path
  local file=$1
  # get the property name
  local property=$2
  # get what value should be set for that
  local propValue=$3
  # get the description
  local desc=$4
  # get the value for the final tag
  local finalVal=$5

  # create the property text, make sure the / are escaped
  propText="<property>\n<name>$property<\/name>\n<value>$propValue<\/value>\n"
  # if description is not empty add it
  if [ ! -z "$desc" ]
  then
    propText="${propText}<description>$desc<\/description>\n"
  fi
  # if final is not empty add it
  if [ ! -z "$finalVal" ]
  then
    propText="${propText}<final>$finalVal<\/final>\n"
  fi
  # add the ending tag
  propText="${propText}<\/property>\n"

  # insert the property right before the closing configuration tag
  endText="<\/configuration>"
  sed -i "s|$endText|$propText$endText|" "$file"
}
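
# Example: a call such as
#   addPropertyToXMLConf "${HADOOP_CONF_DIR}/core-site.xml" "fs.trash.interval" "60"
# (an arbitrary sample property, not one this script sets) rewrites the file
# so that the following block appears just before </configuration>:
#   <property>
#   <name>fs.trash.interval</name>
#   <value>60</value>
#   </property>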

##########################################
# Set up the short-circuit read settings.
##########################################
function setupShortCircuitRead
{
  local conf_file="${HADOOP_CONF_DIR}/hdfs-site.xml"
  # if the short-circuit user is not set then return
  if [ -z "$DFS_BLOCK_LOCAL_PATH_ACCESS_USER" ]
  then
    return
  fi

  # set the defaults if values are not present
  DFS_CLIENT_READ_SHORTCIRCUIT=${DFS_CLIENT_READ_SHORTCIRCUIT:-false}
  DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=${DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM:-false}

  # add the user and the client settings to the conf file
  addPropertyToXMLConf "$conf_file" "dfs.block.local-path-access.user" "$DFS_BLOCK_LOCAL_PATH_ACCESS_USER"
  addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit" "$DFS_CLIENT_READ_SHORTCIRCUIT"
  addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit.skip.checksum" "$DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM"
}

##########################################
# Set up the proxy user settings.
##########################################
function setupProxyUsers
{
  local conf_file="${HADOOP_CONF_DIR}/core-site.xml"
  # if hadoop proxy users are set, set up the hadoop proxy
  if [ ! -z "$HADOOP_PROXY_USERS" ]
  then
    oldIFS=$IFS
    IFS=';'
    # process each proxy config
    for proxy in $HADOOP_PROXY_USERS
    do
      # get the user, group and hosts information for each proxy
      IFS=':'
      arr=($proxy)
      user="${arr[0]}"
      groups="${arr[1]}"
      hosts="${arr[2]}"
      # determine the property names and values
      proxy_groups_property="hadoop.proxyuser.${user}.groups"
      proxy_groups_val="$groups"
      addPropertyToXMLConf "$conf_file" "$proxy_groups_property" "$proxy_groups_val"
      proxy_hosts_property="hadoop.proxyuser.${user}.hosts"
      proxy_hosts_val="$hosts"
      addPropertyToXMLConf "$conf_file" "$proxy_hosts_property" "$proxy_hosts_val"
      IFS=';'
    done
    IFS=$oldIFS
  fi
}
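
# Example: --hadoop-proxy-users='oozie:users:host1;hue:staff,users:host2'
# (made-up users, groups, and hosts) yields these core-site.xml properties:
#   hadoop.proxyuser.oozie.groups = users
#   hadoop.proxyuser.oozie.hosts  = host1
#   hadoop.proxyuser.hue.groups   = staff,users
#   hadoop.proxyuser.hue.hosts    = host2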

OPTS=$(getopt \
  -n $0 \
  -o '' \
  -l 'auto' \
  -l 'java-home:' \
  -l 'conf-dir:' \
  -l 'default' \
  -l 'group:' \
  -l 'hdfs-dir:' \
  -l 'namenode-dir:' \
  -l 'datanode-dir:' \
  -l 'mapred-dir:' \
  -l 'namenode-host:' \
  -l 'secondarynamenode-host:' \
  -l 'jobtracker-host:' \
  -l 'log-dir:' \
  -l 'pid-dir:' \
  -l 'replication:' \
  -l 'taskscheduler:' \
  -l 'hdfs-user:' \
  -l 'hdfs-user-keytab:' \
  -l 'mapreduce-user:' \
  -l 'mapreduce-user-keytab:' \
  -l 'keytab-dir:' \
  -l 'kerberos-realm:' \
  -l 'kinit-location:' \
  -l 'datanodes:' \
  -l 'tasktrackers:' \
  -l 'dfs-webhdfs-enabled:' \
  -l 'hadoop-proxy-users:' \
  -l 'dfs-support-append:' \
  -l 'hbase-user:' \
  -l 'mapreduce-cluster-mapmemory-mb:' \
  -l 'mapreduce-cluster-reducememory-mb:' \
  -l 'mapreduce-jobtracker-maxmapmemory-mb:' \
  -l 'mapreduce-jobtracker-maxreducememory-mb:' \
  -l 'mapreduce-map-memory-mb:' \
  -l 'mapreduce-reduce-memory-mb:' \
  -l 'dfs-datanode-dir-perm:' \
  -l 'dfs-block-local-path-access-user:' \
  -l 'dfs-client-read-shortcircuit:' \
  -l 'dfs-client-read-shortcircuit-skip-checksum:' \
  -o 'h' \
  -- "$@")

if [ $? != 0 ] ; then
  usage
fi

# Make sure HADOOP_LOG_DIR is not picked up from the user environment.
unset HADOOP_LOG_DIR

eval set -- "${OPTS}"
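
# Note: getopt normalizes the argument list (e.g. --conf-dir=/etc/hadoop
# becomes --conf-dir /etc/hadoop), and the eval/set above rewrites $@ so the
# case loop below can consume an option and its value as two tokens.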

while true ; do
  case "$1" in
    --auto)
      AUTOSETUP=1
      AUTOMATED=1
      shift
      ;;
    --java-home)
      JAVA_HOME=$2; shift 2
      AUTOMATED=1
      ;;
    --conf-dir)
      HADOOP_CONF_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --default)
      AUTOMATED=1; shift
      ;;
    --group)
      HADOOP_GROUP=$2; shift 2
      AUTOMATED=1
      ;;
    -h)
      usage
      ;;
    --hdfs-dir)
      HADOOP_HDFS_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-dir)
      HADOOP_NN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --datanode-dir)
      HADOOP_DN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-dir)
      HADOOP_MAPRED_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-host)
      HADOOP_NN_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --secondarynamenode-host)
      HADOOP_SNN_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --jobtracker-host)
      HADOOP_JT_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --log-dir)
      HADOOP_LOG_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --pid-dir)
      HADOOP_PID_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --replication)
      HADOOP_REPLICATION=$2; shift 2
      AUTOMATED=1
      ;;
    --taskscheduler)
      HADOOP_TASK_SCHEDULER=$2; shift 2
      AUTOMATED=1
      ;;
    --hdfs-user)
      HADOOP_HDFS_USER=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-user)
      HADOOP_MR_USER=$2; shift 2
      AUTOMATED=1
      ;;
    --keytab-dir)
      KEYTAB_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --hdfs-user-keytab)
      HDFS_KEYTAB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-user-keytab)
      MR_KEYTAB=$2; shift 2
      AUTOMATED=1
      ;;
    --kerberos-realm)
      KERBEROS_REALM=$2; shift 2
      SECURITY_TYPE="kerberos"
      AUTOMATED=1
      ;;
    --kinit-location)
      KINIT=$2; shift 2
      AUTOMATED=1
      ;;
    --datanodes)
      DATANODES=$2; shift 2
      AUTOMATED=1
      DATANODES=$(echo $DATANODES | tr ',' ' ')
      ;;
    --tasktrackers)
      TASKTRACKERS=$2; shift 2
      AUTOMATED=1
      TASKTRACKERS=$(echo $TASKTRACKERS | tr ',' ' ')
      ;;
    --dfs-webhdfs-enabled)
      DFS_WEBHDFS_ENABLED=$2; shift 2
      AUTOMATED=1
      ;;
    --hadoop-proxy-users)
      HADOOP_PROXY_USERS=$2; shift 2
      AUTOMATED=1
      ;;
    --dfs-support-append)
      DFS_SUPPORT_APPEND=$2; shift 2
      AUTOMATED=1
      ;;
    --hbase-user)
      HBASE_USER=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-cluster-mapmemory-mb)
      MAPREDUCE_CLUSTER_MAPMEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-cluster-reducememory-mb)
      MAPREDUCE_CLUSTER_REDUCEMEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-jobtracker-maxmapmemory-mb)
      MAPREDUCE_JOBTRACKER_MAXMAPMEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-jobtracker-maxreducememory-mb)
      MAPREDUCE_JOBTRACKER_MAXREDUCEMEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-map-memory-mb)
      MAPREDUCE_MAP_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --mapreduce-reduce-memory-mb)
      MAPREDUCE_REDUCE_MEMORY_MB=$2; shift 2
      AUTOMATED=1
      ;;
    --dfs-datanode-dir-perm)
      DFS_DATANODE_DIR_PERM=$2; shift 2
      AUTOMATED=1
      ;;
    --dfs-block-local-path-access-user)
      DFS_BLOCK_LOCAL_PATH_ACCESS_USER=$2; shift 2
      AUTOMATED=1
      ;;
    --dfs-client-read-shortcircuit)
      DFS_CLIENT_READ_SHORTCIRCUIT=$2; shift 2
      AUTOMATED=1
      ;;
    --dfs-client-read-shortcircuit-skip-checksum)
      DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=$2; shift 2
      AUTOMATED=1
      ;;
    --)
      shift ; break
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

AUTOSETUP=${AUTOSETUP:-1}
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_GROUP=${HADOOP_GROUP:-hadoop}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-`hostname`}
HADOOP_SNN_HOST=${HADOOP_SNN_HOST:-`hostname`}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/hadoop}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs}
HADOOP_MR_USER=${HADOOP_MR_USER:-mr}
DFS_WEBHDFS_ENABLED=${DFS_WEBHDFS_ENABLED:-false}
DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false}
HBASE_USER=${HBASE_USER:-hbase}
MAPREDUCE_CLUSTER_MAPMEMORY_MB=${MAPREDUCE_CLUSTER_MAPMEMORY_MB:--1}
MAPREDUCE_CLUSTER_REDUCEMEMORY_MB=${MAPREDUCE_CLUSTER_REDUCEMEMORY_MB:--1}
MAPREDUCE_JOBTRACKER_MAXMAPMEMORY_MB=${MAPREDUCE_JOBTRACKER_MAXMAPMEMORY_MB:--1}
MAPREDUCE_JOBTRACKER_MAXREDUCEMEMORY_MB=${MAPREDUCE_JOBTRACKER_MAXREDUCEMEMORY_MB:--1}
MAPREDUCE_MAP_MEMORY_MB=${MAPREDUCE_MAP_MEMORY_MB:--1}
MAPREDUCE_REDUCE_MEMORY_MB=${MAPREDUCE_REDUCE_MEMORY_MB:--1}
KEYTAB_DIR=${KEYTAB_DIR:-/etc/security/keytabs}
HDFS_KEYTAB=${HDFS_KEYTAB:-/home/hdfs/hdfs.keytab}
MR_KEYTAB=${MR_KEYTAB:-/home/mr/mr.keytab}
KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM}
SECURITY_TYPE=${SECURITY_TYPE:-simple}
KINIT=${KINIT:-/usr/kerberos/bin/kinit}
# default the data dir permission to 700
DFS_DATANODE_DIR_PERM=${DFS_DATANODE_DIR_PERM:-700}

if [ "${SECURITY_TYPE}" = "kerberos" ]; then
  TASK_CONTROLLER="org.apache.hadoop.mapred.LinuxTaskController"
  # secure datanodes must bind to privileged ports (< 1024)
  HADOOP_DN_ADDR="0.0.0.0:1019"
  HADOOP_DN_HTTP_ADDR="0.0.0.0:1022"
  SECURITY="true"
  HADOOP_SECURE_DN_USER=${HADOOP_HDFS_USER}
else
  TASK_CONTROLLER="org.apache.hadoop.mapred.DefaultTaskController"
  HADOOP_DN_ADDR="0.0.0.0:50010"
  HADOOP_DN_HTTP_ADDR="0.0.0.0:50075"
  SECURITY="false"
  HADOOP_SECURE_DN_USER=""
fi

# make sure the daemon opts are not picked up from the user environment
unset HADOOP_CLIENT_OPTS HADOOP_NAMENODE_OPTS HADOOP_DATANODE_OPTS HADOOP_SECONDARYNAMENODE_OPTS HADOOP_JAVA_PLATFORM_OPTS

if [ "${AUTOMATED}" != "1" ]; then
  echo "Setup Hadoop Configuration"
  echo
  echo -n "Where would you like to put the config directory? (${HADOOP_CONF_DIR}) "
  read USER_HADOOP_CONF_DIR
  echo -n "Where would you like to put the log directory? (${HADOOP_LOG_DIR}) "
  read USER_HADOOP_LOG_DIR
  echo -n "Where would you like to put the pid directory? (${HADOOP_PID_DIR}) "
  read USER_HADOOP_PID_DIR
  echo -n "What is the host of the namenode? (${HADOOP_NN_HOST}) "
  read USER_HADOOP_NN_HOST
  echo -n "Where would you like to put the namenode data directory? (${HADOOP_NN_DIR}) "
  read USER_HADOOP_NN_DIR
  echo -n "Where would you like to put the datanode data directory? (${HADOOP_DN_DIR}) "
  read USER_HADOOP_DN_DIR
  echo -n "What is the host of the jobtracker? (${HADOOP_JT_HOST}) "
  read USER_HADOOP_JT_HOST
  echo -n "Where would you like to put the jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
  read USER_HADOOP_MAPRED_DIR
  echo -n "Where is the JAVA_HOME directory? (${JAVA_HOME}) "
  read USER_JAVA_HOME
  echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
  read USER_AUTOSETUP
  echo

  JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
  HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
  HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
  HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
  HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
  HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
  HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
  HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
  HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
  HADOOP_PID_DIR=${USER_HADOOP_PID_DIR:-$HADOOP_PID_DIR}
  HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
  AUTOSETUP=${USER_AUTOSETUP:-y}

  echo "Review your choices:"
  echo
  echo "Config directory            : ${HADOOP_CONF_DIR}"
  echo "Log directory               : ${HADOOP_LOG_DIR}"
  echo "PID directory               : ${HADOOP_PID_DIR}"
  echo "Namenode host               : ${HADOOP_NN_HOST}"
  echo "Namenode directory          : ${HADOOP_NN_DIR}"
  echo "Datanode directory          : ${HADOOP_DN_DIR}"
  echo "Jobtracker host             : ${HADOOP_JT_HOST}"
  echo "Mapreduce directory         : ${HADOOP_MAPRED_DIR}"
  echo "Task scheduler              : ${HADOOP_TASK_SCHEDULER}"
  echo "JAVA_HOME directory         : ${JAVA_HOME}"
  echo "Create dirs/copy conf files : ${AUTOSETUP}"
  echo
  echo -n "Proceed with generating the configuration? (y/N) "
  read CONFIRM
  if [ "${CONFIRM}" != "y" ]; then
    echo "User aborted setup, exiting..."
    exit 1
  fi
fi

if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
  if [ -d ${KEYTAB_DIR} ]; then
    chmod 700 ${KEYTAB_DIR}/*
    chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[jt]t.service.keytab
    chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[dns]n.service.keytab
  fi
  chmod -R 755 ${HADOOP_PREFIX}/sbin/*hadoop*
  chmod -R 755 ${HADOOP_PREFIX}/bin/hadoop
  HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-${HADOOP_PREFIX}/libexec}
  chmod -R 755 ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
  mkdir -p /home/${HADOOP_MR_USER}
  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} /home/${HADOOP_MR_USER}
  HDFS_DIR=`echo ${HADOOP_HDFS_DIR} | sed -e 's/,/ /g'`
  mkdir -p ${HDFS_DIR}
  if [ -e ${HADOOP_NN_DIR} ]; then
    rm -rf ${HADOOP_NN_DIR}
  fi
  DATANODE_DIR=`echo ${HADOOP_DN_DIR} | sed -e 's/,/ /g'`
  mkdir -p ${DATANODE_DIR}
  MAPRED_DIR=`echo ${HADOOP_MAPRED_DIR} | sed -e 's/,/ /g'`
  mkdir -p ${MAPRED_DIR}

  mkdir -p ${HADOOP_CONF_DIR}
  check_permission ${HADOOP_CONF_DIR}
  if [ $? == 1 ]; then
    echo "The full path to ${HADOOP_CONF_DIR} should be owned by root."
    exit 1
  fi

  mkdir -p ${HADOOP_LOG_DIR}
  # create the log sub dirs for the different users
  mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
  mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
  mkdir -p ${HADOOP_PID_DIR}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HDFS_DIR}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${DATANODE_DIR}
  chmod -R 700 ${DATANODE_DIR}
  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${MAPRED_DIR}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}
  chmod 775 ${HADOOP_LOG_DIR}
  chmod 775 ${HADOOP_PID_DIR}
  chown root:${HADOOP_GROUP} ${HADOOP_PID_DIR}

  # change the permissions and the owners of the per-user log dirs
  chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
  chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}

  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties

  # set up the proxy users
  setupProxyUsers
  # set up short-circuit read
  setupShortCircuitRead

  # set the owner of the hadoop dir to root
  chown root ${HADOOP_PREFIX}
  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
  chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
  # set up the taskcontroller
  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
  chmod 6050 ${HADOOP_PREFIX}/bin/task-controller

  # generate the slaves file and the include and exclude files for hdfs and mapred
  echo '' > ${HADOOP_CONF_DIR}/slaves
  echo '' > ${HADOOP_CONF_DIR}/dfs.include
  echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
  echo '' > ${HADOOP_CONF_DIR}/mapred.include
  echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
  for dn in $DATANODES
  do
    echo $dn >> ${HADOOP_CONF_DIR}/slaves
    echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
  done
  for tt in $TASKTRACKERS
  do
    echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
  done

  echo "Configuration setup is complete."
  if [[ "$HADOOP_NN_HOST" =~ "`hostname`" ]]; then
    echo "Proceed to run hadoop-setup-hdfs.sh on the namenode."
  fi
else
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties
  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties

  # set up the proxy users
  setupProxyUsers
  # set up short-circuit read
  setupShortCircuitRead

  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
  chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
  # set up the taskcontroller
  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
  chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
  chmod 6050 ${HADOOP_PREFIX}/bin/task-controller

  # generate the slaves file and the include and exclude files for hdfs and mapred
  echo '' > ${HADOOP_CONF_DIR}/slaves
  echo '' > ${HADOOP_CONF_DIR}/dfs.include
  echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
  echo '' > ${HADOOP_CONF_DIR}/mapred.include
  echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
  for dn in $DATANODES
  do
    echo $dn >> ${HADOOP_CONF_DIR}/slaves
    echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
  done
  for tt in $TASKTRACKERS
  do
    echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
  done

  echo
  echo "Configuration files have been generated in:"
  echo
  echo "${HADOOP_CONF_DIR}/core-site.xml"
  echo "${HADOOP_CONF_DIR}/hdfs-site.xml"
  echo "${HADOOP_CONF_DIR}/mapred-site.xml"
  echo "${HADOOP_CONF_DIR}/hadoop-env.sh"
  echo "${HADOOP_CONF_DIR}/hadoop-policy.xml"
  echo "${HADOOP_CONF_DIR}/commons-logging.properties"
  echo "${HADOOP_CONF_DIR}/mapred-queue-acls.xml"
  echo "${HADOOP_CONF_DIR}/taskcontroller.cfg"
  echo "${HADOOP_CONF_DIR}/capacity-scheduler.xml"
  echo "${HADOOP_CONF_DIR}/log4j.properties"
  echo "${HADOOP_CONF_DIR}/hadoop-metrics2.properties"
  echo
  echo "Copy these files to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on the namenode."
fi