ambari-metrics-collector 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. #!/usr/bin/env bash
  2. # Licensed to the Apache Software Foundation (ASF) under one or more
  3. # contributor license agreements. See the NOTICE file distributed with
  4. # this work for additional information regarding copyright ownership.
  5. # The ASF licenses this file to You under the Apache License, Version 2.0
  6. # (the "License"); you may not use this file except in compliance with
  7. # the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and limitations under the License.
  # Example JAVA_HOME override (left disabled).
  #JAVA_HOME=/usr/jdk64/jdk1.7.0_45

  # --- Collector identity and files ------------------------------------------
  # The pid/log paths below are defaults; later parts of this script replace
  # them when AMS_COLLECTOR_PID_DIR / AMS_COLLECTOR_LOG_DIR are set.
  METRIC_COLLECTOR="ambari-metrics-collector"
  DAEMON_NAME="timelineserver"
  COLLECTOR_CONF_DIR="/etc/ambari-metrics-collector/conf"
  PIDFILE="/var/run/ambari-metrics-collector/ambari-metrics-collector.pid"
  OUTFILE="/var/log/ambari-metrics-collector/ambari-metrics-collector.out"
  STARTUPFILE="/var/log/ambari-metrics-collector/ambari-metrics-collector-startup.out"

  # Marker files recording that the matching HBase table settings were applied.
  NORMALIZER_ENABLED_STUB_FILE="/var/run/ambari-metrics-collector/normalizer_enabled"
  FIFO_ENABLED_STUB_FILE="/var/run/ambari-metrics-collector/fifo_enabled"

  # Seconds to wait after SIGTERM before escalating to SIGKILL.
  STOP_TIMEOUT="5"

  # --- HBase -----------------------------------------------------------------
  HBASE_DIR="/usr/lib/ams-hbase"
  HBASE_CONF_DIR="/etc/ams-hbase/conf"
  HBASE_CMD="${HBASE_DIR}/bin/hbase"
  HBASE_ZK_PID="/var/run/ams-hbase/hbase-${USER}-zookeeper.pid"
  HBASE_MASTER_PID="/var/run/ams-hbase/hbase-${USER}-master.pid"
  HBASE_RS_PID="/var/run/ams-hbase/hbase-${USER}-regionserver.pid"

  # Set to true by the --distributed command line option.
  DISTRIBUTED_HBASE="false"

  # --- Metric tables ---------------------------------------------------------
  # Tables that get NORMALIZATION_ENABLED switched on.
  METRIC_TABLES=(
    METRIC_AGGREGATE_DAILY
    METRIC_AGGREGATE_HOURLY
    METRIC_AGGREGATE_MINUTE
    METRIC_AGGREGATE
    METRIC_RECORD
    METRIC_RECORD_DAILY
    METRIC_RECORD_HOURLY
    METRIC_RECORD_MINUTE
  )
  # Tables whose compaction policy is switched to/from FIFO.
  METRIC_FIFO_COMPACTION_TABLES=(
    METRIC_AGGREGATE
    METRIC_RECORD
    METRIC_RECORD_MINUTE
  )
  #######################################
  # Run an hbase-daemon.sh command for one embedded HBase daemon.
  # Globals:
  #   HBASE_MASTER_PID, HBASE_ZK_PID, HBASE_RS_PID (read) - pid file per daemon
  #   HBASE_DIR, HBASE_CONF_DIR (read)
  #   STARTUPFILE (appended to)
  # Arguments:
  #   $1 - daemon name: master | zookeeper | regionserver
  #   $2 - command handed to hbase-daemon.sh (callers in this file pass
  #        "start" or "stop")
  # Returns:
  #   1 for an unknown daemon name, otherwise the exit status of
  #   hbase-daemon.sh.
  #######################################
  function hbase_daemon
  {
    local daemon=$1
    local cmd=$2
    local pid

    case "${daemon}" in
      master)       pid=${HBASE_MASTER_PID} ;;
      zookeeper)    pid=${HBASE_ZK_PID} ;;
      regionserver) pid=${HBASE_RS_PID} ;;
      *)
        # Without a known pid file there is nothing sensible to check or
        # clean up, so refuse instead of calling hbase-daemon.sh blindly.
        echo "ERROR: Unknown HBase daemon \"${daemon}\"." | tee -a "${STARTUPFILE}"
        return 1
        ;;
    esac

    if daemon_status "${pid}"; then
      echo "${daemon} is running as process $(cat "${pid}"). Continuing" | tee -a "${STARTUPFILE}"
    else
      # stale pid file, so just remove it and continue on
      rm -f "${pid}" >/dev/null 2>&1
    fi

    "${HBASE_DIR}/bin/hbase-daemon.sh" --config "${HBASE_CONF_DIR}" "${cmd}" "${daemon}"
  }
  #######################################
  # Record a process id in a pid file.
  # Globals:
  #   STARTUPFILE (appended to on error)
  # Arguments:
  #   $1 - pid file to write
  #   $2 - optional pid to record; defaults to $!, the most recently started
  #        background job of this shell
  # Exits:
  #   1 if the pid file cannot be written.
  #######################################
  function write_pidfile
  {
    local pidfile="$1"
    local pid="${2:-$!}"

    # The stderr redirection wraps the whole group so that the shell's own
    # "cannot create file" message is suppressed too; only our ERROR line shows.
    if ! { echo "${pid}" > "${pidfile}"; } 2>/dev/null; then
      echo "ERROR: Cannot write pid ${pidfile}." | tee -a "${STARTUPFILE}"
      exit 1
    fi
  }
  # TODO replace this with Phoenix DDL, when normalization support added to Phoenix
  #######################################
  # Bring the HBase metric tables in line with AMS_HBASE_NORMALIZER_ENABLED and
  # AMS_HBASE_FIFO_COMPACTION_ENABLED by piping `alter` statements into the
  # HBase shell. Stub files remember what was already applied so the statements
  # are not sent again on every run.
  #
  # Stub-file changes that belong to an `alter` are made only after the HBase
  # shell exits successfully. A failed run therefore leaves the stubs exactly
  # as they were and the same statements are retried on the next invocation
  # (this includes a failed FIFO revert, whose stub is kept).
  #
  # Globals:
  #   AMS_HBASE_NORMALIZER_ENABLED, AMS_HBASE_FIFO_COMPACTION_ENABLED (read;
  #     "true" or "True" means enabled)
  #   METRIC_TABLES, METRIC_FIFO_COMPACTION_TABLES (read)
  #   NORMALIZER_ENABLED_STUB_FILE, FIFO_ENABLED_STUB_FILE (files created/removed)
  #   HBASE_CMD, HBASE_CONF_DIR (read)
  #   STARTUPFILE (appended to)
  # Arguments:
  #   None
  #######################################
  function enable_normalization_fifo
  {
    local -r policy_key="hbase.hstore.defaultengine.compactionpolicy.class"
    local -r blocking_key="hbase.hstore.blockingStoreFiles"
    local -r policy_pkg="org.apache.hadoop.hbase.regionserver.compactions"
    local -a commands=()          # one HBase shell statement per element
    local -a stubs_to_create=()   # applied only if the shell succeeds
    local -a stubs_to_remove=()   # applied only if the shell succeeds
    local table stub

    echo "$(date) Handling HBase normalization/fifo requests" | tee -a "${STARTUPFILE}"

    # Enable normalization for all the tables
    echo "$(date) Normalized enabled : ${AMS_HBASE_NORMALIZER_ENABLED}" | tee -a "${STARTUPFILE}"
    if [[ "${AMS_HBASE_NORMALIZER_ENABLED}" == "true" || "${AMS_HBASE_NORMALIZER_ENABLED}" == "True" ]]; then
      if [[ ! -f "${NORMALIZER_ENABLED_STUB_FILE}" ]]; then
        echo "$(date) Normalizer stub file not found" | tee -a "${STARTUPFILE}"
        for table in "${METRIC_TABLES[@]}"; do
          commands+=("alter '${table}', {NORMALIZATION_ENABLED => 'true'}")
        done
        stubs_to_create+=("${NORMALIZER_ENABLED_STUB_FILE}")
      fi
    elif [[ -f "${NORMALIZER_ENABLED_STUB_FILE}" ]]; then
      echo "$(date) Normalizer stub file found" | tee -a "${STARTUPFILE}"
      # Disabling sends nothing to HBase, so the stub can go right away.
      rm -f "${NORMALIZER_ENABLED_STUB_FILE}"
    fi

    # Similarly for HBase FIFO Compaction policy
    echo "$(date) Fifo enabled : ${AMS_HBASE_FIFO_COMPACTION_ENABLED}" | tee -a "${STARTUPFILE}"
    if [[ "${AMS_HBASE_FIFO_COMPACTION_ENABLED}" == "true" || "${AMS_HBASE_FIFO_COMPACTION_ENABLED}" == "True" ]]; then
      if [[ ! -f "${FIFO_ENABLED_STUB_FILE}" ]]; then
        echo "$(date) Fifo stub file not found" | tee -a "${STARTUPFILE}"
        for table in "${METRIC_FIFO_COMPACTION_TABLES[@]}"; do
          commands+=("alter '${table}', CONFIGURATION => {'${blocking_key}' => '1000', '${policy_key}' => '${policy_pkg}.FIFOCompactionPolicy'}")
        done
        stubs_to_create+=("${FIFO_ENABLED_STUB_FILE}")
      fi
    elif [[ -f "${FIFO_ENABLED_STUB_FILE}" ]]; then
      echo "$(date) Fifo stub file found" | tee -a "${STARTUPFILE}"
      for table in "${METRIC_FIFO_COMPACTION_TABLES[@]}"; do
        commands+=("alter '${table}', CONFIGURATION => {'${policy_key}' => '${policy_pkg}.ExploringCompactionPolicy', '${blocking_key}' => '300'}")
      done
      stubs_to_remove+=("${FIFO_ENABLED_STUB_FILE}")
    fi

    if (( ${#commands[@]} == 0 )); then
      echo "$(date) Nothing to execute against HBase shell" | tee -a "${STARTUPFILE}"
      return 0
    fi

    echo "$(date) Executing HBase shell command..." | tee -a "${STARTUPFILE}"
    printf '%s\n' "${commands[@]}" | tee -a "${STARTUPFILE}"
    if printf '%s\n' "${commands[@]}" \
        | "${HBASE_CMD}" --config "${HBASE_CONF_DIR}" shell > /dev/null 2>&1; then
      for stub in "${stubs_to_create[@]}"; do
        touch "${stub}"
      done
      for stub in "${stubs_to_remove[@]}"; do
        rm -f "${stub}"
      done
      echo "$(date) HBase shell command completed" | tee -a "${STARTUPFILE}"
    else
      # The warning is written to both stdout and stderr.
      echo "WARNING: Failed to enable Ambari Metrics data model normalization."
      >&2 echo "WARNING: Failed to enable Ambari Metrics data model normalization."
    fi
  }
  #######################################
  # Validate JAVA_HOME and derive the java binary and default heap option.
  # Globals:
  #   JAVA_HOME (read)
  #   JAVA (written) - ${JAVA_HOME}/bin/java
  #   HADOOP_HEAPSIZE (read/written) - defaults to 1024 when unset or empty
  #   JAVA_HEAP_MAX (written) - "-Xmx${HADOOP_HEAPSIZE}m"
  #   STARTUPFILE (appended to on error)
  # Arguments:
  #   None
  # Exits:
  #   1 if JAVA_HOME is empty, is not a directory, or bin/java is not
  #   executable.
  #######################################
  function hadoop_java_setup
  {
    # Bail if we did not detect it
    if [[ -z "${JAVA_HOME}" ]]; then
      echo "ERROR: JAVA_HOME is not set and could not be found." | tee -a "${STARTUPFILE}"
      exit 1
    fi

    if [[ ! -d "${JAVA_HOME}" ]]; then
      echo "ERROR: JAVA_HOME ${JAVA_HOME} does not exist." | tee -a "${STARTUPFILE}"
      exit 1
    fi

    JAVA="${JAVA_HOME}/bin/java"
    if [[ ! -x "${JAVA}" ]]; then
      echo "ERROR: ${JAVA} is not executable." | tee -a "${STARTUPFILE}"
      exit 1
    fi

    # HADOOP_HEAPSIZE always has a value after the default is applied, so the
    # heap option is derived from it directly.
    HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024}
    # shellcheck disable=SC2034
    JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
  }
  #######################################
  # Report whether the process recorded in a pid file is alive, using the
  # LSB 4.1.0 status codes (this is not an endorsement of the LSB).
  # Arguments:
  #   $1 - path of the pid file
  # Returns:
  #   0 - the pid file exists and `ps -p` finds that pid
  #   1 - the pid file exists but `ps -p` does not find the pid. Strictly the
  #       LSB reserves this code for pid files under /var/run; it is returned
  #       here wherever the pid file lives.
  #   3 - there is no pid file
  #   (2 is never returned.)
  #######################################
  function daemon_status()
  {
    local pid_path="$1"
    local recorded_pid

    # No pid file: not running.
    [[ -f "${pid_path}" ]] || return 3

    recorded_pid=$(cat "${pid_path}")
    ps -p "${recorded_pid}" > /dev/null 2>&1 && return 0

    # Pid file present but the process is gone.
    return 1
  }
  #######################################
  # Start the Metrics Collector JVM, preceded by the embedded HBase master
  # unless --distributed was given.
  #
  # Steps: validate Java, start HBase master (embedded mode only), exit early
  # if the collector is already running, launch the JVM under nohup, record its
  # pid, verify two seconds later that the process is still there, and
  # optionally wait for the first metric table to show up in HBase.
  #
  # Globals:
  #   DISTRIBUTED_HBASE, PIDFILE, OUTFILE, JAVA, AMS_COLLECTOR_HEAPSIZE,
  #   AMS_COLLECTOR_OPTS, COLLECTOR_CONF_DIR, AMS_COLLECTOR_LOG_DIR,
  #   DAEMON_NAME, METRIC_COLLECTOR, AMS_HBASE_INIT_CHECK_ENABLED, HBASE_CMD,
  #   HBASE_CONF_DIR, METRIC_TABLES (read)
  #   STARTUPFILE (removed after a successful launch, then appended to)
  # Arguments:
  #   Any arguments are appended to the java command line.
  # Exits:
  #   0   if the collector is already running
  #   255 if the collector process is not found after launch
  #######################################
  function start()
  {
    local CLASS PID
    local init_started
    local init_done=false
    local retry

    hadoop_java_setup

    # hbase_daemon "zookeeper" "start"
    # hbase_daemon "master" "start"
    # hbase_daemon "regionserver" "start"

    # Compare against the literal value: `[ !"${DISTRIBUTED_HBASE}" ]` tests a
    # non-empty string and is therefore true for both "true" and "false".
    if [[ "${DISTRIBUTED_HBASE}" != "true" ]]; then
      echo "$(date) Starting HBase." | tee -a "${STARTUPFILE}"
      hbase_daemon "master" "start"
    else
      echo "$(date) Launching in distributed mode. Assuming Hbase daemons up and running." | tee -a "${STARTUPFILE}"
    fi

    CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
    # YARN_OPTS="${YARN_OPTS} ${YARN_TIMELINESERVER_OPTS}"
    # if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
    # JAVA_HEAP_MAX="-Xmx${YARN_TIMELINESERVER_HEAPSIZE}m"
    # fi

    # check if this is needed?
    # export PHOENIX_JAR_PATH=/usr/lib/ambari-metrics/timelineservice/phoenix-client.jar
    # export HBASE_CONF_DIR=${HBASE_DIR}/conf

    if daemon_status "${PIDFILE}"; then
      echo "AMS is running as process $(cat "${PIDFILE}"). Exiting" | tee -a "${STARTUPFILE}"
      exit 0
    else
      # stale pid file, so just remove it and continue on
      rm -f "${PIDFILE}" >/dev/null 2>&1
    fi

    # AMS_COLLECTOR_OPTS stays unquoted on purpose so that it splits into
    # separate JVM options.
    # shellcheck disable=SC2086
    nohup "${JAVA}" \
      "-Xms${AMS_COLLECTOR_HEAPSIZE}" "-Xmx${AMS_COLLECTOR_HEAPSIZE}" \
      ${AMS_COLLECTOR_OPTS} \
      "-cp" "/usr/lib/ambari-metrics-collector/*:${COLLECTOR_CONF_DIR}" \
      "-Djava.net.preferIPv4Stack=true" \
      "-Dams.log.dir=${AMS_COLLECTOR_LOG_DIR}" \
      "-Dproc_${DAEMON_NAME}" \
      "${CLASS}" "$@" > "${OUTFILE}" 2>&1 &
    PID=$!
    write_pidfile "${PIDFILE}"

    sleep 2

    echo "Verifying ${METRIC_COLLECTOR} process status..." | tee -a "${STARTUPFILE}"
    if [[ -z "$(ps ax | grep -w "${PID}" | grep ApplicationHistoryServer)" ]]; then
      if [[ -s "${OUTFILE}" ]]; then
        echo "ERROR: ${METRIC_COLLECTOR} start failed. For more details, see ${OUTFILE}:" | tee -a "${STARTUPFILE}"
        echo "===================="
        tail -n 10 "${OUTFILE}"
        echo "===================="
      else
        echo "ERROR: ${METRIC_COLLECTOR} start failed" | tee -a "${STARTUPFILE}"
        rm -f "${PIDFILE}"
      fi
      echo "Collector out at: ${OUTFILE}" | tee -a "${STARTUPFILE}"
      # Same status the shell reports for `exit -1`.
      exit 255
    fi

    rm -f "${STARTUPFILE}" #Deleting startup file
    echo "$(date) Collector successfully started." | tee -a "${STARTUPFILE}"

    if [[ "${AMS_HBASE_INIT_CHECK_ENABLED}" == "true" || "${AMS_HBASE_INIT_CHECK_ENABLED}" == "True" ]]; then
      echo "$(date) Initializing Ambari Metrics data model" | tee -a "${STARTUPFILE}"
      init_started=${SECONDS}
      # Wait until METRIC_* tables created
      # Wait for 10 tries or 5 minutes whichever comes first
      for retry in {1..10}; do
        if echo 'list' \
            | "${HBASE_CMD}" --config "${HBASE_CONF_DIR}" shell 2> /dev/null \
            | grep "^${METRIC_TABLES[0]}" > /dev/null 2>&1; then
          echo "$(date) Ambari Metrics data model initialization completed." | tee -a "${STARTUPFILE}"
          init_done=true
          break
        fi
        echo "$(date) Ambari Metrics data model initialization check ${retry}" | tee -a "${STARTUPFILE}"
        if (( SECONDS - init_started > 300 )); then
          echo "$(date) Ambari Metrics data model initialization timed out" | tee -a "${STARTUPFILE}"
          break
        fi
        sleep 5
      done
      # Track success explicitly: the exit status of the loop itself is 0
      # whether the table was found, the retries ran out, or the wait timed out.
      if [[ "${init_done}" != "true" ]]; then
        echo "WARNING: Ambari Metrics data model initialization failed."
        >&2 echo "WARNING: Ambari Metrics data model initialization failed."
      fi
    else
      echo "$(date) Skipping Ambari Metrics data model initialization" | tee -a "${STARTUPFILE}"
    fi
  }
  #######################################
  # Stop the Metrics Collector and, unless --distributed was given, the
  # embedded HBase master.
  #
  # The collector gets SIGTERM, then SIGKILL if it is still alive after
  # STOP_TIMEOUT seconds. The pid file is removed once `ps -p` no longer
  # finds the process.
  #
  # Globals:
  #   PIDFILE, STOP_TIMEOUT, DISTRIBUTED_HBASE, METRIC_COLLECTOR (read)
  #   STARTUPFILE (appended to)
  # Arguments:
  #   None
  # Returns:
  #   The status of the HBase master stop in embedded mode, 0 otherwise.
  #######################################
  function stop()
  {
    local pidfile=${PIDFILE}
    local pid

    if [[ -f "${pidfile}" ]]; then
      pid=$(cat "${pidfile}")

      kill "${pid}" >/dev/null 2>&1
      sleep "${STOP_TIMEOUT}"

      if kill -0 "${pid}" > /dev/null 2>&1; then
        echo "WARNING: ${METRIC_COLLECTOR} did not stop gracefully after ${STOP_TIMEOUT} seconds: Trying to kill with kill -9" | tee -a "${STARTUPFILE}"
        kill -9 "${pid}" >/dev/null 2>&1
      fi

      if ps -p "${pid}" > /dev/null 2>&1; then
        echo "ERROR: Unable to kill ${pid}" | tee -a "${STARTUPFILE}"
      else
        rm -f "${pidfile}" >/dev/null 2>&1
      fi
    fi

    #stop hbase daemons
    # Compare against the literal value: `[ !"${DISTRIBUTED_HBASE}" ]` tests a
    # non-empty string and is therefore true for both "true" and "false".
    if [[ "${DISTRIBUTED_HBASE}" != "true" ]]; then
      echo "Stopping HBase master" | tee -a "${STARTUPFILE}"
      hbase_daemon "master" "stop"
    fi
  }
  # Consume leading options from the command line. The loop ends at the first
  # argument that is not a recognised option, which is left in $1 for the
  # command dispatch further down.
  #   --config <dir>   use <dir> as COLLECTOR_CONF_DIR (must be a directory)
  #   --distributed    set DISTRIBUTED_HBASE=true
  until [[ -n "${_ams_configs_done}" ]]; do
    case "$1" in
      --distributed)
        DISTRIBUTED_HBASE=true
        shift
        ;;
      --config)
        shift
        confdir=$1
        shift
        if [[ -z "${confdir}" ]]; then
          echo "ERROR: No parameter provided for --config " | tee -a "${STARTUPFILE}"
          exit 1
        elif [[ ! -d "${confdir}" ]]; then
          echo "ERROR: Cannot find configuration directory \"${confdir}\"" | tee -a "${STARTUPFILE}"
          exit 1
        fi
        COLLECTOR_CONF_DIR="${confdir}"
        ;;
      *)
        _ams_configs_done=true
        ;;
    esac
  done
  # execute ams-env.sh
  # The environment file is sourced into this shell so that the AMS_* settings
  # it defines are visible to the rest of the script. Without it the script
  # stops with an error.
  if [[ -f "${COLLECTOR_CONF_DIR}/ams-env.sh" ]]; then
    # shellcheck source=/dev/null
    . "${COLLECTOR_CONF_DIR}/ams-env.sh"
  else
    # Diagnostics go to stderr (`2>&1` on echo did not redirect the message).
    echo "ERROR: Cannot execute ${COLLECTOR_CONF_DIR}/ams-env.sh." >&2
    exit 1
  fi
  # Fall back to built-in defaults for anything ams-env.sh left unset or empty.
  AMS_COLLECTOR_LOG_DIR="${AMS_COLLECTOR_LOG_DIR:-/var/log/ambari-metrics-collector}"
  AMS_HBASE_NORMALIZER_ENABLED="${AMS_HBASE_NORMALIZER_ENABLED:-true}"
  AMS_HBASE_FIFO_COMPACTION_ENABLED="${AMS_HBASE_FIFO_COMPACTION_ENABLED:-true}"
  AMS_HBASE_INIT_CHECK_ENABLED="${AMS_HBASE_INIT_CHECK_ENABLED:-true}"

  # Log files live under the collector log directory.
  if [[ -n "${AMS_COLLECTOR_LOG_DIR}" ]]; then
    OUTFILE="${AMS_COLLECTOR_LOG_DIR}/ambari-metrics-collector.out"
    STARTUPFILE="${AMS_COLLECTOR_LOG_DIR}/ambari-metrics-collector-startup.out"
  fi

  # Collector pid file and stub files follow AMS_COLLECTOR_PID_DIR when given.
  if [[ -n "${AMS_COLLECTOR_PID_DIR}" ]]; then
    PIDFILE="${AMS_COLLECTOR_PID_DIR}/ambari-metrics-collector.pid"
    NORMALIZER_ENABLED_STUB_FILE="${AMS_COLLECTOR_PID_DIR}/normalizer_enabled"
    FIFO_ENABLED_STUB_FILE="${AMS_COLLECTOR_PID_DIR}/fifo_enabled"
  fi

  # HBase daemon pid files follow AMS_HBASE_PID_DIR when given.
  if [[ -n "${AMS_HBASE_PID_DIR}" ]]; then
    HBASE_ZK_PID="${AMS_HBASE_PID_DIR}/hbase-${USER}-zookeeper.pid"
    HBASE_MASTER_PID="${AMS_HBASE_PID_DIR}/hbase-${USER}-master.pid"
    HBASE_RS_PID="${AMS_HBASE_PID_DIR}/hbase-${USER}-regionserver.pid"
  fi
  #TODO manage 3 hbase daemons for start/stop/status
  # Dispatch on the first argument left over after option parsing. An argument
  # that matches none of the arms below does nothing.
  case "$1" in
    start)
      start
      ;;
    stop)
      stop
      ;;
    restart)
      stop
      start
      ;;
    status)
      if daemon_status "${PIDFILE}"; then
        echo "AMS is running as process $(cat "${PIDFILE}")."
      else
        echo "AMS is not running."
      fi
      #print embedded hbase daemons statuses?
      ;;
    enable_normalization_fifo)
      enable_normalization_fifo
      ;;
  esac