ambari-metrics-collector 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. #!/usr/bin/env bash
  2. # Licensed to the Apache Software Foundation (ASF) under one or more
  3. # contributor license agreements. See the NOTICE file distributed with
  4. # this work for additional information regarding copyright ownership.
  5. # The ASF licenses this file to You under the Apache License, Version 2.0
  6. # (the "License"); you may not use this file except in compliance with
  7. # the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and limitations under the License.
  # Built-in defaults. Several of these are reassigned further down once
  # ams-env.sh has been sourced and the command-line options are known.

  # Example of pinning the JVM location (kept disabled):
  #JAVA_HOME=/usr/jdk64/jdk1.7.0_45

  # --- Collector: identity, configuration and log files ---------------------
  METRIC_COLLECTOR="ambari-metrics-collector"
  DAEMON_NAME="timelineserver"
  COLLECTOR_CONF_DIR="/etc/ambari-metrics-collector/conf"
  AMS_COLLECTOR_LOG_DIR="/var/log/ambari-metrics-collector"
  OUTFILE="${AMS_COLLECTOR_LOG_DIR}/ambari-metrics-collector.out"
  STARTUPFILE="${AMS_COLLECTOR_LOG_DIR}/ambari-metrics-collector-startup.out"

  # --- Collector: pid file and "setting already applied" stub files ---------
  PIDFILE="/var/run/ambari-metrics-collector/ambari-metrics-collector.pid"
  NORMALIZER_ENABLED_STUB_FILE="/var/run/ambari-metrics-collector/normalizer_enabled"
  FIFO_ENABLED_STUB_FILE="/var/run/ambari-metrics-collector/fifo_enabled"

  # --- Embedded HBase: install location, configuration and pid files --------
  HBASE_DIR="/usr/lib/ams-hbase"
  HBASE_CMD="${HBASE_DIR}/bin/hbase"
  HBASE_CONF_DIR="/etc/ams-hbase/conf"
  HBASE_ZK_PID="/var/run/ams-hbase/hbase-${USER}-zookeeper.pid"
  HBASE_MASTER_PID="/var/run/ams-hbase/hbase-${USER}-master.pid"
  HBASE_RS_PID="/var/run/ams-hbase/hbase-${USER}-regionserver.pid"

  # --- Metric tables ---------------------------------------------------------
  # Tables that get normalization switched on.
  METRIC_TABLES=(
    METRIC_AGGREGATE_DAILY
    METRIC_AGGREGATE_HOURLY
    METRIC_AGGREGATE_MINUTE
    METRIC_AGGREGATE
    METRIC_RECORD
    METRIC_RECORD_DAILY
    METRIC_RECORD_HOURLY
    METRIC_RECORD_MINUTE
  )
  # Tables whose compaction policy is switched between FIFO and Exploring.
  METRIC_FIFO_COMPACTION_TABLES=(
    METRIC_AGGREGATE
    METRIC_RECORD
    METRIC_RECORD_MINUTE
  )

  # --- Behaviour switches ----------------------------------------------------
  AMS_HBASE_NORMALIZER_ENABLED="true"
  AMS_HBASE_FIFO_COMPACTION_ENABLED="true"
  # Seconds to wait after SIGTERM before falling back to SIGKILL.
  STOP_TIMEOUT="5"
  # Switched to true by the --distributed command-line option.
  DISTRIBUTED_HBASE="false"
  #######################################
  # Runs hbase-daemon.sh for one of the embedded HBase daemons.
  # Before delegating, the daemon's pid file is checked: a live process is
  # reported in the startup log, anything else is treated as a stale pid file
  # and removed.
  # Globals:   HBASE_MASTER_PID, HBASE_ZK_PID, HBASE_RS_PID, HBASE_DIR,
  #            HBASE_CONF_DIR, STARTUPFILE (all read)
  # Arguments: $1 - daemon name: master | zookeeper | regionserver
  #            $2 - action passed through to hbase-daemon.sh
  # Returns:   1 for an unknown daemon name (nothing is executed), otherwise
  #            the exit status of hbase-daemon.sh
  #######################################
  function hbase_daemon
  {
    local daemon=$1
    local cmd=$2
    local pidfile

    case "${daemon}" in
      master)
        pidfile=${HBASE_MASTER_PID}
        ;;
      zookeeper)
        pidfile=${HBASE_ZK_PID}
        ;;
      regionserver)
        pidfile=${HBASE_RS_PID}
        ;;
      *)
        # Without this arm an unknown name would continue with an empty pid
        # path and still hand the bogus name to hbase-daemon.sh.
        echo "ERROR: Unknown HBase daemon \"${daemon}\"." | tee -a "${STARTUPFILE}"
        return 1
        ;;
    esac

    if daemon_status "${pidfile}"; then
      echo "${daemon} is running as process $(cat "${pidfile}"). Continuing" | tee -a "${STARTUPFILE}"
    else
      # stale pid file, so just remove it and continue on
      rm -f "${pidfile}" >/dev/null 2>&1
    fi

    "${HBASE_DIR}/bin/hbase-daemon.sh" --config "${HBASE_CONF_DIR}" "${cmd}" "${daemon}"
  }
  #######################################
  # Stores the PID of the most recently started background job ($!) in a file.
  # Globals:   STARTUPFILE (read; the error message is appended to it)
  # Arguments: $1 - path of the pid file to write
  # Outputs:   an ERROR line on stdout when the file cannot be written
  # Exits:     with status 1 when the file cannot be written
  #######################################
  function write_pidfile
  {
    local pidfile="$1"

    # Redirections are applied left to right, so a trailing 2>/dev/null on the
    # echo itself would come too late to hide the shell's "cannot create"
    # diagnostic. Wrapping the write in a group silences it as intended; the
    # failure is still reported through the ERROR message below.
    if ! { echo "$!" > "${pidfile}"; } 2>/dev/null; then
      echo "ERROR: Cannot write pid ${pidfile}." | tee -a "${STARTUPFILE}"
      exit 1
    fi
  }
  # TODO replace this with Phoenix DDL, when normalization support added to Phoenix
  #######################################
  # Applies the requested normalization / FIFO-compaction settings to the
  # metric tables through the HBase shell.
  #
  # The presence of a stub file marks the matching setting as already applied,
  # so statements are only generated when flag and stub file disagree.
  # Stub files tied to an HBase statement are updated only after the shell
  # reports success; on failure they are left as they were, so the same change
  # is attempted again on the next call.
  #
  # Globals:   AMS_HBASE_NORMALIZER_ENABLED, AMS_HBASE_FIFO_COMPACTION_ENABLED,
  #            NORMALIZER_ENABLED_STUB_FILE, FIFO_ENABLED_STUB_FILE,
  #            METRIC_TABLES, METRIC_FIFO_COMPACTION_TABLES,
  #            HBASE_CMD, HBASE_CONF_DIR, STARTUPFILE (all read)
  # Arguments: none
  # Outputs:   progress lines on stdout and in STARTUPFILE; a WARNING on both
  #            stdout and stderr when the HBase shell fails
  #######################################
  function enable_normalization_fifo
  {
    local -a statements=()
    local -a stubs_to_create=()
    local -a stubs_to_remove=()
    local table stub
    local policy_key="'hbase.hstore.defaultengine.compactionpolicy.class'"
    local blocking_key="'hbase.hstore.blockingStoreFiles'"
    local policy_pkg="org.apache.hadoop.hbase.regionserver.compactions"

    echo "$(date) Handling HBase normalization/fifo requests" | tee -a "${STARTUPFILE}"

    # Enable normalization for all the tables
    echo "$(date) Normalized enabled : ${AMS_HBASE_NORMALIZER_ENABLED}" | tee -a "${STARTUPFILE}"
    if [[ "${AMS_HBASE_NORMALIZER_ENABLED}" == "true" || "${AMS_HBASE_NORMALIZER_ENABLED}" == "True" ]]; then
      if [[ ! -f "${NORMALIZER_ENABLED_STUB_FILE}" ]]; then
        echo "$(date) Normalizer stub file not found" | tee -a "${STARTUPFILE}"
        for table in "${METRIC_TABLES[@]}"; do
          statements+=("alter '${table}', {NORMALIZATION_ENABLED => 'true'}")
        done
        stubs_to_create+=("${NORMALIZER_ENABLED_STUB_FILE}")
      fi
    elif [[ -f "${NORMALIZER_ENABLED_STUB_FILE}" ]]; then
      echo "$(date) Normalizer stub file found" | tee -a "${STARTUPFILE}"
      # No HBase statement is issued for this case, so the stub can go at once.
      rm -f "${NORMALIZER_ENABLED_STUB_FILE}"
    fi

    # Similarly for HBase FIFO Compaction policy
    echo "$(date) Fifo enabled : ${AMS_HBASE_FIFO_COMPACTION_ENABLED}" | tee -a "${STARTUPFILE}"
    if [[ "${AMS_HBASE_FIFO_COMPACTION_ENABLED}" == "true" || "${AMS_HBASE_FIFO_COMPACTION_ENABLED}" == "True" ]]; then
      if [[ ! -f "${FIFO_ENABLED_STUB_FILE}" ]]; then
        echo "$(date) Fifo stub file not found" | tee -a "${STARTUPFILE}"
        for table in "${METRIC_FIFO_COMPACTION_TABLES[@]}"; do
          statements+=("alter '${table}', CONFIGURATION => {${blocking_key} => '1000', ${policy_key} => '${policy_pkg}.FIFOCompactionPolicy'}")
        done
        stubs_to_create+=("${FIFO_ENABLED_STUB_FILE}")
      fi
    elif [[ -f "${FIFO_ENABLED_STUB_FILE}" ]]; then
      echo "$(date) Fifo stub file found" | tee -a "${STARTUPFILE}"
      for table in "${METRIC_FIFO_COMPACTION_TABLES[@]}"; do
        statements+=("alter '${table}', CONFIGURATION => {${policy_key} => '${policy_pkg}.ExploringCompactionPolicy', ${blocking_key} => '300'}")
      done
      stubs_to_remove+=("${FIFO_ENABLED_STUB_FILE}")
    fi

    if (( ${#statements[@]} == 0 )); then
      echo "$(date) Nothing to execute against HBase shell" | tee -a "${STARTUPFILE}"
      return
    fi

    echo "$(date) Executing HBase shell command..." | tee -a "${STARTUPFILE}"
    # One statement per line, both in the log and on the shell's stdin.
    printf '%s\n' "${statements[@]}" | tee -a "${STARTUPFILE}"
    if ! printf '%s\n' "${statements[@]}" \
      | "${HBASE_CMD}" --config "${HBASE_CONF_DIR}" shell > /dev/null 2>&1; then
      echo "WARNING: Failed to enable Ambari Metrics data model normalization."
      >&2 echo "WARNING: Failed to enable Ambari Metrics data model normalization."
      # Stub files are deliberately left untouched here.
      return
    fi

    for stub in "${stubs_to_create[@]}"; do
      touch "${stub}"
    done
    for stub in "${stubs_to_remove[@]}"; do
      rm -f "${stub}"
    done
    echo "$(date) HBase shell command completed" | tee -a "${STARTUPFILE}"
  }
  #######################################
  # Locates the java binary under JAVA_HOME and derives the max-heap JVM flag.
  # Globals:   JAVA_HOME, STARTUPFILE (read)
  #            JAVA, HADOOP_HEAPSIZE, JAVA_HEAP_MAX (written)
  # Arguments: none
  # Exits:     with status 1 (after logging an ERROR line) when JAVA_HOME is
  #            empty, is not a directory, or has no executable bin/java
  #######################################
  function hadoop_java_setup
  {
    local problem=""

    # Bail if we did not detect it
    if [[ -z "${JAVA_HOME}" ]]; then
      problem="ERROR: JAVA_HOME is not set and could not be found."
    elif [[ ! -d "${JAVA_HOME}" ]]; then
      problem="ERROR: JAVA_HOME ${JAVA_HOME} does not exist."
    else
      JAVA="${JAVA_HOME}/bin/java"
      [[ -x "$JAVA" ]] || problem="ERROR: $JAVA is not executable."
    fi

    if [[ -n "${problem}" ]]; then
      echo "${problem}" | tee -a $STARTUPFILE
      exit 1
    fi

    # HADOOP_HEAPSIZE falls back to 1024 (MB) and is therefore never empty at
    # this point, so the heap flag is always built from it.
    HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024}
    # shellcheck disable=SC2034
    JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
  }
  #######################################
  # LSB 4.1.0 style status check driven by a pid file.
  # (Using these codes is not an endorsement of the LSB.)
  #
  # Arguments: $1 - path of the pid file
  # Returns:   0 - the pid file exists and `ps -p` finds the recorded process
  #            1 - the pid file exists but no such process is found
  #                (strictly, the LSB reserves this for pid files under
  #                /var/run; it is returned here wherever the pid file lives)
  #            3 - there is no pid file, i.e. not running
  #            (2 is not used)
  #######################################
  function daemon_status()
  {
    local pidfile="$1"
    shift
    local pid

    # No pid file at all: not running.
    [[ -f "${pidfile}" ]] || return 3

    pid=$(cat "${pidfile}")
    ps -p "${pid}" > /dev/null 2>&1 && return 0

    # A pid file without a matching process.
    return 1
  }
  #######################################
  # Starts the collector: optionally the embedded HBase master, then the
  # collector JVM, then waits for the metric tables and applies the
  # normalization / FIFO settings.
  #
  # Globals:   DISTRIBUTED_HBASE, PIDFILE, OUTFILE, STARTUPFILE, JAVA,
  #            AMS_COLLECTOR_HEAPSIZE, AMS_COLLECTOR_OPTS, COLLECTOR_CONF_DIR,
  #            AMS_COLLECTOR_LOG_DIR, DAEMON_NAME, METRIC_COLLECTOR, HBASE_CMD,
  #            HBASE_CONF_DIR, METRIC_TABLES (read); CLASS, PID (written)
  # Arguments: any arguments are appended to the java command line
  # Exits:     0   when the collector is already running
  #            255 when the JVM is not found shortly after launch
  #######################################
  function start()
  {
    local started_at retry
    local tables_ready=false

    hadoop_java_setup

    # hbase_daemon "zookeeper" "start"
    # hbase_daemon "master" "start"
    # hbase_daemon "regionserver" "start"
    # A bare [ !"$var" ] is a one-argument test on a non-empty string and is
    # therefore always true; compare the flag's value explicitly instead.
    if [[ "${DISTRIBUTED_HBASE}" != "true" && "${DISTRIBUTED_HBASE}" != "True" ]]; then
      echo "$(date) Starting HBase." | tee -a "${STARTUPFILE}"
      hbase_daemon "master" "start"
    else
      echo "$(date) Launching in distributed mode. Assuming Hbase daemons up and running." | tee -a "${STARTUPFILE}"
    fi

    CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
    # YARN_OPTS="${YARN_OPTS} ${YARN_TIMELINESERVER_OPTS}"
    # if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
    # JAVA_HEAP_MAX="-Xmx${YARN_TIMELINESERVER_HEAPSIZE}m"
    # fi
    # check if this is needed?
    # export PHOENIX_JAR_PATH=/usr/lib/ambari-metrics/timelineservice/phoenix-client.jar
    # export HBASE_CONF_DIR=${HBASE_DIR}/conf

    if daemon_status "${PIDFILE}"; then
      echo "AMS is running as process $(cat "${PIDFILE}"). Exiting" | tee -a "${STARTUPFILE}"
      exit 0
    fi
    # stale pid file, so just remove it and continue on
    rm -f "${PIDFILE}" >/dev/null 2>&1

    # AMS_COLLECTOR_OPTS is left unquoted on purpose: it carries several JVM
    # options that must be split into separate arguments.
    # shellcheck disable=SC2086
    nohup "${JAVA}" "-Xms$AMS_COLLECTOR_HEAPSIZE" "-Xmx$AMS_COLLECTOR_HEAPSIZE" ${AMS_COLLECTOR_OPTS} "-cp" "/usr/lib/ambari-metrics-collector/*:${COLLECTOR_CONF_DIR}" "-Djava.net.preferIPv4Stack=true" "-Dams.log.dir=${AMS_COLLECTOR_LOG_DIR}" "-Dproc_${DAEMON_NAME}" "${CLASS}" "$@" > "${OUTFILE}" 2>&1 &
    PID=$!
    write_pidfile "${PIDFILE}"
    sleep 2

    echo "Verifying ${METRIC_COLLECTOR} process status..." | tee -a "${STARTUPFILE}"
    if ! ps ax | grep -w "${PID}" | grep -q ApplicationHistoryServer; then
      if [[ -s "${OUTFILE}" ]]; then
        echo "ERROR: ${METRIC_COLLECTOR} start failed. For more details, see ${OUTFILE}:" | tee -a "${STARTUPFILE}"
        echo "===================="
        tail -n 10 "${OUTFILE}"
        echo "===================="
      else
        echo "ERROR: ${METRIC_COLLECTOR} start failed" | tee -a "${STARTUPFILE}"
      fi
      # The process is gone in both cases, so never leave its pid file behind.
      rm -f "${PIDFILE}"
      echo "Collector out at: ${OUTFILE}" | tee -a "${STARTUPFILE}"
      # Same status the former `exit -1` produced in bash.
      exit 255
    fi

    rm -f "${STARTUPFILE}" #Deleting startup file
    echo "$(date) Collector successfully started." | tee -a "${STARTUPFILE}"
    echo "$(date) Initializing Ambari Metrics data model" | tee -a "${STARTUPFILE}"
    started_at=$SECONDS

    # Wait until METRIC_* tables created. The outcome is tracked in a flag
    # because the exit status of the loop itself is 0 on every path (break and
    # sleep both succeed) and cannot tell success from giving up.
    for retry in {1..5}; do
      if echo 'list' | "${HBASE_CMD}" --config "${HBASE_CONF_DIR}" shell \
        | grep "^${METRIC_TABLES[0]}" > /dev/null 2>&1; then
        tables_ready=true
        echo "$(date) Ambari Metrics data model initialization completed." | tee -a "${STARTUPFILE}"
        break
      fi
      echo "$(date) Ambari Metrics data model initialization check $retry" | tee -a "${STARTUPFILE}"
      if (( SECONDS - started_at > 300 )); then
        echo "$(date) Ambari Metrics data model initialization timed out" | tee -a "${STARTUPFILE}"
        break
      fi
      sleep 5
    done

    if [[ "${tables_ready}" != "true" ]]; then
      echo "WARNING: Ambari Metrics data model initialization failed."
      >&2 echo "WARNING: Ambari Metrics data model initialization failed."
    else
      enable_normalization_fifo
    fi
  }
  #######################################
  # Stops the collector process recorded in PIDFILE and, unless running in
  # distributed mode, the embedded HBase master.
  # The process is sent SIGTERM first; if it is still alive after
  # STOP_TIMEOUT seconds it is sent SIGKILL. The pid file is removed once the
  # process is no longer found.
  # Globals:   PIDFILE, STOP_TIMEOUT, METRIC_COLLECTOR, DISTRIBUTED_HBASE,
  #            STARTUPFILE (all read)
  # Arguments: none
  #######################################
  function stop()
  {
    local pidfile=${PIDFILE}
    local pid

    if [[ -f "${pidfile}" ]]; then
      pid=$(cat "${pidfile}")

      kill "${pid}" >/dev/null 2>&1
      sleep "${STOP_TIMEOUT}"

      if kill -0 "${pid}" > /dev/null 2>&1; then
        echo "WARNING: ${METRIC_COLLECTOR} did not stop gracefully after ${STOP_TIMEOUT} seconds: Trying to kill with kill -9" | tee -a "${STARTUPFILE}"
        kill -9 "${pid}" >/dev/null 2>&1
      fi

      if ps -p "${pid}" > /dev/null 2>&1; then
        echo "ERROR: Unable to kill ${pid}" | tee -a "${STARTUPFILE}"
      else
        rm -f "${pidfile}" >/dev/null 2>&1
      fi
    fi

    #stop hbase daemons
    # A bare [ !"$var" ] is a one-argument test on a non-empty string and is
    # therefore always true; compare the flag's value explicitly instead.
    if [[ "${DISTRIBUTED_HBASE}" != "true" && "${DISTRIBUTED_HBASE}" != "True" ]]; then
      echo "Stopping HBase master" | tee -a "${STARTUPFILE}"
      hbase_daemon "master" "stop"
    fi
  }
  # Consume leading options from the command line:
  #   --config <dir>   use <dir> as the collector configuration directory
  #   --distributed    run against an external (distributed) HBase
  # The first argument that is neither of these ends option parsing and is
  # left in $1 for the command dispatch further down.
  until [[ -n "${_ams_configs_done}" ]]; do
    if [[ "$1" == "--config" ]]; then
      shift
      confdir=$1
      shift
      if [[ -z "${confdir}" ]]; then
        echo "ERROR: No parameter provided for --config " | tee -a $STARTUPFILE
        exit 1
      fi
      if [[ ! -d "${confdir}" ]]; then
        echo "ERROR: Cannot find configuration directory \"${confdir}\"" | tee -a $STARTUPFILE
        exit 1
      fi
      COLLECTOR_CONF_DIR="${confdir}"
    elif [[ "$1" == "--distributed" ]]; then
      DISTRIBUTED_HBASE=true
      shift
    else
      _ams_configs_done=true
    fi
  done
  # execute ams-env.sh
  # The environment file is mandatory; without it the script stops here.
  if [[ -f "${COLLECTOR_CONF_DIR}/ams-env.sh" ]]; then
    # shellcheck source=/dev/null
    . "${COLLECTOR_CONF_DIR}/ams-env.sh"
  else
    # Diagnostics go to stderr (the former `2>&1` on echo was a no-op).
    echo "ERROR: Cannot execute ${COLLECTOR_CONF_DIR}/ams-env.sh." >&2
    exit 1
  fi

  # set pid dir path
  # The stub files live next to the collector pid file.
  if [[ -n "${AMS_COLLECTOR_PID_DIR}" ]]; then
    PIDFILE="${AMS_COLLECTOR_PID_DIR}/ambari-metrics-collector.pid"
    NORMALIZER_ENABLED_STUB_FILE="${AMS_COLLECTOR_PID_DIR}/normalizer_enabled"
    FIFO_ENABLED_STUB_FILE="${AMS_COLLECTOR_PID_DIR}/fifo_enabled"
  fi

  if [[ -n "${AMS_HBASE_PID_DIR}" ]]; then
    HBASE_ZK_PID="${AMS_HBASE_PID_DIR}/hbase-${USER}-zookeeper.pid"
    HBASE_MASTER_PID="${AMS_HBASE_PID_DIR}/hbase-${USER}-master.pid"
    HBASE_RS_PID="${AMS_HBASE_PID_DIR}/hbase-${USER}-regionserver.pid"
  fi

  # set out file path
  if [[ -n "${AMS_COLLECTOR_LOG_DIR}" ]]; then
    OUTFILE="${AMS_COLLECTOR_LOG_DIR}/ambari-metrics-collector.out"
    STARTUPFILE="${AMS_COLLECTOR_LOG_DIR}/ambari-metrics-collector-startup.out"
  fi
  #TODO manage 3 hbase daemons for start/stop/status
  # Dispatch on the command left in $1 once the options have been consumed.
  case "$1" in
    start)
      start
      ;;
    stop)
      stop
      ;;
    status)
      # NOTE: only the message reflects the daemon state; the last command of
      # this arm is an echo, so the script's exit status is 0 either way.
      if daemon_status "${PIDFILE}"; then
        echo "AMS is running as process $(cat "${PIDFILE}")."
      else
        echo "AMS is not running."
      fi
      #print embedded hbase daemons statuses?
      ;;
    restart)
      stop
      start
      ;;
    *)
      # A missing or unknown command used to fall through silently with
      # status 0; report it instead.
      echo "Usage: $0 [--config <conf-dir>] [--distributed] {start|stop|status|restart}" >&2
      exit 1
      ;;
  esac