ambari-metrics-monitor 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. #!/bin/bash
  2. # Licensed to the Apache Software Foundation (ASF) under one or more
  3. # contributor license agreements. See the NOTICE file distributed with
  4. # this work for additional information regarding copyright ownership.
  5. # The ASF licenses this file to You under the Apache License, Version 2.0
  6. # (the "License"); you may not use this file except in compliance with
  7. # the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and limitations under the License.
  # Return codes shared by the helper functions below.
  OK=0
  NOTOK=1

  # Seconds the "stop" command waits before escalating to kill -9.
  STOP_TIMEOUT=5

  # Built-in locations. MONITOR_CONF_DIR, OUTFILE and PIDFILE are only
  # defaults: later parts of this script may replace them.
  METRIC_MONITOR=ambari-metrics-monitor
  MONITOR_CONF_DIR=/etc/ambari-metrics-monitor/conf/
  RESOURCE_MONITORING_DIR=/usr/lib/python2.6/site-packages/resource_monitoring
  METRIC_MONITOR_PY_SCRIPT=${RESOURCE_MONITORING_DIR}/main.py
  OUTFILE=/var/log/ambari-metrics-monitor/ambari-metrics-monitor.out
  PIDFILE=/var/run/ambari-metrics-monitor/ambari-metrics-monitor.pid

  # Pick an interpreter unless the caller already supplied one via PYTHON:
  # prefer 2.7, then 2.6, and finally whatever /usr/bin/python is.
  if [[ -z "${PYTHON}" ]]; then
    if [[ -e /usr/bin/python2.7 ]]; then
      PYTHON=/usr/bin/python2.7
    elif [[ -e /usr/bin/python2.6 ]]; then
      PYTHON=/usr/bin/python2.6
    else
      PYTHON=/usr/bin/python
    fi
  fi
  export PYTHON
  #######################################
  # Verify that the interpreter named by PYTHON reports version 2.6 or newer.
  # Globals:   PYTHON (read), OK and NOTOK (read; default to 0 and 1)
  # Arguments: none
  # Outputs:   progress and error messages on stdout
  # Returns:   ${OK} when the version is acceptable; ${NOTOK} when it is too
  #            old or when no "X.Y" version could be read from the interpreter
  #######################################
  check_python_version ()
  {
    local version_string major minor

    echo "Verifying Python version compatibility..."
    # "-V" prints "Python X.Y.Z"; stderr is merged in because some interpreters
    # print the banner there. PYTHON stays unquoted, as in the launch command,
    # so a value carrying extra words keeps working.
    # shellcheck disable=SC2086
    version_string=$(${PYTHON} -V 2>&1 | awk '{print $2}')

    # A missing or broken interpreter yields no usable version; treat that as
    # a failure instead of letting the arithmetic below silently misfire.
    if [[ ! "${version_string}" =~ ^([0-9]+)\.([0-9]+) ]]; then
      echo "ERROR: Unable to determine Python version using ${PYTHON}"
      return "${NOTOK:-1}"
    fi
    # Force base 10 so a component with a leading zero is not read as octal.
    major=$(( 10#${BASH_REMATCH[1]} ))
    minor=$(( 10#${BASH_REMATCH[2]} ))

    if (( major < 2 || (major == 2 && minor < 6) )); then
      echo "ERROR: Found Python version ${major}.${minor}. Ambari Metric Monitor requires Python version >= 2.6"
      return "${NOTOK:-1}"
    fi
    echo "Using python " ${PYTHON}
    return "${OK:-0}"
  }
  #######################################
  # Record a process id in a pid file.
  # Arguments: $1 - path of the pid file to (over)write
  #            $2 - optional PID to record; defaults to $!, the most recently
  #                 started background job
  # Outputs:   an error message on stdout when the file cannot be written
  # Returns:   0 on success; terminates the whole script with status 1 when
  #            the write fails
  #######################################
  write_pidfile() {
    local pidfile="$1"
    local pid="${2:-$!}"

    # The stderr redirection has to wrap the whole write: a failure to open
    # the pid file is reported by the shell before a trailing 2>/dev/null on
    # the same command would take effect.
    if ! { echo "${pid}" > "${pidfile}"; } 2>/dev/null; then
      echo "ERROR: Cannot write pid ${pidfile}."
      exit 1
    fi
  }
  # Locate the configuration directory: consume every leading
  # "--config <dir>" pair from the positional parameters and stop at the
  # first argument that is anything else.
  until [[ -n "${_ams_configs_done}" ]]; do
    if [[ "$1" == "--config" ]]; then
      shift
      confdir=$1
      shift
      # Reject a missing value first, then a value that is not a directory.
      if [[ -z "${confdir}" ]]; then
        echo "ERROR: No parameter provided for --config "
        exit 1
      fi
      if [[ ! -d "${confdir}" ]]; then
        echo "ERROR: Cannot find configuration directory \"${confdir}\""
        exit 1
      fi
      MONITOR_CONF_DIR="${confdir}"
    else
      _ams_configs_done=true
    fi
  done
  # Execute ams-env.sh from the selected configuration directory. The file is
  # mandatory: without it the script stops with status 1.
  if [[ ! -f "${MONITOR_CONF_DIR}/ams-env.sh" ]]; then
    # Diagnostics go to stderr ("2>&1" would have left the message on stdout).
    echo "ERROR: Cannot execute ${MONITOR_CONF_DIR}/ams-env.sh." >&2
    exit 1
  fi
  # shellcheck disable=SC1090,SC1091 -- path is only known at run time
  . "${MONITOR_CONF_DIR}/ams-env.sh"
  #TODO decide if rebuild on each start (pretty quickly) to tolerate major node changes (like kernel update)
  #######################################
  # Build the psutil sources under RESOURCE_MONITORING_DIR unless their build
  # directory already has content.
  # Globals:   RESOURCE_MONITORING_DIR (read), PYTHON (read)
  # Arguments: none
  # Outputs:   progress messages on stdout, a warning on stderr when the
  #            psutil source directory is missing
  # Returns:   0 when the build directory is already populated, otherwise the
  #            status of the build (1 when the source directory is missing)
  #######################################
  _build_psutil_if_needed() {
    local psutil_dir="${RESOURCE_MONITORING_DIR}/psutil"

    # A build directory that does not exist yet counts as empty; ls's
    # complaint about it is expected and therefore silenced.
    if [[ -n "$(ls -A -- "${psutil_dir}/build" 2>/dev/null)" ]]; then
      echo "psutil build directory is not empty, continuing..."
      return 0
    fi

    echo "Building psutil..."
    if [[ ! -d "${psutil_dir}" ]]; then
      echo "WARNING: psutil source directory ${psutil_dir} not found, skipping build" >&2
      return 1
    fi
    # The subshell leaves the caller's working directory untouched, and the
    # && guarantees setup.py is never run from some other directory when the
    # cd fails. PYTHON is unquoted on purpose, matching the launch command.
    # shellcheck disable=SC2086
    ( cd "${psutil_dir}" && ${PYTHON} "setup.py" "build" )
  }

  #build psutil (best effort: the script carries on whatever the outcome)
  _build_psutil_if_needed
  # Optional directory overrides: a non-empty AMS_MONITOR_LOG_DIR relocates
  # the .out file, a non-empty AMS_MONITOR_PID_DIR relocates the pid file.
  [[ -z "${AMS_MONITOR_LOG_DIR}" ]] || OUTFILE=${AMS_MONITOR_LOG_DIR}/ambari-metrics-monitor.out
  [[ -z "${AMS_MONITOR_PID_DIR}" ]] || PIDFILE=${AMS_MONITOR_PID_DIR}/ambari-metrics-monitor.pid
  #######################################
  # Tell whether a PID belongs to a live Metric Monitor process.
  # Arguments: $1 - PID, typically read from the pid file (may be empty or
  #                 malformed)
  # Returns:   0 when a process with exactly that PID exists and its command
  #            line contains "resource_monitoring"; non-zero otherwise
  #######################################
  _ams_monitor_running() {
    local pid="$1"
    [[ "${pid}" =~ ^[0-9]+$ ]] || return 1
    # Query the one PID directly instead of grepping the whole process table,
    # where the number could match unrelated lines.
    ps -p "${pid}" -o args= 2>/dev/null | grep -q resource_monitoring
  }

  # Dispatch on the first remaining argument (any --config option has already
  # been consumed by the option loop earlier in this script).
  #   start   - exits 0 when already running, 255 when the launch fails
  #   status  - sets retcode to 2 (stale pid file) or 3 (no pid file)
  #   stop    - TERM, wait STOP_TIMEOUT seconds, then KILL if still alive
  #   restart - stop followed by start; retcode carries the start status
  # NOTE(review): retcode is only assigned here; presumably it is consumed by
  # code following this block - confirm.
  case "$1" in
    start)
      # "exit 255" is the status the shell reports for "exit -1".
      check_python_version || exit 255

      echo "Checking for previously running Metric Monitor..."
      if [[ -f "${PIDFILE}" ]]; then
        PID=$(cat "${PIDFILE}")
        if _ams_monitor_running "${PID}"; then
          # tput complains on stderr when there is no terminal; the bold
          # markup is purely cosmetic, so that noise is dropped.
          tput bold 2>/dev/null
          echo "WARN: ${METRIC_MONITOR} already running with PID: ${PID}"
          tput sgr0 2>/dev/null
          echo "Exiting."
          exit 0
        fi
        echo "${PIDFILE} found with no process. Removing ${PID}..."
        rm -f "${PIDFILE}"
      fi

      echo "Starting ${METRIC_MONITOR}"
      # PYTHON is deliberately unquoted so a value with extra words still works.
      # shellcheck disable=SC2086
      nohup ${PYTHON} "${METRIC_MONITOR_PY_SCRIPT}" "$@" > "${OUTFILE}" 2>&1 &
      PID=$!
      write_pidfile "${PIDFILE}"

      # Give the process a moment to fail fast before declaring success.
      sleep 2
      echo "Verifying ${METRIC_MONITOR} process status..."
      if ! _ams_monitor_running "${PID}"; then
        if [[ -s "${OUTFILE}" ]]; then
          echo "ERROR: ${METRIC_MONITOR} start failed. For more details, see ${OUTFILE}:"
          echo "===================="
          tail -n 10 "${OUTFILE}"
          echo "===================="
        else
          echo "ERROR: ${METRIC_MONITOR} start failed"
        fi
        # The process is gone either way, so never leave a stale pid file.
        rm -f "${PIDFILE}"
        echo "Monitor out at: ${OUTFILE}"
        exit 255
      fi
      echo "Metric Monitor successfully started"
      echo "Server log at: ${OUTFILE}"
      ;;
    status)
      if [[ -f "${PIDFILE}" ]]; then
        PID=$(cat "${PIDFILE}")
        echo "Found ${METRIC_MONITOR} PID: $PID"
        if _ams_monitor_running "${PID}"; then
          tput bold 2>/dev/null
          echo "${METRIC_MONITOR} running."
          tput sgr0 2>/dev/null
          echo "Monitor PID at: ${PIDFILE}"
          echo "Monitor out at: ${OUTFILE}"
        else
          echo "${METRIC_MONITOR} not running. Stale PID File at: $PIDFILE"
          retcode=2
        fi
      else
        tput bold 2>/dev/null
        echo "${METRIC_MONITOR} currently not running"
        tput sgr0 2>/dev/null
        echo "Usage: /usr/sbin/${METRIC_MONITOR} {start|stop|restart|status}"
        retcode=3
      fi
      ;;
    stop)
      pidfile=${PIDFILE}
      if [[ -f "${pidfile}" ]]; then
        pid=$(cat "$pidfile")
        # Ask politely first; escalate only if the process survives the wait.
        kill "${pid}" >/dev/null 2>&1
        sleep "${STOP_TIMEOUT}"
        if kill -0 "${pid}" > /dev/null 2>&1; then
          echo "WARNING: ${METRIC_MONITOR} did not stop gracefully after ${STOP_TIMEOUT} seconds: Trying to kill with kill -9"
          kill -9 "${pid}" >/dev/null 2>&1
        fi
        # Keep the pid file when the process is still there so that a later
        # stop/status can find it again.
        if ps -p "${pid}" > /dev/null 2>&1; then
          echo "ERROR: Unable to kill ${pid}"
        else
          rm -f "${pidfile}" >/dev/null 2>&1
        fi
      fi
      ;;
    restart)
      echo -e "Restarting ${METRIC_MONITOR}"
      # Pass the effective configuration directory on explicitly: the --config
      # option was already consumed, and without it the child invocations
      # would silently fall back to the default configuration.
      "$0" --config "${MONITOR_CONF_DIR}" stop
      "$0" --config "${MONITOR_CONF_DIR}" start "$@"
      retcode=$?
      ;;
  esac