HostInfo.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. #!/usr/bin/env python2.6
  2. '''
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. '''
  17. import os
  18. import glob
  19. import logging
  20. import pwd
  21. import re
  22. import time
  23. import subprocess
  24. import threading
  25. import shlex
  26. import platform
  27. from PackagesAnalyzer import PackagesAnalyzer
  28. from HostCheckReportFileHandler import HostCheckReportFileHandler
  29. from Hardware import Hardware
  30. logger = logging.getLogger()
  31. class HostInfo:
  32. # List of project names to be used to find alternatives folders etc.
  33. DEFAULT_PROJECT_NAMES = [
  34. "hadoop*", "hadoop", "hbase", "hcatalog", "hive", "ganglia", "nagios",
  35. "oozie", "sqoop", "hue", "zookeeper", "mapred", "hdfs", "flume",
  36. "ambari_qa", "hadoop_deploy", "rrdcached", "hcat", "ambari-qa",
  37. "sqoop-ambari-qa", "sqoop-ambari_qa", "webhcat", "hadoop-hdfs", "hadoop-yarn",
  38. "hadoop-mapreduce"
  39. ]
  40. # List of live services checked for on the host, takes a map of plan strings
  41. DEFAULT_LIVE_SERVICES = [
  42. {"redhat":"ntpd", "suse":"ntp"}
  43. ]
  44. # Set of default users (need to be replaced with the configured user names)
  45. DEFAULT_USERS = [
  46. "nagios", "hive", "ambari-qa", "oozie", "hbase", "hcat", "mapred",
  47. "hdfs", "rrdcached", "zookeeper", "flume", "sqoop", "sqoop2",
  48. "hue", "yarn"
  49. ]
  50. # Filters used to identify processed
  51. PROC_FILTER = [
  52. "hadoop", "zookeeper"
  53. ]
  54. # Additional path patterns to find existing directory
  55. DIRNAME_PATTERNS = [
  56. "/tmp/hadoop-", "/tmp/hsperfdata_"
  57. ]
  58. # Default set of directories that are checked for existence of files and folders
  59. DEFAULT_DIRS = [
  60. "/etc", "/var/run", "/var/log", "/usr/lib", "/var/lib", "/var/tmp", "/tmp", "/var", "/hadoop"
  61. ]
  62. # Packages that are used to find repos (then repos are used to find other packages)
  63. PACKAGES = [
  64. "hadoop", "zookeeper", "webhcat", "*-manager-server-db", "*-manager-daemons"
  65. ]
  66. # Additional packages to look for (search packages that start with these)
  67. ADDITIONAL_PACKAGES = [
  68. "rrdtool", "rrdtool-python", "nagios", "ganglia", "gmond", "gweb", "libconfuse", "ambari-log4j",
  69. "hadoop", "zookeeper"
  70. ]
  71. # ignore packages from repos whose names start with these strings
  72. IGNORE_PACKAGES_FROM_REPOS = [
  73. "ambari", "installed"
  74. ]
  75. # ignore required packages
  76. IGNORE_PACKAGES = [
  77. "epel-release"
  78. ]
  79. # ignore repos from the list of repos to be cleaned
  80. IGNORE_REPOS = [
  81. "ambari", "HDP-UTILS"
  82. ]
  83. # default timeout for async invoked processes
  84. TIMEOUT_SECONDS = 60
  85. RESULT_UNAVAILABLE = "unable_to_determine"
  86. event = threading.Event()
  87. current_umask = -1
  88. def __init__(self, config=None):
  89. self.packages = PackagesAnalyzer()
  90. self.reportFileHandler = HostCheckReportFileHandler(config)
  91. def dirType(self, path):
  92. if not os.path.exists(path):
  93. return 'not_exist'
  94. elif os.path.islink(path):
  95. return 'sym_link'
  96. elif os.path.isdir(path):
  97. return 'directory'
  98. elif os.path.isfile(path):
  99. return 'file'
  100. return 'unknown'
  101. def hadoopVarRunCount(self):
  102. if not os.path.exists('/var/run/hadoop'):
  103. return 0
  104. pids = glob.glob('/var/run/hadoop/*/*.pid')
  105. return len(pids)
  106. def hadoopVarLogCount(self):
  107. if not os.path.exists('/var/log/hadoop'):
  108. return 0
  109. logs = glob.glob('/var/log/hadoop/*/*.log')
  110. return len(logs)
  111. def etcAlternativesConf(self, projects, etcResults):
  112. if not os.path.exists('/etc/alternatives'):
  113. return []
  114. projectRegex = "'" + '|'.join(projects) + "'"
  115. files = [f for f in os.listdir('/etc/alternatives') if re.match(projectRegex, f)]
  116. for conf in files:
  117. result = {}
  118. filePath = os.path.join('/etc/alternatives', conf)
  119. if os.path.islink(filePath):
  120. realConf = os.path.realpath(filePath)
  121. result['name'] = conf
  122. result['target'] = realConf
  123. etcResults.append(result)
  124. def get_os_type(self):
  125. os_info = platform.linux_distribution(None, None, None, ['SuSE',
  126. 'redhat' ], 0)
  127. return os_info[0].lower()
  128. def checkLiveServices(self, services, result):
  129. osType = self.get_os_type()
  130. for service in services:
  131. svcCheckResult = {}
  132. if isinstance(service, dict):
  133. serviceName = service[osType]
  134. else:
  135. serviceName = service
  136. svcCheckResult['name'] = serviceName
  137. svcCheckResult['status'] = "UNKNOWN"
  138. svcCheckResult['desc'] = ""
  139. try:
  140. cmd = "/sbin/service " + serviceName + " status"
  141. osStat = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE,
  142. stderr=subprocess.PIPE)
  143. out, err = osStat.communicate()
  144. if 0 != osStat.returncode:
  145. svcCheckResult['status'] = "Unhealthy"
  146. svcCheckResult['desc'] = out
  147. if len(out) == 0:
  148. svcCheckResult['desc'] = err
  149. else:
  150. svcCheckResult['status'] = "Healthy"
  151. except Exception, e:
  152. svcCheckResult['status'] = "Unhealthy"
  153. svcCheckResult['desc'] = repr(e)
  154. result.append(svcCheckResult)
  155. def checkUsers(self, users, results):
  156. f = open('/etc/passwd', 'r')
  157. for userLine in f:
  158. fields = userLine.split(":")
  159. if fields[0] in users:
  160. result = {}
  161. homeDir = fields[5]
  162. result['name'] = fields[0]
  163. result['homeDir'] = fields[5]
  164. result['status'] = "Available";
  165. if not os.path.exists(homeDir):
  166. result['status'] = "Invalid home directory";
  167. results.append(result)
  168. def osdiskAvailableSpace(self, path):
  169. diskInfo = {}
  170. try:
  171. df = subprocess.Popen(["df", "-kPT", path], stdout=subprocess.PIPE)
  172. dfdata = df.communicate()[0]
  173. return Hardware.extractMountInfo(dfdata.splitlines()[-1])
  174. except:
  175. pass
  176. return diskInfo
  177. def checkFolders(self, basePaths, projectNames, existingUsers, dirs):
  178. foldersToIgnore = []
  179. for user in existingUsers:
  180. foldersToIgnore.append(user['homeDir'])
  181. try:
  182. for dirName in basePaths:
  183. for project in projectNames:
  184. path = os.path.join(dirName.strip(), project.strip())
  185. if not path in foldersToIgnore and os.path.exists(path):
  186. obj = {}
  187. obj['type'] = self.dirType(path)
  188. obj['name'] = path
  189. dirs.append(obj)
  190. except:
  191. pass
  192. def javaProcs(self, list):
  193. try:
  194. pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]
  195. for pid in pids:
  196. cmd = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
  197. cmd = cmd.replace('\0', ' ')
  198. if not 'AmbariServer' in cmd:
  199. if 'java' in cmd:
  200. dict = {}
  201. dict['pid'] = int(pid)
  202. dict['hadoop'] = False
  203. for filter in self.PROC_FILTER:
  204. if filter in cmd:
  205. dict['hadoop'] = True
  206. dict['command'] = cmd.strip()
  207. for line in open(os.path.join('/proc', pid, 'status')):
  208. if line.startswith('Uid:'):
  209. uid = int(line.split()[1])
  210. dict['user'] = pwd.getpwuid(uid).pw_name
  211. list.append(dict)
  212. except:
  213. pass
  214. pass
  215. def getReposToRemove(self, repos, ignoreList):
  216. reposToRemove = []
  217. for repo in repos:
  218. addToRemoveList = True
  219. for ignoreRepo in ignoreList:
  220. if self.packages.nameMatch(ignoreRepo, repo):
  221. addToRemoveList = False
  222. continue
  223. if addToRemoveList:
  224. reposToRemove.append(repo)
  225. return reposToRemove
  226. def getUMask(self):
  227. if (self.current_umask == -1):
  228. self.current_umask = os.umask(self.current_umask)
  229. os.umask(self.current_umask)
  230. return self.current_umask
  231. else:
  232. return self.current_umask
  233. """ Return various details about the host
  234. componentsMapped: indicates if any components are mapped to this host
  235. commandsInProgress: indicates if any commands are in progress
  236. """
  237. def register(self, dict, componentsMapped=True, commandsInProgress=True):
  238. dict['hostHealth'] = {}
  239. java = []
  240. self.javaProcs(java)
  241. dict['hostHealth']['activeJavaProcs'] = java
  242. dict['hostHealth']['diskStatus'] = [self.osdiskAvailableSpace("/")]
  243. dict['rpms'] = []
  244. liveSvcs = []
  245. self.checkLiveServices(self.DEFAULT_LIVE_SERVICES, liveSvcs)
  246. dict['hostHealth']['liveServices'] = liveSvcs
  247. dict['umask'] = str(self.getUMask())
  248. # If commands are in progress or components are already mapped to this host
  249. # Then do not perform certain expensive host checks
  250. if componentsMapped or commandsInProgress:
  251. dict['existingRepos'] = [self.RESULT_UNAVAILABLE]
  252. dict['installedPackages'] = []
  253. dict['alternatives'] = []
  254. dict['stackFoldersAndFiles'] = []
  255. dict['existingUsers'] = []
  256. else:
  257. etcs = []
  258. self.etcAlternativesConf(self.DEFAULT_PROJECT_NAMES, etcs)
  259. dict['alternatives'] = etcs
  260. existingUsers = []
  261. self.checkUsers(self.DEFAULT_USERS, existingUsers)
  262. dict['existingUsers'] = existingUsers
  263. dirs = []
  264. self.checkFolders(self.DEFAULT_DIRS, self.DEFAULT_PROJECT_NAMES, existingUsers, dirs)
  265. dict['stackFoldersAndFiles'] = dirs
  266. installedPackages = []
  267. availablePackages = []
  268. self.packages.allInstalledPackages(installedPackages)
  269. self.packages.allAvailablePackages(availablePackages)
  270. repos = []
  271. self.packages.getInstalledRepos(self.PACKAGES, installedPackages + availablePackages,
  272. self.IGNORE_PACKAGES_FROM_REPOS, repos)
  273. packagesInstalled = self.packages.getInstalledPkgsByRepo(repos, self.IGNORE_PACKAGES, installedPackages)
  274. additionalPkgsInstalled = self.packages.getInstalledPkgsByNames(
  275. self.ADDITIONAL_PACKAGES, installedPackages)
  276. allPackages = list(set(packagesInstalled + additionalPkgsInstalled))
  277. dict['installedPackages'] = self.packages.getPackageDetails(installedPackages, allPackages)
  278. repos = self.getReposToRemove(repos, self.IGNORE_REPOS)
  279. dict['existingRepos'] = repos
  280. self.reportFileHandler.writeHostCheckFile(dict)
  281. pass
  282. # The time stamp must be recorded at the end
  283. dict['hostHealth']['agentTimeStampAtReporting'] = int(time.time() * 1000)
  284. pass
  285. def main(argv=None):
  286. h = HostInfo()
  287. struct = {}
  288. h.register(struct)
  289. print struct
  290. if __name__ == '__main__':
  291. main()