stack_advisor.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. #!/usr/bin/env ambari-python-wrap
  2. """
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. """
  17. import socket
  class StackAdvisor(object):
      """
      Contract implemented by every stack advisor.

      Stack advisors answer stack-specific questions. They currently offer the
      following abilities:
        - Recommend where services should be installed in the cluster
        - Recommend configurations based on host hardware
        - Validate the user's selection of where services are installed
        - Validate the configuration values supplied by the user

      Each of the methods below is handed the same two parameters, describing
      the services and the hosts involved.

      @type services: dictionary
      @param services: Dictionary containing all information about services selected by the user.
        Example: {
          "services": [
            {
              "StackServices": {
                "service_name" : "HDFS",
                "service_version" : "2.6.0.2.2",
              },
              "components" : [
                {
                  "StackServiceComponents" : {
                    "cardinality" : "1+",
                    "component_category" : "SLAVE",
                    "component_name" : "DATANODE",
                    "display_name" : "DataNode",
                    "service_name" : "HDFS",
                    "hostnames" : []
                  },
                  "dependencies" : []
                }, {
                  "StackServiceComponents" : {
                    "cardinality" : "1-2",
                    "component_category" : "MASTER",
                    "component_name" : "NAMENODE",
                    "display_name" : "NameNode",
                    "service_name" : "HDFS",
                    "hostnames" : []
                  },
                  "dependencies" : []
                },
                ...
              ]
            },
            ...
          ]
        }
      @type hosts: dictionary
      @param hosts: Dictionary containing all information about hosts in this cluster
        Example: {
          "items": [
            {
              "Hosts": {
                "host_name": "c6401.ambari.apache.org",
                "public_host_name" : "c6401.ambari.apache.org",
                "ip": "192.168.1.101",
                "cpu_count" : 1,
                "disk_info" : [
                  {
                    "available" : "4564632",
                    "used" : "5230344",
                    "percent" : "54%",
                    "size" : "10319160",
                    "type" : "ext4",
                    "mountpoint" : "/"
                  },
                  {
                    "available" : "1832436",
                    "used" : "0",
                    "percent" : "0%",
                    "size" : "1832436",
                    "type" : "tmpfs",
                    "mountpoint" : "/dev/shm"
                  }
                ],
                "host_state" : "HEALTHY",
                "os_arch" : "x86_64",
                "os_type" : "centos6",
                "total_mem" : 3664872
              }
            },
            ...
          ]
        }

      Each method returns either recommendations or validations.
      Recommendations use an Ambari Blueprints friendly format.
      Validations are an array of validation objects.
      """

      def recommendComponentLayout(self, services, hosts):
          """
          Returns a recommendation of which hosts the various service components
          should be installed on.

          The input is the full description of the services being installed and of
          the hosts they are being installed onto; the output assigns hostnames to
          the components of each service. This base implementation is a no-op and
          returns None.

          @type services: dictionary
          @param services: Dictionary containing all information about services selected by the user.
          @type hosts: dictionary
          @param hosts: Dictionary containing all information about hosts in this cluster
          @rtype: dictionary
          @return: Layout recommendation of service components on cluster hosts in Ambari Blueprints friendly format.
            Example: {
              "resources" : [
                {
                  "hosts" : [
                    "c6402.ambari.apache.org",
                    "c6401.ambari.apache.org"
                  ],
                  "services" : [
                    "HDFS"
                  ],
                  "recommendations" : {
                    "blueprint" : {
                      "host_groups" : [
                        {
                          "name" : "host-group-2",
                          "components" : [
                            { "name" : "JOURNALNODE" },
                            { "name" : "ZKFC" },
                            { "name" : "DATANODE" },
                            { "name" : "SECONDARY_NAMENODE" }
                          ]
                        },
                        {
                          "name" : "host-group-1",
                          "components" : [
                            { "name" : "HDFS_CLIENT" },
                            { "name" : "NAMENODE" },
                            { "name" : "JOURNALNODE" },
                            { "name" : "ZKFC" },
                            { "name" : "DATANODE" }
                          ]
                        }
                      ]
                    },
                    "blueprint_cluster_binding" : {
                      "host_groups" : [
                        {
                          "name" : "host-group-1",
                          "hosts" : [ { "fqdn" : "c6401.ambari.apache.org" } ]
                        },
                        {
                          "name" : "host-group-2",
                          "hosts" : [ { "fqdn" : "c6402.ambari.apache.org" } ]
                        }
                      ]
                    }
                  }
                }
              ]
            }
          """
          return None

      def validateComponentLayout(self, services, hosts):
          """
          Returns the validation issues found in the layout of service components
          on hosts.

          The input is the full description of the services being installed,
          together with the hosts their components are being installed on (the
          "hostnames" property is populated for each component). This base
          implementation is a no-op and returns None.

          @type services: dictionary
          @param services: Dictionary containing information about services and host layout selected by the user.
          @type hosts: dictionary
          @param hosts: Dictionary containing all information about hosts in this cluster
          @rtype: dictionary
          @return: Dictionary containing array of validation items
            Example: {
              "items": [
                {
                  "type" : "host-group",
                  "level" : "ERROR",
                  "message" : "NameNode and Secondary NameNode should not be hosted on the same machine",
                  "component-name" : "NAMENODE",
                  "host" : "c6401.ambari.apache.org"
                },
                ...
              ]
            }
          """
          return None

      def recommendConfigurations(self, services, hosts):
          """
          Returns a recommendation of service configurations based on the
          host-specific layout of components.

          The input is the full description of the services being installed and of
          the hosts they are being installed onto; the output recommends
          host-specific configurations. This base implementation is a no-op and
          returns None.

          @type services: dictionary
          @param services: Dictionary containing all information about services and component layout selected by the user.
          @type hosts: dictionary
          @param hosts: Dictionary containing all information about hosts in this cluster
          @rtype: dictionary
          @return: Configuration recommendations in Ambari Blueprints friendly format.
            Example: {
              "services": [
                "HIVE",
                "TEZ",
                "YARN"
              ],
              "recommendations": {
                "blueprint": {
                  "host_groups": [],
                  "configurations": {
                    "yarn-site": {
                      "properties": {
                        "yarn.scheduler.minimum-allocation-mb": "682",
                        "yarn.scheduler.maximum-allocation-mb": "2048",
                        "yarn.nodemanager.resource.memory-mb": "2048"
                      }
                    },
                    "tez-site": {
                      "properties": {
                        "tez.am.java.opts": "-server -Xmx546m -Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC",
                        "tez.am.resource.memory.mb": "682"
                      }
                    },
                    "hive-site": {
                      "properties": {
                        "hive.tez.container.size": "682",
                        "hive.tez.java.opts": "-server -Xmx546m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC",
                        "hive.auto.convert.join.noconditionaltask.size": "238026752"
                      }
                    }
                  }
                },
                "blueprint_cluster_binding": {
                  "host_groups": []
                }
              },
              "hosts": [
                "c6401.ambari.apache.org",
                "c6402.ambari.apache.org",
                "c6403.ambari.apache.org"
              ]
            }
          """
          return None

      def validateConfigurations(self, services, hosts):
          """
          Returns the validation issues found in the configurations provided by
          the user.

          The input is the full description of the services being installed,
          together with the configuration values entered by the user. Those values
          can be validated against service requirements or host hardware to
          produce validation issues. This base implementation is a no-op and
          returns None.

          @type services: dictionary
          @param services: Dictionary containing information about services and user configurations.
          @type hosts: dictionary
          @param hosts: Dictionary containing all information about hosts in this cluster
          @rtype: dictionary
          @return: Dictionary containing array of validation items
            Example: {
              "items": [
                {
                  "config-type": "yarn-site",
                  "message": "Value is less than the recommended default of 682",
                  "type": "configuration",
                  "config-name": "yarn.scheduler.minimum-allocation-mb",
                  "level": "WARN"
                }
              ]
            }
          """
          return None
  class DefaultStackAdvisor(StackAdvisor):
      """
      Default stack advisor implementation.

      This implementation is used when a stack-version, or its hierarchy, does
      not have an advisor. Stack-versions can extend this class and override the
      hook methods (the getters that return empty collections here) to provide
      their own implementation.
      """

      def recommendComponentLayout(self, services, hosts):
          """
          Returns the component layout recommendation response.

          @type services: dictionary
          @param services: Services description; must contain "Versions" and "services".
          @type hosts: dictionary
          @param hosts: Hosts description; must contain "items".
          @rtype: dictionary
          @return: Dictionary with "Versions", "hosts", "services" and the
            "recommendations" built by createComponentLayoutRecommendations().
          """
          stackName = services["Versions"]["stack_name"]
          stackVersion = services["Versions"]["stack_version"]
          hostsList = [host["Hosts"]["host_name"] for host in hosts["items"]]
          servicesList = [service["StackServices"]["service_name"] for service in services["services"]]

          layoutRecommendations = self.createComponentLayoutRecommendations(services, hosts)

          return {
              "Versions": {"stack_name": stackName, "stack_version": stackVersion},
              "hosts": hostsList,
              "services": servicesList,
              "recommendations": layoutRecommendations
          }

      def createComponentLayoutRecommendations(self, services, hosts):
          """
          Assigns every master, slave and client component to hosts and groups the
          result into one blueprint host group per host.

          @type services: dictionary
          @param services: Services description; each component may already carry "hostnames".
          @type hosts: dictionary
          @param hosts: Hosts description; host names are read from hosts["items"].
          @rtype: dictionary
          @return: {"blueprint": {"host_groups": [...]},
                    "blueprint_cluster_binding": {"host_groups": [...]}}
          """
          hostsList = [host["Hosts"]["host_name"] for host in hosts["items"]]

          # Every known host gets a (possibly empty) component list, so each host
          # ends up in a host group even when nothing is placed on it.
          hostsComponentsMap = {}
          for hostName in hostsList:
              hostsComponentsMap.setdefault(hostName, [])

          # Extend 'hostsComponentsMap' with MASTER components.
          for service in services["services"]:
              for component in service["components"]:
                  if not self.isMasterComponent(component):
                      continue
                  componentName = component["StackServiceComponents"]["component_name"]
                  hostsForComponent = self._getHostsForMasterComponent(component, hostsList)
                  self._addComponentToHosts(hostsComponentsMap, componentName, hostsForComponent)

          # Hosts count as "utilized" when a valuable component already lists them
          # in the *input* hostnames; master assignments made above are not
          # taken into account here.
          utilizedHosts = set()
          for service in services["services"]:
              for component in service["components"]:
                  if not self.isComponentNotValuable(component):
                      utilizedHosts.update(self.getComponentAttribute(component, "hostnames") or [])
          freeHosts = [hostName for hostName in hostsList if hostName not in utilizedHosts]

          # Extend 'hostsComponentsMap' with SLAVE and CLIENT components.
          for service in services["services"]:
              for component in service["components"]:
                  if not (self.isSlaveComponent(component) or self.isClientComponent(component)):
                      continue
                  componentName = component["StackServiceComponents"]["component_name"]
                  hostsForComponent = self._getHostsForSlaveOrClientComponent(component, hostsList, freeHosts)
                  self._addComponentToHosts(hostsComponentsMap, componentName, hostsForComponent)

          # Prepare the host groups: one group, bound to exactly one host, per entry.
          hostGroups = []
          bindings = []
          for index, hostName in enumerate(hostsComponentsMap, 1):
              hostGroupName = "host-group-{0}".format(index)
              hostGroups.append({"name": hostGroupName, "components": hostsComponentsMap[hostName]})
              bindings.append({"name": hostGroupName, "hosts": [{"fqdn": hostName}]})

          return {
              "blueprint": {
                  "host_groups": hostGroups
              },
              "blueprint_cluster_binding": {
                  "host_groups": bindings
              }
          }

      def _getHostsForMasterComponent(self, component, hostsList):
          """Returns the list of hosts a MASTER component should be placed on."""
          if self.isComponentHostsPopulated(component):
              return component["StackServiceComponents"]["hostnames"]

          if len(hostsList) > 1 and self.isMasterComponentWithMultipleInstances(component):
              hostsCount = self.getMinComponentCount(component)
              if hostsCount > 1:
                  # Take the first 'hostsCount' suitable hosts, stopping as soon as
                  # enough were found.
                  hostsForComponent = []
                  for currentHost in hostsList:
                      if len(hostsForComponent) >= hostsCount:
                          break
                      if self.isHostSuitableForComponent(currentHost, component):
                          hostsForComponent.append(currentHost)
                  return hostsForComponent

          host = self.getHostForComponent(component, hostsList)
          # getHostForComponent() yields None when there are no hosts at all.
          return [] if host is None else [host]

      def _getHostsForSlaveOrClientComponent(self, component, hostsList, freeHosts):
          """Returns the list of hosts a SLAVE or CLIENT component should be placed on."""
          if self.isComponentHostsPopulated(component):
              return component["StackServiceComponents"]["hostnames"]
          if component["StackServiceComponents"]["cardinality"] == "ALL":
              return hostsList
          if not freeHosts:
              # No free host is left: fall back to the last host of the cluster.
              return hostsList[-1:]
          if self.isClientComponent(component):
              # Clients only go to the first free host.
              return freeHosts[0:1]
          return freeHosts

      def _addComponentToHosts(self, hostsComponentsMap, componentName, hostNames):
          """Appends {"name": componentName} to the component list of every given host."""
          for hostName in hostNames:
              # setdefault: pre-populated hostnames may name a host that is absent
              # from the hosts list, which must not raise KeyError.
              hostsComponentsMap.setdefault(hostName, []).append({"name": componentName})

      def createValidationResponse(self, services, validationItems):
          """
          Wraps validation items into the validation response structure.

          @type services: dictionary
          @param services: Services description; "Versions" is copied into the response.
          @type validationItems: list
          @param validationItems: Validation items to report.
          @rtype: dictionary
          @return: {"Versions": {...}, "items": validationItems}
          """
          stackName = services["Versions"]["stack_name"]
          stackVersion = services["Versions"]["stack_version"]
          return {
              "Versions": {"stack_name": stackName, "stack_version": stackVersion},
              "items": validationItems
          }

      def validateComponentLayout(self, services, hosts):
          """Returns the validation response for the component layout (see getComponentLayoutValidations)."""
          validationItems = self.getComponentLayoutValidations(services, hosts)
          return self.createValidationResponse(services, validationItems)

      def validateConfigurations(self, services, hosts):
          """Returns the validation response for the configurations (see getConfigurationsValidationItems)."""
          validationItems = self.getConfigurationsValidationItems(services, hosts)
          return self.createValidationResponse(services, validationItems)

      def getComponentLayoutValidations(self, services, hosts):
          """Hook: returns the component layout validation items; none by default."""
          return []

      def getConfigurationClusterSummary(self, servicesList, hosts, components):
          """Hook: returns the cluster summary handed to configuration recommenders; None by default."""
          return None

      def getConfigurationsValidationItems(self, services, hosts):
          """Hook: returns the configuration validation items; none by default."""
          return []

      def recommendConfigurations(self, services, hosts):
          """
          Returns the configuration recommendation response.

          For every selected service the recommender obtained from
          getServiceConfigurationRecommender() is called, if there is one, with
          (configurations, clusterSummary, services, hosts). Its return value is
          ignored, so a recommender is expected to fill 'configurations' in place.

          @type services: dictionary
          @param services: Services description; must contain "Versions" and "services".
          @type hosts: dictionary
          @param hosts: Hosts description; must contain "items".
          @rtype: dictionary
          @return: Dictionary with "Versions", "hosts", "services" and "recommendations".
          """
          stackName = services["Versions"]["stack_name"]
          stackVersion = services["Versions"]["stack_version"]
          hostsList = [host["Hosts"]["host_name"] for host in hosts["items"]]
          servicesList = [service["StackServices"]["service_name"] for service in services["services"]]
          components = [component["StackServiceComponents"]["component_name"]
                        for service in services["services"]
                        for component in service["components"]]

          clusterSummary = self.getConfigurationClusterSummary(servicesList, hosts, components)

          configurations = {}
          recommendations = {
              "Versions": {"stack_name": stackName, "stack_version": stackVersion},
              "hosts": hostsList,
              "services": servicesList,
              "recommendations": {
                  "blueprint": {
                      "configurations": configurations,
                      "host_groups": []
                  },
                  "blueprint_cluster_binding": {
                      "host_groups": []
                  }
              }
          }

          for service in servicesList:
              calculation = self.getServiceConfigurationRecommender(service)
              if calculation is not None:
                  calculation(configurations, clusterSummary, services, hosts)

          return recommendations

      def getServiceConfigurationRecommender(self, service):
          """Returns the recommender callable registered for the service name, or None."""
          return self.getServiceConfigurationRecommenderDict().get(service, None)

      def getServiceConfigurationRecommenderDict(self):
          """Hook: returns the mapping of service name to recommender callable; empty by default."""
          return {}

      # Recommendation helper methods

      def isComponentHostsPopulated(self, component):
          """Returns True when the component has a non-empty "hostnames" attribute."""
          hostnames = self.getComponentAttribute(component, "hostnames")
          return hostnames is not None and len(hostnames) > 0

      def isClientComponent(self, component):
          """Returns True when the component category is 'CLIENT'."""
          return self.getComponentAttribute(component, "component_category") == 'CLIENT'

      def isSlaveComponent(self, component):
          """Returns True when the component category is 'SLAVE'."""
          return self.getComponentAttribute(component, "component_category") == 'SLAVE'

      def isMasterComponent(self, component):
          """Returns the component's "is_master" attribute (None when it is absent)."""
          return self.getComponentAttribute(component, "is_master")

      def getComponentAttribute(self, component, attribute):
          """Returns component["StackServiceComponents"][attribute], or None when either key is missing."""
          serviceComponent = component.get("StackServiceComponents", None)
          if serviceComponent is None:
              return None
          return serviceComponent.get(attribute, None)

      def isLocalHost(self, hostName):
          """Returns True when hostName resolves to the same FQDN as the machine running this code."""
          return socket.getfqdn(hostName) == socket.getfqdn()

      def isMasterComponentWithMultipleInstances(self, component):
          """Returns True when the component name is listed by getMastersWithMultipleInstances()."""
          return self.getComponentName(component) in self.getMastersWithMultipleInstances()

      def isComponentNotValuable(self, component):
          """Returns True when the component name is listed by getNotValuableComponents()."""
          return self.getComponentName(component) in self.getNotValuableComponents()

      def getMinComponentCount(self, component):
          """Returns the "min" entry of the component's cardinality."""
          componentName = self.getComponentName(component)
          return self.getComponentCardinality(componentName)["min"]

      # Helper dictionaries

      def getComponentCardinality(self, componentName):
          """Returns the cardinality dictionary of the component; {"min": 1, "max": 1} when unknown."""
          return self.getCardinalitiesDict().get(componentName, {"min": 1, "max": 1})

      def getHostForComponent(self, component, hostsList):
          """
          Picks a single host for the component.

          With more than one host the component's layout scheme, if any, selects
          the index to start from: integer keys of the scheme are exclusive upper
          bounds on the number of hosts and are checked in ascending order, so the
          smallest bound exceeding the host count wins; scheme['else'] is used
          when no bound matches. The first suitable host from that index onwards
          is returned. When no such host exists, or when there is a single host,
          the first host is returned.

          @type component: dictionary
          @param component: Component description.
          @type hostsList: list
          @param hostsList: Names of the hosts in the cluster.
          @rtype: string
          @return: The chosen host name, or None when hostsList is empty.
          """
          # Function-scope import: works on Python 2 and 3, where int/long differ.
          import numbers

          if not hostsList:
              return None

          if len(hostsList) != 1:
              scheme = self.getComponentLayoutScheme(self.getComponentName(component))
              if scheme is not None:
                  hostIndex = scheme['else']
                  # Sorted so the result does not depend on dict iteration order.
                  thresholds = sorted(key for key in scheme if isinstance(key, numbers.Integral))
                  for threshold in thresholds:
                      if len(hostsList) < threshold:
                          hostIndex = scheme[threshold]
                          break
              else:
                  hostIndex = 0
              for host in hostsList[hostIndex:]:
                  if self.isHostSuitableForComponent(host, component):
                      return host
          return hostsList[0]

      def getComponentLayoutScheme(self, componentName):
          """
          Provides a scheme for laying out given component on different number of hosts.
          Returns None when the component has no scheme.
          """
          return self.getComponentLayoutSchemes().get(componentName, None)

      def getComponentName(self, component):
          """Returns the component's "component_name" attribute (None when it is absent)."""
          return self.getComponentAttribute(component, "component_name")

      def isComponentNotPreferableOnAmbariServerHost(self, component):
          """Returns True when the component name is listed by getNotPreferableOnServerComponents()."""
          return self.getComponentName(component) in self.getNotPreferableOnServerComponents()

      def isHostSuitableForComponent(self, host, component):
          """Returns False only when the component is not preferable on the server host and host is the local host."""
          return not (self.isComponentNotPreferableOnAmbariServerHost(component) and self.isLocalHost(host))

      def getMastersWithMultipleInstances(self):
          """Hook: names of master components that may have several instances; empty by default."""
          return []

      def getNotValuableComponents(self):
          """Hook: names of components whose hosts do not count as utilized; empty by default."""
          return []

      def getNotPreferableOnServerComponents(self):
          """Hook: names of components that should avoid the local (server) host; empty by default."""
          return []

      def getCardinalitiesDict(self):
          """Hook: mapping of component name to {"min": ..., "max": ...}; empty by default."""
          return {}

      def getComponentLayoutSchemes(self):
          """
          Provides layout scheme dictionaries for components.

          The scheme dictionary basically maps the number of hosts to
          host index where component should exist: integer keys are exclusive
          upper bounds on the host count and the 'else' key holds the index used
          when no bound applies (see getHostForComponent). Empty by default.
          """
          return {}

      def getComponentHostNames(self, servicesDict, serviceName, componentName):
          """
          Returns the "hostnames" of the given component of the given service.

          @rtype: list
          @return: The hostnames of the first matching component, or None when the
            service or the component is not found.
          """
          for service in servicesDict["services"]:
              if service["StackServices"]["service_name"] != serviceName:
                  continue
              for component in service['components']:
                  if component["StackServiceComponents"]["component_name"] == componentName:
                      return component["StackServiceComponents"]["hostnames"]
          return None