relocate_resources.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. #!/usr/bin/env python
  2. '''
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. '''
  17. import optparse
  18. import sys
  19. import os
  20. import logging
  21. import tempfile
  22. import urllib2
  23. import socket
  24. import json
  25. import base64
  26. import time
  27. AMBARI_HOSTNAME = None
  28. AMBARI_PORT = 8080
  29. CLUSTER_NAME = None
  30. PROTOCOL = "http"
  31. USERNAME = "admin"
  32. PASSWORD = "admin"
  33. DEFAULT_TIMEOUT = 10 # seconds
  34. START_ON_RELOCATE = False
  35. # Supported Actions
  36. RELOCATE_ACTION = 'relocate'
  37. ALLOWED_ACTUAL_STATES_FOR_RELOCATE = [ 'INIT', 'UNKNOWN', 'MAINTENANCE', 'UNINSTALLED' ]
  38. ALLOWED_HOST_STATUS_FOR_RELOCATE = [ 'HEALTHY' ]
  39. STATUS_WAIT_TIMEOUT = 120 # seconds
  40. STATUS_CHECK_INTERVAL = 10 # seconds
  41. # API calls
  42. GET_CLUSTERS_URI = "/api/v1/clusters/"
  43. GET_HOST_COMPONENTS_URI = "/api/v1/clusters/{0}/services/{1}/components/{2}" +\
  44. "?fields=host_components"
  45. GET_HOST_COMPONENT_DESIRED_STATE_URI = "/api/v1/clusters/{0}/hosts/{1}" +\
  46. "/host_components/{2}" +\
  47. "?fields=HostRoles/desired_state"
  48. GET_HOST_COMPONENT_STATE_URI = "/api/v1/clusters/{0}/hosts/{1}" +\
  49. "/host_components/{2}" +\
  50. "?fields=HostRoles/state"
  51. GET_HOST_STATE_URL = "/api/v1/clusters/{0}/hosts/{1}?fields=Hosts/host_state"
  52. HOST_COMPONENT_URI = "/api/v1/clusters/{0}/hosts/{1}/host_components/{2}"
  53. ADD_HOST_COMPONENT_URI = "/api/v1/clusters/{0}/hosts?Hosts/host_name={1}"
  54. logger = logging.getLogger()
  55. class PreemptiveBasicAuthHandler(urllib2.BaseHandler):
  56. def __init__(self):
  57. password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
  58. password_mgr.add_password(None, getUrl(''), USERNAME, PASSWORD)
  59. self.passwd = password_mgr
  60. self.add_password = self.passwd.add_password
  61. def http_request(self, req):
  62. uri = req.get_full_url()
  63. user = USERNAME
  64. pw = PASSWORD
  65. raw = "%s:%s" % (user, pw)
  66. auth = 'Basic %s' % base64.b64encode(raw).strip()
  67. req.add_unredirected_header('Authorization', auth)
  68. return req
  69. class AmbariResource:
  70. def __init__(self, serviceName, componentName):
  71. self.serviveName = serviceName
  72. self.componentName = componentName
  73. self.isInitialized = False
  74. def initializeResource(self):
  75. global CLUSTER_NAME
  76. if CLUSTER_NAME is None:
  77. CLUSTER_NAME = self.findClusterName()
  78. if self.serviveName is None:
  79. raise Exception('Service name undefined')
  80. if self.componentName is None:
  81. raise Exception('Component name undefined')
  82. handler = PreemptiveBasicAuthHandler()
  83. opener = urllib2.build_opener(handler)
  84. # Install opener for all requests
  85. urllib2.install_opener(opener)
  86. self.urlOpener = opener
  87. self.old_hostname = self.getHostname()
  88. self.isInitialized = True
  89. def relocate(self, new_hostname):
  90. if not self.isInitialized:
  91. raise Exception('Resource not initialized')
  92. # If old and new hostname are the same exit harmlessly
  93. if self.old_hostname == new_hostname:
  94. logger.error('New hostname is same as existing host name, %s' % self.old_hostname)
  95. sys.exit(2)
  96. pass
  97. try:
  98. self.verifyHostComponentStatus(self.old_hostname, new_hostname, self.componentName)
  99. except Exception, e:
  100. logger.error("Exception caught on verify relocate request.")
  101. logger.error(e.message)
  102. sys.exit(3)
  103. # Put host component in Maintenance state
  104. self.updateHostComponentStatus(self.old_hostname, self.componentName,
  105. "Maintenance", "MAINTENANCE")
  106. # Delete current host component
  107. self.deleteHostComponent(self.old_hostname, self.componentName)
  108. # Add component on the new host
  109. self.addHostComponent(new_hostname, self.componentName)
  110. # Install host component
  111. self.updateHostComponentStatus(new_hostname, self.componentName,
  112. "Installing", "INSTALLED")
  113. # Wait on install
  114. self.waitOnHostComponentUpdate(new_hostname, self.componentName,
  115. "INSTALLED")
  116. if START_ON_RELOCATE:
  117. # Start host component
  118. self.updateHostComponentStatus(new_hostname, self.componentName,
  119. "Starting", "STARTED")
  120. # Wait on start
  121. self.waitOnHostComponentUpdate(new_hostname, self.componentName, "STARTED")
  122. pass
  123. pass
  124. def waitOnHostComponentUpdate(self, hostname, componentName, status):
  125. logger.info("Waiting for host component status to update ...")
  126. sleep_itr = 0
  127. state = None
  128. while sleep_itr < STATUS_WAIT_TIMEOUT:
  129. try:
  130. state = self.getHostComponentState(hostname, componentName)
  131. if status == state:
  132. logger.info("Status update successful. status: %s" % state)
  133. return
  134. pass
  135. except Exception, e:
  136. logger.error("Caught an exception waiting for status update.. "
  137. "continuing to wait...")
  138. pass
  139. time.sleep(STATUS_CHECK_INTERVAL)
  140. sleep_itr += STATUS_CHECK_INTERVAL
  141. pass
  142. if state and state != status:
  143. logger.error("Timed out on wait, status unchanged. status = %s" % state)
  144. sys.exit(1)
  145. pass
  146. pass
  147. def addHostComponent(self, hostname, componentName):
  148. data = '{"host_components":[{"HostRoles":{"component_name":"%s"}}]}' % self.componentName
  149. req = urllib2.Request(getUrl(ADD_HOST_COMPONENT_URI.format(CLUSTER_NAME,
  150. hostname)), data)
  151. req.add_header("X-Requested-By", "ambari_probe")
  152. req.get_method = lambda: 'POST'
  153. try:
  154. logger.info("Adding host component: %s" % req.get_full_url())
  155. resp = self.urlOpener.open(req)
  156. self.logResponse('Add host component response: ', resp)
  157. except Exception, e:
  158. logger.error('Create host component failed, component: {0}, host: {1}'
  159. .format(componentName, hostname))
  160. logger.error(e)
  161. raise e
  162. pass
  163. def deleteHostComponent(self, hostname, componentName):
  164. req = urllib2.Request(getUrl(HOST_COMPONENT_URI.format(CLUSTER_NAME,
  165. hostname, componentName)))
  166. req.add_header("X-Requested-By", "ambari_probe")
  167. req.get_method = lambda: 'DELETE'
  168. try:
  169. logger.info("Deleting host component: %s" % req.get_full_url())
  170. resp = self.urlOpener.open(req)
  171. self.logResponse('Delete component response: ', resp)
  172. except Exception, e:
  173. logger.error('Delete {0} failed.'.format(componentName))
  174. logger.error(e)
  175. raise e
  176. pass
  177. def updateHostComponentStatus(self, hostname, componentName, contextStr, status):
  178. # Update host component
  179. data = '{"RequestInfo":{"context":"%s %s"},"Body":{"HostRoles":{"state":"%s"}}}' % (contextStr, self.componentName, status)
  180. req = urllib2.Request(getUrl(HOST_COMPONENT_URI.format(CLUSTER_NAME,
  181. hostname, componentName)), data)
  182. req.add_header("X-Requested-By", "ambari_probe")
  183. req.get_method = lambda: 'PUT'
  184. try:
  185. logger.info("%s host component: %s" % (contextStr, req.get_full_url()))
  186. resp = self.urlOpener.open(req)
  187. self.logResponse('Update host component response: ', resp)
  188. except Exception, e:
  189. logger.error('Update Status {0} failed.'.format(componentName))
  190. logger.error(e)
  191. raise e
  192. pass
  193. def verifyHostComponentStatus(self, old_hostname, new_hostname, componentName):
  194. # Check desired state of host component is not STOPPED or host is
  195. # unreachable
  196. actualState = self.getHostComponentState(old_hostname, componentName)
  197. if actualState not in ALLOWED_ACTUAL_STATES_FOR_RELOCATE:
  198. raise Exception('Aborting relocate action since host component '
  199. 'state is %s' % actualState)
  200. hostState = self.getHostSatus(new_hostname)
  201. if hostState not in ALLOWED_HOST_STATUS_FOR_RELOCATE:
  202. raise Exception('Aborting relocate action since host state is %s' % hostState)
  203. pass
  204. def getHostSatus(self, hostname):
  205. hostStateUrl = getUrl(GET_HOST_STATE_URL.format(CLUSTER_NAME, hostname))
  206. logger.info("Requesting host status: %s " % hostStateUrl)
  207. urlResponse = self.urlOpener.open(hostStateUrl)
  208. state = None
  209. if urlResponse:
  210. response = urlResponse.read()
  211. data = json.loads(response)
  212. logger.debug('Response from getHostSatus: %s' % data)
  213. if data:
  214. try:
  215. hostsInfo = data.get('Hosts')
  216. if not hostsInfo:
  217. raise Exception('Cannot find host state for host: {1}'.format(hostname))
  218. state = hostsInfo.get('host_state')
  219. except Exception, e:
  220. logger.error('Unable to parse json data. %s' % data)
  221. raise e
  222. pass
  223. else:
  224. logger.error("Unable to retrieve host state.")
  225. pass
  226. return state
  227. def getHostComponentState(self, hostname, componentName):
  228. hostStatusUrl = getUrl(GET_HOST_COMPONENT_STATE_URI.format(CLUSTER_NAME,
  229. hostname, componentName))
  230. logger.info("Requesting host component state: %s " % hostStatusUrl)
  231. urlResponse = self.urlOpener.open(hostStatusUrl)
  232. state = None
  233. if urlResponse:
  234. response = urlResponse.read()
  235. data = json.loads(response)
  236. logger.debug('Response from getHostComponentState: %s' % data)
  237. if data:
  238. try:
  239. hostRoles = data.get('HostRoles')
  240. if not hostRoles:
  241. raise Exception('Cannot find host component state for component: ' +\
  242. '{0}, host: {1}'.format(componentName, hostname))
  243. state = hostRoles.get('state')
  244. except Exception, e:
  245. logger.error('Unable to parse json data. %s' % data)
  246. raise e
  247. pass
  248. else:
  249. logger.error("Unable to retrieve host component desired state.")
  250. pass
  251. return state
  252. # Log response for PUT, POST or DELETE
  253. def logResponse(self, text=None, response=None):
  254. if response is not None:
  255. resp = str(response.getcode())
  256. if text is None:
  257. text = 'Logging response from server: '
  258. if resp is not None:
  259. logger.info(text + resp)
  260. def findClusterName(self):
  261. clusterUrl = getUrl(GET_CLUSTERS_URI)
  262. clusterName = None
  263. logger.info("Requesting clusters: " + clusterUrl)
  264. urlResponse = self.urlOpener.open(clusterUrl)
  265. if urlResponse is not None:
  266. response = urlResponse.read()
  267. data = json.loads(response)
  268. logger.debug('Response from findClusterName: %s' % data)
  269. if data:
  270. try:
  271. clusters = data.get('items')
  272. if len(clusters) > 1:
  273. raise Exception('Multiple clusters found. %s' % clusters)
  274. clusterName = clusters[0].get('Clusters').get('cluster_name')
  275. except Exception, e:
  276. logger.error('Unable to parse json data. %s' % data)
  277. raise e
  278. pass
  279. else:
  280. logger.error("Unable to retrieve clusters data.")
  281. pass
  282. return clusterName
  283. def getHostname(self):
  284. hostsUrl = getUrl(GET_HOST_COMPONENTS_URI.format(CLUSTER_NAME,
  285. self.serviveName, self.componentName))
  286. logger.info("Requesting host info: " + hostsUrl)
  287. urlResponse = self.urlOpener.open(hostsUrl)
  288. hostname = None
  289. if urlResponse is not None:
  290. response = urlResponse.read()
  291. data = json.loads(response)
  292. logger.debug('Response from getHostname: %s' % data)
  293. if data:
  294. try:
  295. hostRoles = data.get('host_components')
  296. if not hostRoles:
  297. raise Exception('Cannot find host component data for service: ' +\
  298. '{0}, component: {1}'.format(self.serviveName, self.componentName))
  299. if len(hostRoles) > 1:
  300. raise Exception('More than one hosts found with the same role')
  301. hostname = hostRoles[0].get('HostRoles').get('host_name')
  302. except Exception, e:
  303. logger.error('Unable to parse json data. %s' % data)
  304. raise e
  305. pass
  306. else:
  307. logger.error("Unable to retrieve host component data.")
  308. pass
  309. return hostname
  310. def getUrl(partial_url):
  311. return PROTOCOL + "://" + AMBARI_HOSTNAME + ":" + AMBARI_PORT + partial_url
  312. def get_supported_actions():
  313. return [ RELOCATE_ACTION ]
  314. #
  315. # Main.
  316. #
  317. def main():
  318. tempDir = tempfile.gettempdir()
  319. outputFile = os.path.join(tempDir, "ambari_reinstall_probe.out")
  320. parser = optparse.OptionParser(usage="usage: %prog [options]")
  321. parser.set_description('This python program is a Ambari thin client and '
  322. 'supports relocation of ambari host components on '
  323. 'Ambari managed clusters.')
  324. parser.add_option("-v", "--verbose", dest="verbose", action="store_false",
  325. default=False, help="output verbosity.")
  326. parser.add_option("-s", "--host", dest="server_hostname",
  327. help="Ambari server host name.")
  328. parser.add_option("-p", "--port", dest="server_port",
  329. default="8080" ,help="Ambari server port. [default: 8080]")
  330. parser.add_option("-r", "--protocol", dest="protocol", default = "http",
  331. help="Protocol for communicating with Ambari server ("
  332. "http/https) [default: http].")
  333. parser.add_option("-c", "--cluster-name", dest="cluster_name",
  334. help="Ambari cluster to operate on.")
  335. parser.add_option("-e", "--service-name", dest="service_name",
  336. help="Ambari Service to which the component belongs to.")
  337. parser.add_option("-m", "--component-name", dest="component_name",
  338. help="Ambari Service Component to operate on.")
  339. parser.add_option("-n", "--new-host", dest="new_hostname",
  340. help="New host to relocate the component to.")
  341. parser.add_option("-a", "--action", dest="action", default = "relocate",
  342. help="Script action. [default: relocate]")
  343. parser.add_option("-o", "--output-file", dest="outputfile",
  344. default = outputFile, metavar="FILE",
  345. help="Output file. [default: %s]" % outputFile)
  346. parser.add_option("-u", "--username", dest="username",
  347. default="admin" ,help="Ambari server admin user. [default: admin]")
  348. parser.add_option("-w", "--password", dest="password",
  349. default="admin" ,help="Ambari server admin password.")
  350. parser.add_option("-d", "--start-component", dest="start_component",
  351. action="store_false", default=False,
  352. help="Should the script start the component after relocate.")
  353. (options, args) = parser.parse_args()
  354. # set verbose
  355. if options.verbose:
  356. logging.basicConfig(level=logging.DEBUG)
  357. else:
  358. logging.basicConfig(level=logging.INFO)
  359. global AMBARI_HOSTNAME
  360. AMBARI_HOSTNAME = options.server_hostname
  361. global AMBARI_PORT
  362. AMBARI_PORT = options.server_port
  363. global CLUSTER_NAME
  364. CLUSTER_NAME = options.cluster_name
  365. global PROTOCOL
  366. PROTOCOL = options.protocol
  367. global USERNAME
  368. USERNAME = options.username
  369. global PASSWORD
  370. PASSWORD = options.password
  371. global START_ON_RELOCATE
  372. START_ON_RELOCATE = options.start_component
  373. global logger
  374. logger = logging.getLogger('AmbariProbe')
  375. handler = logging.FileHandler(options.outputfile)
  376. formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
  377. handler.setFormatter(formatter)
  378. logger.addHandler(handler)
  379. action = RELOCATE_ACTION
  380. if options.action is not None:
  381. if options.action not in get_supported_actions():
  382. logger.error("Unsupported action: " + options.action + ", "
  383. "valid actions: " + str(get_supported_actions()))
  384. sys.exit(1)
  385. else:
  386. action = options.action
  387. socket.setdefaulttimeout(DEFAULT_TIMEOUT)
  388. ambariResource = AmbariResource(serviceName=options.service_name,
  389. componentName=options.component_name)
  390. ambariResource.initializeResource()
  391. if action == RELOCATE_ACTION:
  392. if options.new_hostname is not None:
  393. ambariResource.relocate(options.new_hostname)
  394. if __name__ == "__main__":
  395. try:
  396. main()
  397. except (KeyboardInterrupt, EOFError):
  398. print("\nAborting ... Keyboard Interrupt.")
  399. sys.exit(1)