TestAlerts.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. #!/usr/bin/env python
  2. '''
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. '''
  17. from stacks.utils.RMFTestCase import *
  18. import os
  19. import socket
  20. import sys
  21. from ambari_agent.AlertSchedulerHandler import AlertSchedulerHandler
  22. from ambari_agent.alerts.collector import AlertCollector
  23. from ambari_agent.alerts.metric_alert import MetricAlert
  24. from ambari_agent.alerts.port_alert import PortAlert
  25. from ambari_agent.alerts.script_alert import ScriptAlert
  26. from ambari_agent.alerts.web_alert import WebAlert
  27. from ambari_agent.apscheduler.scheduler import Scheduler
  28. from collections import namedtuple
  29. from mock.mock import patch
  30. from unittest import TestCase
  31. class TestAlerts(TestCase):
  def setUp(self):
    """Intentionally empty: every test in this class builds its own objects."""
  def tearDown(self):
    """Point ``sys.stdout`` back at the interpreter's original stdout."""
    # This used to read ``sys.stdout == sys.__stdout__``: a comparison whose
    # result was thrown away, so nothing was ever restored.  Assignment is
    # what actually resets the stream after a test.
    sys.stdout = sys.__stdout__
  @patch.object(Scheduler, "add_interval_job")
  @patch.object(Scheduler, "start")
  def test_start(self, aps_start_mock, aps_add_interval_job_mock):
    """Starting the handler must schedule a job and start the scheduler.

    Stacked ``patch`` decorators inject their mocks bottom-up: the decorator
    closest to the function (``Scheduler.start``) supplies the first mock
    argument and the outermost one (``Scheduler.add_interval_job``) the
    second.  The parameter names follow that order so each name refers to
    the mock it actually receives.
    """
    test_file_path = os.path.join('ambari_agent', 'dummy_files')
    test_stack_path = os.path.join('ambari_agent', 'dummy_files')
    test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')

    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
    ash.start()

    self.assertTrue(aps_add_interval_job_mock.called)
    self.assertTrue(aps_start_mock.called)
  def test_port_alert(self):
    """Build a PORT alert whose URI is a ``{{...}}`` placeholder and collect it.

    A value for ``hdfs-site/my-key`` is supplied through ``set_helpers``.
    Only the interval is asserted; ``collect()`` is invoked without
    inspecting what ends up in the collector.
    """
    definition = {
      "name": "namenode_process",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "PORT",
        "uri": "{{hdfs-site/my-key}}",
        "default_port": 50070,
        "reporting": {
          "ok": {
            "text": "TCP OK - {0:.4f} response time on port {1}"
          },
          "critical": {
            "text": "Could not load process info: {0}"
          }
        }
      }
    }

    collector = AlertCollector()
    pa = PortAlert(definition, definition['source'])
    pa.set_helpers(collector, {'hdfs-site/my-key': 'value1'})

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(6, pa.interval())

    pa.collect()
  @patch.object(socket.socket,"connect")
  def test_port_alert_complex_uri(self, socket_connect_mock):
    """A comma-separated host:port list must resolve to this host's entry.

    ``socket.socket.connect`` is patched, so the real method is never
    invoked.  The alert's ``host_name`` is set to the second entry of the
    list and the reported text is expected to mention port 2181.
    """
    definition = {
      "name": "namenode_process",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "PORT",
        "uri": "{{hdfs-site/my-key}}",
        "default_port": 50070,
        "reporting": {
          "ok": {
            "text": "TCP OK - {0:.4f} response time on port {1}"
          },
          "critical": {
            "text": "Could not load process info: {0}"
          }
        }
      }
    }

    collector = AlertCollector()
    pa = PortAlert(definition, definition['source'])

    # use a URI that has commas to verify that we properly parse it
    pa.set_helpers(collector, {'hdfs-site/my-key': 'c6401.ambari.apache.org:2181,c6402.ambari.apache.org:2181,c6403.ambari.apache.org:2181'})
    pa.host_name = 'c6402.ambari.apache.org'
    self.assertEqual(6, pa.interval())

    pa.collect()

    first_alert = collector.alerts()[0]
    self.assertEqual('OK', first_alert['state'])
    # assertIn reports both operands on failure, unlike assertTrue(a in b)
    self.assertIn('response time on port 2181', first_alert['text'])
  def test_port_alert_no_sub(self):
    """A literal URI (no ``{{...}}`` placeholder) must be kept verbatim.

    The configuration passed to ``set_helpers`` is an empty string, so there
    is nothing to substitute; the test asserts ``pa.uri`` equals the literal
    from the definition and then runs ``collect()`` without inspecting the
    result.
    """
    definition = {
      "name": "namenode_process",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "PORT",
        "uri": "http://c6401.ambari.apache.org",
        "default_port": 50070,
        "reporting": {
          "ok": {
            "text": "TCP OK - {0:.4f} response time on port {1}"
          },
          "critical": {
            "text": "Could not load process info: {0}"
          }
        }
      }
    }

    pa = PortAlert(definition, definition['source'])
    pa.set_helpers(AlertCollector(), '')

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual('http://c6401.ambari.apache.org', pa.uri)

    pa.collect()
  def test_script_alert(self):
    """Run a SCRIPT alert and check the state and text it reports.

    The two directory entries are injected into the source by hand because,
    per the comment in the original test, AlertSchedulerHandler normally sets
    them.  The expected text shows that both configuration values passed to
    ``set_helpers`` reach the script.
    """
    definition = {
      "name": "namenode_process",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "SCRIPT",
        "path": "test_script.py",
      }
    }
    source = definition['source']

    # normally set by AlertSchedulerHandler
    source['stacks_directory'] = os.path.join('ambari_agent', 'dummy_files')
    source['host_scripts_directory'] = os.path.join('ambari_agent', 'host_scripts')

    collector = AlertCollector()
    sa = ScriptAlert(definition, source)
    sa.set_helpers(collector, {'foo-site/bar': 'rendered-bar', 'foo-site/baz':'rendered-baz'} )

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(source['path'], sa.path)
    self.assertEqual(source['stacks_directory'], sa.stacks_dir)
    self.assertEqual(source['host_scripts_directory'], sa.host_scripts_dir)

    sa.collect()

    self.assertEqual('WARNING', collector.alerts()[0]['state'])
    self.assertEqual('bar is rendered-bar, baz is rendered-baz', collector.alerts()[0]['text'])
  @patch.object(MetricAlert, "_load_jmx")
  def test_metric_alert(self, ma_load_jmx_mock):
    """Check METRIC alert state with and without a ``jmx.value`` expression.

    ``_load_jmx`` is mocked to return ``[1, 3]``.  With the expression
    ``"{0} * 100 + 123"`` the test expects CRITICAL and the text
    ``crit_arr: 1 3 223``.  After the expression is deleted from the
    definition, the test expects OK with ``None`` in the third text slot.
    """
    definition = {
      "name": "cpu_check",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "METRIC",
        "uri": {
          "http": "{{hdfs-site/dfs.datanode.http.address}}"
        },
        "jmx": {
          "property_list": [
            "someJmxObject/value",
            "someOtherJmxObject/value"
          ],
          "value": "{0} * 100 + 123"
        },
        "reporting": {
          "ok": {
            "text": "ok_arr: {0} {1} {2}",
          },
          "warning": {
            "text": "",
            "value": 13
          },
          "critical": {
            "text": "crit_arr: {0} {1} {2}",
            "value": 72
          }
        }
      }
    }
    configuration = {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'}

    ma_load_jmx_mock.return_value = [1, 3]

    # first pass: the value expression is present
    collector = AlertCollector()
    ma = MetricAlert(definition, definition['source'])
    ma.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'})
    ma.collect()

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual('CRITICAL', collector.alerts()[0]['state'])
    self.assertEqual('crit_arr: 1 3 223', collector.alerts()[0]['text'])

    # second pass: same definition with the value expression removed
    del definition['source']['jmx']['value']
    collector = AlertCollector()
    ma = MetricAlert(definition, definition['source'])
    ma.set_helpers(collector, configuration)
    ma.collect()

    self.assertEqual('OK', collector.alerts()[0]['state'])
    self.assertEqual('ok_arr: 1 3 None', collector.alerts()[0]['text'])
  @patch.object(MetricAlert, "_load_jmx")
  def test_alert_uri_structure(self, ma_load_jmx_mock):
    """Exercise the structured (http/https/policy) URI of a METRIC alert.

    ``_load_jmx`` is mocked to return ``[1, 1]``.  The alert is run against
    several configurations; the expected state is UNKNOWN when neither
    address key is configured and OK as soon as at least one is.
    """
    definition = {
      "name": "cpu_check",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "METRIC",
        "uri": {
          "http": "{{hdfs-site/dfs.datanode.http.address}}",
          "https": "{{hdfs-site/dfs.datanode.https.address}}",
          "https_property": "{{hdfs-site/dfs.http.policy}}",
          "https_property_value": "HTTPS_ONLY"
        },
        "jmx": {
          "property_list": [
            "someJmxObject/value",
            "someOtherJmxObject/value"
          ],
          "value": "{0}"
        },
        "reporting": {
          "ok": {
            "text": "ok_arr: {0} {1} {2}",
          },
          "warning": {
            "text": "",
            "value": 10
          },
          "critical": {
            "text": "crit_arr: {0} {1} {2}",
            "value": 20
          }
        }
      }
    }

    ma_load_jmx_mock.return_value = [1,1]

    def collected_state(configuration):
      """Run a fresh MetricAlert with *configuration*; return the first alert's state."""
      collector = AlertCollector()
      ma = MetricAlert(definition, definition['source'])
      ma.set_helpers(collector, configuration)
      ma.collect()
      return collector.alerts()[0]['state']

    # run the alert without specifying any keys; an exception should be thrown
    # indicating that there was no URI and the result is UNKNOWN
    self.assertEqual('UNKNOWN', collected_state(''))

    # set a property that makes no sense without the main URI properties
    self.assertEqual('UNKNOWN', collected_state({
      'hdfs-site/dfs.http.policy': 'HTTP_ONLY'}))

    # set an actual property key (http)
    self.assertEqual('OK', collected_state({
      'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80',
      'hdfs-site/dfs.http.policy': 'HTTP_ONLY'}))

    # set an actual property key (https)
    self.assertEqual('OK', collected_state({
      'hdfs-site/dfs.datanode.https.address': '1.2.3.4:443',
      'hdfs-site/dfs.http.policy': 'HTTP_ONLY'}))

    # set both (http and https)
    self.assertEqual('OK', collected_state({
      'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80',
      'hdfs-site/dfs.datanode.https.address': '1.2.3.4:443',
      'hdfs-site/dfs.http.policy': 'HTTP_ONLY'}))
  @patch.object(WebAlert, "_make_web_request")
  def test_web_alert(self, wa_make_web_request_mock):
    """Check WEB alert state/text for several mocked web responses.

    ``_make_web_request`` is mocked to return a ``WebResponse`` named tuple.
    Expected outcomes asserted below: status 200 -> OK, status 500 ->
    WARNING, status 0 -> CRITICAL.  The CRITICAL text carries the URL that
    was used, which lets the test verify the http vs. https selection.
    """
    definition = {
      "name": "webalert_test",
      "service": "HDFS",
      "component": "DATANODE",
      "label": "WebAlert Test",
      "interval": 1,
      "scope": "HOST",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "WEB",
        "uri": {
          "http": "{{hdfs-site/dfs.datanode.http.address}}",
          "https": "{{hdfs-site/dfs.datanode.https.address}}",
          "https_property": "{{hdfs-site/dfs.http.policy}}",
          "https_property_value": "HTTPS_ONLY"
        },
        "reporting": {
          "ok": {
            "text": "ok: {0}",
          },
          "warning": {
            "text": "warning: {0}",
          },
          "critical": {
            "text": "critical: {1}",
          }
        }
      }
    }

    WebResponse = namedtuple('WebResponse', 'status_code time_millis')
    http_only_configuration = {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'}

    def first_alert(configuration):
      """Run a fresh WebAlert with *configuration*; return its first collected alert."""
      collector = AlertCollector()
      alert = WebAlert(definition, definition['source'])
      alert.set_helpers(collector, configuration)
      alert.collect()
      return collector.alerts()[0]

    # run the alert and check HTTP 200
    wa_make_web_request_mock.return_value = WebResponse(200,1.234)
    result = first_alert(http_only_configuration)
    self.assertEqual('OK', result['state'])
    self.assertEqual('ok: 200', result['text'])

    # run the alert and check HTTP 500
    wa_make_web_request_mock.return_value = WebResponse(500,1.234)
    result = first_alert(http_only_configuration)
    self.assertEqual('WARNING', result['state'])
    self.assertEqual('warning: 500', result['text'])

    # run the alert and check critical
    wa_make_web_request_mock.return_value = WebResponse(0,0)
    result = first_alert(http_only_configuration)

    # http assertion indicating that we properly determined non-SSL
    self.assertEqual('CRITICAL', result['state'])
    self.assertEqual('critical: http://1.2.3.4:80', result['text'])

    result = first_alert({
      'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80',
      'hdfs-site/dfs.datanode.https.address': '1.2.3.4:8443',
      'hdfs-site/dfs.http.policy': 'HTTPS_ONLY'})

    # SSL assertion
    self.assertEqual('CRITICAL', result['state'])
    self.assertEqual('critical: https://1.2.3.4:8443', result['text'])
  def test_reschedule(self):
    """Rescheduling must leave the handler's job count unchanged.

    The test expects exactly one job after ``start()`` and still exactly one
    after ``reschedule()``.
    """
    test_file_path = os.path.join('ambari_agent', 'dummy_files')
    test_stack_path = os.path.join('ambari_agent', 'dummy_files')
    test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')

    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
    ash.start()

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(1, ash.get_job_count())

    ash.reschedule()

    self.assertEqual(1, ash.get_job_count())
  def test_alert_collector_purge(self):
    """Alerts must be removable from the collector by definition UUID.

    A PORT alert is collected (the test expects a CRITICAL result), then
    ``remove_by_uuid`` is called with the definition's UUID and the collector
    is expected to be empty.
    """
    definition = {
      "name": "namenode_process",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "PORT",
        "uri": "{{hdfs-site/my-key}}",
        "default_port": 50070,
        "reporting": {
          "ok": {
            "text": "TCP OK - {0:.4f} response time on port {1}"
          },
          "critical": {
            "text": "Could not load process info: {0}"
          }
        }
      }
    }

    collector = AlertCollector()
    pa = PortAlert(definition, definition['source'])
    pa.set_helpers(collector, {'hdfs-site/my-key': 'value1'})
    self.assertEqual(6, pa.interval())

    # the return value of collect() was previously bound to an unused local
    pa.collect()

    self.assertIsNotNone(collector.alerts()[0])
    self.assertEqual('CRITICAL', collector.alerts()[0]['state'])

    collector.remove_by_uuid('c1f73191-4481-4435-8dae-fd380e4c0be1')
    self.assertEqual(0, len(collector.alerts()))
  def test_disabled_definitions(self):
    """Only enabled definitions may add a job to the scheduler.

    Starting from the single job present after ``start()``, the same
    definition is scheduled three times: enabled (count becomes 2), disabled
    (count stays 2), and enabled again (count becomes 3).
    """
    test_file_path = os.path.join('ambari_agent', 'dummy_files')
    test_stack_path = os.path.join('ambari_agent', 'dummy_files')
    test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')

    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
    ash.start()

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(1, ash.get_job_count())

    definition = {
      "name": "namenode_process",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "PORT",
        "uri": "{{hdfs-site/my-key}}",
        "default_port": 50070,
        "reporting": {
          "ok": {
            "text": "TCP OK - {0:.4f} response time on port {1}"
          },
          "critical": {
            "text": "Could not load process info: {0}"
          }
        }
      }
    }

    pa = PortAlert(definition, definition['source'])
    ash.schedule_definition(pa)

    self.assertEqual(2, ash.get_job_count())

    definition['enabled'] = False
    pa = PortAlert(definition, definition['source'])
    ash.schedule_definition(pa)

    # verify disabled alert not scheduled
    self.assertEqual(2, ash.get_job_count())

    definition['enabled'] = True
    pa = PortAlert(definition, definition['source'])
    ash.schedule_definition(pa)

    # verify enabled alert was scheduled
    self.assertEqual(3, ash.get_job_count())
  def test_immediate_alert(self):
    """``execute_alert`` must run a definition at once and record its result.

    The handler's collector is expected to be empty after ``start()`` and to
    hold exactly one alert after the execution command has been processed.
    """
    test_file_path = os.path.join('ambari_agent', 'dummy_files')
    test_stack_path = os.path.join('ambari_agent', 'dummy_files')
    test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')

    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
    ash.start()

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(1, ash.get_job_count())
    self.assertEqual(0, len(ash._collector.alerts()))

    execution_commands = [ {
      "clusterName": "c1",
      "hostName": "c6401.ambari.apache.org",
      "alertDefinition": {
        "name": "namenode_process",
        "service": "HDFS",
        "component": "NAMENODE",
        "label": "NameNode process",
        "interval": 6,
        "scope": "host",
        "enabled": True,
        "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
        "source": {
          "type": "PORT",
          "uri": "{{hdfs-site/my-key}}",
          "default_port": 50070,
          "reporting": {
            "ok": {
              "text": "TCP OK - {0:.4f} response time on port {1}"
            },
            "critical": {
              "text": "Could not load process info: {0}"
            }
          }
        }
      }
    } ]

    # execute the alert immediately and verify that the collector has the result
    ash.execute_alert(execution_commands)

    self.assertEqual(1, len(ash._collector.alerts()))
  def test_skipped_alert(self):
    """A SCRIPT alert configured with ``foo-site/skip`` must report nothing.

    The alert is built the same way as in the regular script-alert test,
    except that the configuration instructs the test script to skip.

    NOTE(review): ``sa.collect()`` is never called here, so the collector is
    empty regardless of how skipping is handled and the final assertion
    passes vacuously.  Presumably a ``collect()`` call belongs before it --
    confirm against ScriptAlert/test_script.py before adding one.
    """
    definition = {
      "name": "namenode_process",
      "service": "HDFS",
      "component": "NAMENODE",
      "label": "NameNode process",
      "interval": 6,
      "scope": "host",
      "enabled": True,
      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
      "source": {
        "type": "SCRIPT",
        "path": "test_script.py",
      }
    }
    source = definition['source']

    # normally set by AlertSchedulerHandler
    source['stacks_directory'] = os.path.join('ambari_agent', 'dummy_files')
    source['host_scripts_directory'] = os.path.join('ambari_agent', 'host_scripts')

    collector = AlertCollector()
    sa = ScriptAlert(definition, source)

    # instruct the test alert script to be skipped
    sa.set_helpers(collector, {'foo-site/skip': 'true'} )

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(source['path'], sa.path)
    self.assertEqual(source['stacks_directory'], sa.stacks_dir)
    self.assertEqual(source['host_scripts_directory'], sa.host_scripts_dir)

    # ensure that it was skipped
    self.assertEqual(0, len(collector.alerts()))