Browse Source

AMBARI-13434. Expose Alert Grace Period Setting in Agents (aonishuk)

Andrew Onishuk 10 years ago
parent
commit
a509510ee7

+ 1 - 0
ambari-agent/conf/unix/ambari-agent.ini

@@ -29,6 +29,7 @@ cache_dir=/var/lib/ambari-agent/cache
 tolerate_download_failures=true
 run_as_user=root
 parallel_execution=0
+alert_grace_period=5
 
 [security]
 keysdir=/var/lib/ambari-agent/keys

+ 1 - 0
ambari-agent/conf/windows/ambari-agent.ini

@@ -29,6 +29,7 @@ ping_port=8670
 cache_dir=cache
 tolerate_download_failures=true
 parallel_execution=0
+alert_grace_period=5
 
 [security]
 keysdir=keys

+ 11 - 10
ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py

@@ -43,14 +43,8 @@ class AlertSchedulerHandler():
   TYPE_SCRIPT = 'SCRIPT'
   TYPE_WEB = 'WEB'
 
-  APS_CONFIG = { 
-    'threadpool.core_threads': 3,
-    'coalesce': True,
-    'standalone': False
-  }
-
   def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
-      cluster_configuration, config, in_minutes=True):
+      alert_grace_period, cluster_configuration, config, in_minutes=True):
 
     self.cachedir = cachedir
     self.stacks_dir = stacks_dir
@@ -65,8 +59,15 @@ class AlertSchedulerHandler():
       except:
         logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir))
 
+    self.APS_CONFIG = {
+      'apscheduler.threadpool.core_threads': 3,
+      'apscheduler.coalesce': True,
+      'apscheduler.standalone': False,
+      'apscheduler.misfire_grace_time': alert_grace_period
+    }
+
     self._collector = AlertCollector()
-    self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
+    self.__scheduler = Scheduler(self.APS_CONFIG)
     self.__in_minutes = in_minutes
     self.config = config
 
@@ -122,7 +123,7 @@ class AlertSchedulerHandler():
 
     if self.__scheduler.running:
       self.__scheduler.shutdown(wait=False)
-      self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
+      self.__scheduler = Scheduler(self.APS_CONFIG)
 
     alert_callables = self.__load_definitions()
 
@@ -139,7 +140,7 @@ class AlertSchedulerHandler():
   def stop(self):
     if not self.__scheduler is None:
       self.__scheduler.shutdown(wait=False)
-      self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
+      self.__scheduler = Scheduler(self.APS_CONFIG)
 
     logger.info("[AlertScheduler] Stopped the alert scheduler.")
 

+ 3 - 1
ambari-agent/src/main/python/ambari_agent/Controller.py

@@ -101,9 +101,11 @@ class Controller(threading.Thread):
 
     self.move_data_dir_mount_file()
 
+    self.alert_grace_period = int(config.get('agent', 'alert_grace_period', 5))
+
     self.alert_scheduler_handler = AlertSchedulerHandler(alerts_cache_dir, 
       stacks_cache_dir, common_services_cache_dir, host_scripts_cache_dir,
-      self.cluster_configuration, config)
+      self.alert_grace_period, self.cluster_configuration, config)
 
     self.alert_scheduler_handler.start()
 

+ 11 - 11
ambari-agent/src/test/python/ambari_agent/TestAlertSchedulerHandler.py

@@ -42,7 +42,7 @@ class TestAlertSchedulerHandler(TestCase):
     self.assertEquals(len(definitions), 1)
 
   def test_json_to_callable_metric(self):
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     json_definition = {
       'source': {
         'type': 'METRIC'
@@ -63,7 +63,7 @@ class TestAlertSchedulerHandler(TestCase):
       }
     }
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     callable_result = scheduler._AlertSchedulerHandler__json_to_callable('cluster', 'host', copy.deepcopy(json_definition))
 
     self.assertTrue(callable_result is not None)
@@ -79,7 +79,7 @@ class TestAlertSchedulerHandler(TestCase):
       }
     }
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     callable_result = scheduler._AlertSchedulerHandler__json_to_callable('cluster', 'host', copy.deepcopy(json_definition))
 
     self.assertTrue(callable_result is not None)
@@ -94,7 +94,7 @@ class TestAlertSchedulerHandler(TestCase):
       }
     }
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     callable_result = scheduler._AlertSchedulerHandler__json_to_callable('cluster', 'host', copy.deepcopy(json_definition))
 
     self.assertTrue(callable_result is None)
@@ -102,7 +102,7 @@ class TestAlertSchedulerHandler(TestCase):
   def test_execute_alert_noneScheduler(self):
     execution_commands = []
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     scheduler._AlertSchedulerHandler__scheduler = None
     alert_mock = Mock()
     scheduler._AlertSchedulerHandler__json_to_callable = Mock(return_value=alert_mock)
@@ -114,7 +114,7 @@ class TestAlertSchedulerHandler(TestCase):
   def test_execute_alert_noneCommands(self):
     execution_commands = None
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     alert_mock = Mock()
     scheduler._AlertSchedulerHandler__json_to_callable = Mock(return_value=alert_mock)
 
@@ -125,7 +125,7 @@ class TestAlertSchedulerHandler(TestCase):
   def test_execute_alert_emptyCommands(self):
     execution_commands = []
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     alert_mock = Mock()
     scheduler._AlertSchedulerHandler__json_to_callable = Mock(return_value=alert_mock)
 
@@ -144,7 +144,7 @@ class TestAlertSchedulerHandler(TestCase):
       }
     ]
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     alert_mock = MagicMock()
     alert_mock.collect = Mock()
     alert_mock.set_helpers = Mock()
@@ -159,7 +159,7 @@ class TestAlertSchedulerHandler(TestCase):
     self.assertTrue(alert_mock.collect.called)
 
   def test_load_definitions(self):
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     scheduler._AlertSchedulerHandler__config_maps = {
       'cluster': {}
     }
@@ -170,7 +170,7 @@ class TestAlertSchedulerHandler(TestCase):
     self.assertTrue(isinstance(alert_def, PortAlert))
 
   def test_load_definitions_noFile(self):
-    scheduler = AlertSchedulerHandler('wrong_path', 'wrong_path', 'wrong_path', 'wrong_path', None, None)
+    scheduler = AlertSchedulerHandler('wrong_path', 'wrong_path', 'wrong_path', 'wrong_path', 5, None, None)
     scheduler._AlertSchedulerHandler__config_maps = {
       'cluster': {}
     }
@@ -190,7 +190,7 @@ class TestAlertSchedulerHandler(TestCase):
       }
     ]
 
-    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, None)
+    scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None)
     alert_mock = MagicMock()
     alert_mock.interval = Mock(return_value=5)
     alert_mock.collect = Mock()

+ 4 - 4
ambari-agent/src/test/python/ambari_agent/TestAlerts.py

@@ -59,7 +59,7 @@ class TestAlerts(TestCase):
     cluster_configuration = self.__get_cluster_configuration()
 
     ash = AlertSchedulerHandler(test_file_path, test_stack_path,
-      test_common_services_path, test_host_scripts_path, cluster_configuration,
+      test_common_services_path, test_host_scripts_path, 5, cluster_configuration,
       None)
 
     ash.start()
@@ -474,7 +474,7 @@ class TestAlerts(TestCase):
     cluster_configuration = self.__get_cluster_configuration()
 
     ash = AlertSchedulerHandler(test_file_path, test_stack_path,
-      test_common_services_path, test_host_scripts_path, cluster_configuration,
+      test_common_services_path, test_host_scripts_path, 5, cluster_configuration,
       None)
 
     ash.start()
@@ -521,7 +521,7 @@ class TestAlerts(TestCase):
     cluster_configuration = self.__get_cluster_configuration()
 
     ash = AlertSchedulerHandler(test_file_path, test_stack_path,
-      test_common_services_path, test_host_scripts_path, cluster_configuration,
+      test_common_services_path, test_host_scripts_path, 5, cluster_configuration,
       None)
 
     ash.start()
@@ -557,7 +557,7 @@ class TestAlerts(TestCase):
 
     cluster_configuration = self.__get_cluster_configuration()
     ash = AlertSchedulerHandler(test_file_path, test_stack_path,
-      test_common_services_path, test_host_scripts_path, cluster_configuration,
+      test_common_services_path, test_host_scripts_path, 5, cluster_configuration,
       None)
 
     ash.start()