Browse Source

AMBARI-8804: Common Services: Script Alerts Are Unable To Find Scripts Located in common-services (Jayush Luniya)

Jayush Luniya 10 years ago
parent
commit
c01fda4506

+ 4 - 2
ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py

@@ -53,9 +53,10 @@ class AlertSchedulerHandler():
   }
 
 
-  def __init__(self, cachedir, stacks_dir, host_scripts_dir, in_minutes=True):
+  def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir, in_minutes=True):
     self.cachedir = cachedir
     self.stacks_dir = stacks_dir
+    self.common_services_dir = common_services_dir
     self.host_scripts_dir = host_scripts_dir
     
     if not os.path.exists(cachedir) and AlertSchedulerHandler.make_cachedir:
@@ -224,6 +225,7 @@ class AlertSchedulerHandler():
       alert = PortAlert(json_definition, source)
     elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
       source['stacks_directory'] = self.stacks_dir
+      source['common_services_directory'] = self.common_services_dir
       source['host_scripts_directory'] = self.host_scripts_dir
       alert = ScriptAlert(json_definition, source)
     elif source_type == AlertSchedulerHandler.TYPE_WEB:
@@ -360,7 +362,7 @@ def main():
   ch.setLevel(logger.level)
   logger.addHandler(ch)
     
-  ash = AlertSchedulerHandler(args[0], args[1], False)
+  ash = AlertSchedulerHandler(args[0], args[1], args[2], False)
   ash.start()
   
   i = 0

+ 2 - 1
ambari-agent/src/main/python/ambari_agent/Controller.py

@@ -86,11 +86,12 @@ class Controller(threading.Thread):
       cache_dir = '/var/lib/ambari-agent/cache'
 
     stacks_cache_dir = os.path.join(cache_dir, FileCache.STACKS_CACHE_DIRECTORY)
+    common_services_cache_dir = os.path.join(cache_dir, FileCache.COMMON_SERVICES_DIRECTORY)
     host_scripts_cache_dir = os.path.join(cache_dir, FileCache.HOST_SCRIPTS_CACHE_DIRECTORY)
     alerts_cache_dir = os.path.join(cache_dir, 'alerts')
     
     self.alert_scheduler_handler = AlertSchedulerHandler(alerts_cache_dir, 
-        stacks_cache_dir, host_scripts_cache_dir)
+        stacks_cache_dir, common_services_cache_dir, host_scripts_cache_dir)
 
 
   def __del__(self):

+ 1 - 0
ambari-agent/src/main/python/ambari_agent/FileCache.py

@@ -39,6 +39,7 @@ class FileCache():
   """
 
   STACKS_CACHE_DIRECTORY="stacks"
+  COMMON_SERVICES_DIRECTORY="common-services"
   CUSTOM_ACTIONS_CACHE_DIRECTORY="custom_actions"
   HOST_SCRIPTS_CACHE_DIRECTORY="host_scripts"
   HASH_SUM_FILE=".hash"

+ 9 - 1
ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py

@@ -41,14 +41,18 @@ class ScriptAlert(BaseAlert):
     
     self.path = None
     self.stacks_dir = None
+    self.common_services_dir = None
     self.host_scripts_dir = None
     
     if 'path' in alert_source_meta:
       self.path = alert_source_meta['path']
       
+    if 'common_services_directory' in alert_source_meta:
+      self.common_services_dir = alert_source_meta['common_services_directory']
+
     if 'stacks_directory' in alert_source_meta:
       self.stacks_dir = alert_source_meta['stacks_directory']
-      
+
     if 'host_scripts_directory' in alert_source_meta:
       self.host_scripts_dir = alert_source_meta['host_scripts_directory']
       
@@ -94,6 +98,10 @@ class ScriptAlert(BaseAlert):
     if not os.path.exists(path_to_script) and self.stacks_dir is not None:      
       path_to_script = os.path.join(self.stacks_dir, *paths)
 
+    # if the path doesn't exist and the common services dir is defined, try that
+    if not os.path.exists(path_to_script) and self.common_services_dir is not None:
+      path_to_script = os.path.join(self.common_services_dir, *paths)
+
     # if the path doesn't exist and the host script dir is defined, try that
     if not os.path.exists(path_to_script) and self.host_scripts_dir is not None:
       path_to_script = os.path.join(self.host_scripts_dir, *paths)

+ 12 - 4
ambari-agent/src/test/python/ambari_agent/TestAlerts.py

@@ -49,9 +49,10 @@ class TestAlerts(TestCase):
   def test_start(self, aps_add_interval_job_mock, aps_start_mock):
     test_file_path = os.path.join('ambari_agent', 'dummy_files')
     test_stack_path = os.path.join('ambari_agent', 'dummy_files')
+    test_common_services_path = os.path.join('ambari_agent', 'dummy_files')
     test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')
 
-    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
+    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_common_services_path, test_host_scripts_path)
     ash.start()
 
     self.assertTrue(aps_add_interval_job_mock.called)
@@ -212,6 +213,7 @@ class TestAlerts(TestCase):
 
     # normally set by AlertSchedulerHandler
     json['source']['stacks_directory'] = os.path.join('ambari_agent', 'dummy_files')
+    json['source']['common_services_directory'] = os.path.join('ambari_agent', 'common-services')
     json['source']['host_scripts_directory'] = os.path.join('ambari_agent', 'host_scripts')
 
     collector = AlertCollector()
@@ -219,6 +221,7 @@ class TestAlerts(TestCase):
     sa.set_helpers(collector, {'foo-site/bar': 'rendered-bar', 'foo-site/baz':'rendered-baz'} )
     self.assertEquals(json['source']['path'], sa.path)
     self.assertEquals(json['source']['stacks_directory'], sa.stacks_dir)
+    self.assertEquals(json['source']['common_services_directory'], sa.common_services_dir)
     self.assertEquals(json['source']['host_scripts_directory'], sa.host_scripts_dir)
 
     sa.collect()
@@ -480,9 +483,10 @@ class TestAlerts(TestCase):
   def test_reschedule(self):
     test_file_path = os.path.join('ambari_agent', 'dummy_files')
     test_stack_path = os.path.join('ambari_agent', 'dummy_files')
+    test_common_services_path = os.path.join('ambari_agent', 'dummy_files')
     test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')
     
-    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
+    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_common_services_path, test_host_scripts_path)
     ash.start()
 
     self.assertEquals(1, ash.get_job_count())
@@ -535,9 +539,10 @@ class TestAlerts(TestCase):
   def test_disabled_definitions(self):
     test_file_path = os.path.join('ambari_agent', 'dummy_files')
     test_stack_path = os.path.join('ambari_agent', 'dummy_files')
+    test_common_services_path = os.path.join('ambari_agent', 'dummy_files')
     test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')
 
-    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
+    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_common_services_path, test_host_scripts_path)
     ash.start()
 
     self.assertEquals(1, ash.get_job_count())
@@ -587,9 +592,10 @@ class TestAlerts(TestCase):
   def test_immediate_alert(self):
     test_file_path = os.path.join('ambari_agent', 'dummy_files')
     test_stack_path = os.path.join('ambari_agent', 'dummy_files')
+    test_common_services_path = os.path.join('ambari_agent', 'dummy_files')
     test_host_scripts_path = os.path.join('ambari_agent', 'dummy_files')
 
-    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_host_scripts_path)
+    ash = AlertSchedulerHandler(test_file_path, test_stack_path, test_common_services_path, test_host_scripts_path)
     ash.start()
 
     self.assertEquals(1, ash.get_job_count())
@@ -646,6 +652,7 @@ class TestAlerts(TestCase):
 
     # normally set by AlertSchedulerHandler
     json['source']['stacks_directory'] = os.path.join('ambari_agent', 'dummy_files')
+    json['source']['common_services_directory'] = os.path.join('ambari_agent', 'common-services')
     json['source']['host_scripts_directory'] = os.path.join('ambari_agent', 'host_scripts')
 
     collector = AlertCollector()
@@ -656,6 +663,7 @@ class TestAlerts(TestCase):
 
     self.assertEquals(json['source']['path'], sa.path)
     self.assertEquals(json['source']['stacks_directory'], sa.stacks_dir)
+    self.assertEquals(json['source']['common_services_directory'], sa.common_services_dir)
     self.assertEquals(json['source']['host_scripts_directory'], sa.host_scripts_dir)
 
     # ensure that it was skipped

+ 1 - 1
ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json

@@ -10,7 +10,7 @@
         "scope": "ANY",
         "source": {
           "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/FLUME/package/files/alert_flume_agent_status.py"
+          "path": "FLUME/1.4.0.2.0/package/files/alert_flume_agent_status.py"
         }
       }
     ]

+ 2 - 2
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json

@@ -330,7 +330,7 @@
         "enabled": true,
         "source": {
           "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/HDFS/package/files/alert_checkpoint_time.py"
+          "path": "HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py"
         }
       },
       {
@@ -343,7 +343,7 @@
         "ignore_host": true,
         "source": {
           "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/HDFS/package/files/alert_ha_namenode_health.py"
+          "path": "HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py"
         }
       }
     ],

+ 2 - 2
ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json

@@ -38,7 +38,7 @@
         "enabled": true,
         "source": {
           "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/HIVE/package/files/alert_hive_thrift_port.py"
+          "path": "HIVE/0.12.0.2.0/package/files/alert_hive_thrift_port.py"
         }
       }
     ],
@@ -52,7 +52,7 @@
         "enabled": true,
         "source": {
           "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/HIVE/package/files/alert_webhcat_server.py"
+          "path": "HIVE/0.12.0.2.0/package/files/alert_webhcat_server.py"
         }
       }    
     ]

+ 0 - 60
ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts.json

@@ -1,60 +0,0 @@
-{
-  "HIVE": {
-    "service": [],
-    "HIVE_METASTORE": [
-      {
-        "name": "hive_metastore_process",
-        "label": "Hive Metastore Process",
-        "description": "This host-level alert is triggered if the Hive Metastore process cannot be determined to be up and listening on the network.",
-        "interval": 1,
-        "scope": "ANY",
-        "source": {
-          "type": "PORT",
-          "uri": "{{hive-site/hive.metastore.uris}}",
-          "default_port": 9083,
-          "reporting": {
-            "ok": {
-              "text": "TCP OK - {0:.3f}s response on port {1}"
-            },
-            "warning": {
-              "text": "TCP OK - {0:.3f}s response on port {1}",
-              "value": 1.5
-            },
-            "critical": {
-              "text": "Connection failed: {0} to {1}:{2}",
-              "value": 5.0
-            }
-          }
-        }
-      }
-    ],
-    "HIVE_SERVER": [
-      {
-        "name": "hive_server_process",
-        "label": "HiveServer2 Process",
-        "description": "This host-level alert is triggered if the HiveServer cannot be determined to be up and responding to client requests.",
-        "interval": 1,
-        "scope": "ANY",
-        "enabled": true,
-        "source": {
-          "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/HIVE/package/files/alert_hive_thrift_port.py"
-        }
-      }
-    ],
-    "WEBHCAT_SERVER": [
-      {
-        "name": "hive_webhcat_server_status",
-        "label": "WebHCat Server Status",
-        "description": "This host-level alert is triggered if the templeton server status is not healthy.",
-        "interval": 1,
-        "scope": "ANY",
-        "enabled": true,
-        "source": {
-          "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/HIVE/package/files/alert_webhcat_server.py"
-        }
-      }    
-    ]
-  }
-}

+ 1 - 1
ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/alerts.json

@@ -34,7 +34,7 @@
         "scope": "ANY",
         "source": {
           "type": "SCRIPT",
-          "path": "HDP/2.0.6/services/OOZIE/package/files/alert_check_oozie_server.py"
+          "path": "OOZIE/4.0.0.2.0/package/files/alert_check_oozie_server.py"
         }
       }
     ]