Bladeren bron

AMBARI-15822 Enable HDFS alerts based on AMS metrics by default. (dsen)

Dmytro Sen 9 jaar geleden
bovenliggende
commit
badef3dcc1

+ 12 - 12
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json

@@ -620,7 +620,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC queue latency on datanode port has grown beyond the specified threshold within a given time interval.",
         "interval": 5,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -688,7 +688,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC queue latency on client port has grown beyond the specified threshold within a given time interval.",
         "interval": 5,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -756,7 +756,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC latency on datanode port has grown beyond the specified threshold within a given time interval.",
         "interval": 5,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -824,7 +824,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC latency on client port has grown beyond the specified threshold within a given time interval.",
         "interval": 5,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -892,7 +892,7 @@
         "description": "This service-level alert is triggered if the NN heap usage deviation has grown beyond the specified threshold within a given time interval.",
         "interval": 480,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -953,7 +953,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC latency on datanode port has grown beyond the specified threshold within a given time interval.",
         "interval": 480,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -1021,7 +1021,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC latency on client port has grown beyond the specified threshold within a given time interval.",
         "interval": 480,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -1089,7 +1089,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC latency on datanode port has grown beyond the specified threshold within a given time interval.",
         "interval": 480,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -1157,7 +1157,7 @@
         "description": "This service-level alert is triggered if the deviation of RPC latency on client port has grown beyond the specified threshold within a given time interval.",
         "interval": 480,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -1225,7 +1225,7 @@
         "description": "This service-level alert is triggered if the increase in storage capacity usage deviation has grown beyond the specified threshold within a given time interval.",
         "interval": 480,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -1286,7 +1286,7 @@
         "description": "This service-level alert is triggered if the NN heap usage deviation has grown beyond the specified threshold within a given time interval.",
         "interval": 1440,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
@@ -1347,7 +1347,7 @@
         "description": "This service-level alert is triggered if the increase in storage capacity usage deviation has grown beyond the specified threshold within a given time interval.",
         "interval": 1440,
         "scope": "ANY",
-        "enabled": false,
+        "enabled": true,
         "source": {
           "type": "SCRIPT",
           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",

+ 10 - 3
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_metrics_deviation.py

@@ -96,11 +96,11 @@ class TestAlertMetricsDeviation(RMFTestCase):
 
   @patch("httplib.HTTPConnection")
   def test_alert(self, conn_mock):
-    ca_connection = MagicMock()
+    connection = MagicMock()
     response = MagicMock()
     response.status = 200
-    ca_connection.getresponse.return_value = response
-    conn_mock.return_value = ca_connection
+    connection.getresponse.return_value = response
+    conn_mock.return_value = connection
     response.read.return_value = '{"metrics":[{"metricname":"metric1","metrics":{"1459966360838":1,"1459966370838":3}}]}'
 
     # OK, but no datapoints above the minimum threshold
@@ -137,3 +137,10 @@ class TestAlertMetricsDeviation(RMFTestCase):
     self.assertEqual(status, RESULT_STATE_UNKNOWN)
     self.assertTrue(messages is not None and len(messages) == 1)
     self.assertEquals('Unable to retrieve metrics from AMS.', messages[0])
+
+    # Unable to connect to AMS
+    conn_mock.side_effect = Exception('Unable to connect to AMS')
+    [status, messages] = alert.execute(configurations=configs, parameters=parameters)
+    self.assertEqual(status, RESULT_STATE_UNKNOWN)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEquals('Unable to retrieve metrics from AMS.', messages[0])