|
@@ -0,0 +1,139 @@
|
|
|
+{
|
|
|
+ "AMS": {
|
|
|
+ "service": [
|
|
|
+ {
|
|
|
+ "name": "ams_metric_monitor_process_percent",
|
|
|
+ "label": "Percent AMS Metric Monitors Available",
|
|
|
+ "interval": 1,
|
|
|
+ "scope": "SERVICE",
|
|
|
+ "enabled": true,
|
|
|
+ "source": {
|
|
|
+ "type": "AGGREGATE",
|
|
|
+ "alert_name": "ams_metric_monitor_process",
|
|
|
+ "reporting": {
|
|
|
+ "ok": {
|
|
|
+ "text": "affected: [{1}], total: [{0}]"
|
|
|
+ },
|
|
|
+ "warning": {
|
|
|
+ "text": "affected: [{1}], total: [{0}]",
|
|
|
+ "value": 0.1
|
|
|
+ },
|
|
|
+ "critical": {
|
|
|
+ "text": "affected: [{1}], total: [{0}]",
|
|
|
+ "value": 0.3
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "METRIC_COLLECTOR": [
|
|
|
+ {
|
|
|
+ "name": "ams_metric_collector_process",
|
|
|
+ "label": "AMS Metric Collector Process",
|
|
|
+ "interval": 1,
|
|
|
+ "scope": "ANY",
|
|
|
+ "enabled": true,
|
|
|
+ "source": {
|
|
|
+ "type": "PORT",
|
|
|
+ "uri": "8188",
|
|
|
+ "default_port": 8188,
|
|
|
+ "reporting": {
|
|
|
+ "ok": {
|
|
|
+ "text": "TCP OK - {0:.4f} response on port {1}"
|
|
|
+ },
|
|
|
+ "critical": {
|
|
|
+ "text": "Connection failed: {0} to {1}:{2}"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "ams_metric_collector_hbase_master_process",
|
|
|
+ "label": "AMS Metric Collector HBase Master Process",
|
|
|
+ "interval": 1,
|
|
|
+ "scope": "ANY",
|
|
|
+ "source": {
|
|
|
+ "type": "PORT",
|
|
|
+ "uri": "{{ams-hbase-site/hbase.master.info.port}}",
|
|
|
+ "default_port": 61310,
|
|
|
+ "reporting": {
|
|
|
+ "ok": {
|
|
|
+ "text": "TCP OK - {0:.4f} response on port {1}"
|
|
|
+ },
|
|
|
+ "critical": {
|
|
|
+ "text": "Connection failed: {0} to {1}:{2}"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "ams_metric_collector_hbase_master_cpu",
|
|
|
+ "label": "AMS Metric Collector HBase Maser CPU Utilization",
|
|
|
+ "interval": 5,
|
|
|
+ "scope": "ANY",
|
|
|
+ "enabled": true,
|
|
|
+ "source": {
|
|
|
+ "type": "METRIC",
|
|
|
+ "uri": {
|
|
|
+ "http": "{{ams-hbase-site/hbase.master.info.port}}",
|
|
|
+ "https": "{{ams-hbase-site/hbase.master.info.port}}",
|
|
|
+ "https_property": "{{cluster-env/security_enabled}}",
|
|
|
+ "https_property_value": "true",
|
|
|
+ "default_port": 61310
|
|
|
+ },
|
|
|
+ "reporting": {
|
|
|
+ "ok": {
|
|
|
+ "text": "{1} CPU, load {0:.1%}"
|
|
|
+ },
|
|
|
+ "warning": {
|
|
|
+ "text": "{1} CPU, load {0:.1%}",
|
|
|
+ "value": 200
|
|
|
+ },
|
|
|
+ "critical": {
|
|
|
+ "text": "{1} CPU, load {0:.1%}",
|
|
|
+ "value": 250
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "jmx": {
|
|
|
+ "property_list": [
|
|
|
+ "java.lang:type=OperatingSystem/SystemCpuLoad",
|
|
|
+ "java.lang:type=OperatingSystem/AvailableProcessors"
|
|
|
+ ],
|
|
|
+ "value": "{0} * 100"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "ams_metric_collector_zookeeper_server_process",
|
|
|
+ "label": "AMS Metric Collector ZooKeeper Server Process",
|
|
|
+ "interval": 1,
|
|
|
+ "scope": "ANY",
|
|
|
+ "source": {
|
|
|
+ "type": "PORT",
|
|
|
+ "uri": "{{ams-hbase-site/hbase.zookeeper.property.clientPort}}",
|
|
|
+ "default_port": 61181,
|
|
|
+ "reporting": {
|
|
|
+ "ok": {
|
|
|
+ "text": "TCP OK - {0:.4f} response on port {1}"
|
|
|
+ },
|
|
|
+ "critical": {
|
|
|
+ "text": "Connection failed: {0} to {1}:{2}"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "METRIC_MONITOR": [
|
|
|
+ {
|
|
|
+ "name": "ams_metric_monitor_process",
|
|
|
+ "label": "AMS Metric Monitor Status",
|
|
|
+ "interval": 1,
|
|
|
+ "scope": "ANY",
|
|
|
+ "source": {
|
|
|
+ "type": "SCRIPT",
|
|
|
+ "path": "HDP/2.2/services/AMS/package/files/alert_ambari_metrics_monitor.py"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+}
|