9 gadi atpakaļ · 752b5a0518
--- a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/PhoenixHBaseAccessor.java
+++ b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/PhoenixHBaseAccessor.java
@@ -341,6 +341,10 @@ public class PhoenixHBaseAccessor {
 
				     boolean enableNormalizer = hbaseConf.getBoolean("hbase.normalizer.enabled", true);
			
 
				     boolean enableFifoCompaction = metricsConf.getBoolean("timeline.metrics.hbase.fifo.compaction.enabled", true);
			
 
				 
			
 
				+    if (!enableNormalizer && !enableFifoCompaction) {
			
 
				+      return;
			
 
				+    }
			
 
				+
			
 
				     HBaseAdmin hBaseAdmin = null;
			
 
				     try {
			
 
				       hBaseAdmin = dataSource.getHBaseAdmin();
			
@@ -352,10 +356,10 @@ public class PhoenixHBaseAccessor {
 
				       for (String tableName : PHOENIX_TABLES) {
			
 
				         try {
			
 
				           boolean modifyTable = false;
			
 
				-          HTableDescriptor tableDescriptor = hBaseAdmin.getTableDescriptor
			
 
				-            (tableName.getBytes());
			
 
				+          HTableDescriptor tableDescriptor = hBaseAdmin.getTableDescriptor(tableName.getBytes());
			
 
				 
			
 
				-          if (enableNormalizer && !tableDescriptor.isNormalizationEnabled()) {
			
 
				+          if (enableNormalizer &&
			
 
				+              !tableDescriptor.isNormalizationEnabled()) {
			
 
				             tableDescriptor.setNormalizationEnabled(true);
			
 
				             LOG.info("Enabling normalizer for " + tableName);
			
 
				             modifyTable = true;
			
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
@@ -298,11 +298,11 @@
 
				             },
			
 
				             "warning": {
			
 
				               "text": "Capacity Used:[{2:.0f}%, {0}], Capacity Remaining:[{1}]",
			
 
				-              "value": 75
			
 
				+              "value": 80
			
 
				             },          
			
 
				             "critical": {
			
 
				               "text": "Capacity Used:[{2:.0f}%, {0}], Capacity Remaining:[{1}]",
			
 
				-              "value": 80
			
 
				+              "value": 90
			
 
				             },
			
 
				             "units" : "%"
			
 
				           },
			
@@ -522,12 +522,12 @@
 
				         }
			
 
				       },
			
 
				       {
			
 
				-        "name": "namenode_service_rpc_queue_latency_hourly",
			
 
				-        "label": "NameNode Service RPC Queue Latency (Hourly)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC queue latency on datanode port has grown beyond the specified threshold within a given time interval.",
			
 
				+        "name": "increase_nn_heap_usage_hourly",
			
 
				+        "label": "NameNode Heap Usage (Hourly)",
			
 
				+        "description": "This service-level alert is triggered if the NN heap usage deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 5,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -556,7 +556,7 @@
 
				             {
			
 
				               "name": "metricName",
			
 
				               "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.datanode.RpcQueueTimeAvgTime",
			
 
				+              "value": "jvm.JvmMetrics.MemHeapUsedM",
			
 
				               "type": "STRING",
			
 
				               "description": "The metric to monitor."
			
 
				             },
			
@@ -575,24 +575,17 @@
 
				               "units": "%",
			
 
				               "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				             }
			
 
				           ]
			
 
				         }
			
 
				       },
			
 
				       {
			
 
				-        "name": "namenode_client_rpc_queue_latency_hourly",
			
 
				-        "label": "NameNode Client RPC Queue Latency (Hourly)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC queue latency on client port has grown beyond the specified threshold within a given time interval.",
			
 
				+        "name": "namenode_service_rpc_latency_hourly",
			
 
				+        "label": "NameNode RPC Latency (Hourly)",
			
 
				+        "description": "This service-level alert is triggered if the Service-RPC latency deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 5,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -621,7 +614,7 @@
 
				             {
			
 
				               "name": "metricName",
			
 
				               "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.client.RpcQueueTimeAvgTime",
			
 
				+              "value": "rpc.rpc.RpcProcessingTimeAvgTime",
			
 
				               "type": "STRING",
			
 
				               "description": "The metric to monitor."
			
 
				             },
			
@@ -640,24 +633,17 @@
 
				               "units": "%",
			
 
				               "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				             }
			
 
				           ]
			
 
				         }
			
 
				       },
			
 
				       {
			
 
				-        "name": "namenode_service_rpc_processing_latency_hourly",
			
 
				-        "label": "NameNode Service RPC Processing Latency (Hourly)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC latency on datanode port has grown beyond the specified threshold within a given time interval.",
			
 
				+        "name": "namenode_increase_in_storage_capacity_usage_hourly",
			
 
				+        "label": "HDFS Storage Capacity Usage (Hourly)",
			
 
				+        "description": "This service-level alert is triggered if the increase in storage capacity usage deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 5,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -665,7 +651,7 @@
 
				             {
			
 
				               "name": "mergeHaMetrics",
			
 
				               "display_name": "Whether active and stanby NameNodes metrics should be merged",
			
 
				-              "value": "false",
			
 
				+              "value": "true",
			
 
				               "type": "STRING",
			
 
				               "description": "Whether active and stanby NameNodes metrics should be merged."
			
 
				             },
			
@@ -686,7 +672,7 @@
 
				             {
			
 
				               "name": "metricName",
			
 
				               "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.datanode.RpcProcessingTimeAvgTime",
			
 
				+              "value": "dfs.FSNamesystem.CapacityUsed",
			
 
				               "type": "STRING",
			
 
				               "description": "The metric to monitor."
			
 
				             },
			
@@ -705,78 +691,6 @@
 
				               "units": "%",
			
 
				               "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				-            }
			
 
				-          ]
			
 
				-        }
			
 
				-      },
			
 
				-      {
			
 
				-        "name": "namenode_client_rpc_processing_latency_hourly",
			
 
				-        "label": "NameNode Client RPC Processing Latency (Hourly)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC latency on client port has grown beyond the specified threshold within a given time interval.",
			
 
				-        "interval": 5,
			
 
				-        "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				-        "source": {
			
 
				-          "type": "SCRIPT",
			
 
				-          "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
 
				-          "parameters": [
			
 
				-            {
			
 
				-              "name": "mergeHaMetrics",
			
 
				-              "display_name": "Whether active and stanby NameNodes metrics should be merged",
			
 
				-              "value": "false",
			
 
				-              "type": "STRING",
			
 
				-              "description": "Whether active and stanby NameNodes metrics should be merged."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "interval",
			
 
				-              "display_name": "Time interval in minutes",
			
 
				-              "value": 60,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Time interval in minutes."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "appId",
			
 
				-              "display_name": "AMS application id",
			
 
				-              "value": "NAMENODE",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The application id used to retrieve the metric."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metricName",
			
 
				-              "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.client.RpcProcessingTimeAvgTime",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The metric to monitor."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metric.deviation.warning.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a warning is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 100,
			
 
				-              "threshold": "WARNING"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metric.deviation.critical.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a critical alert is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 200,
			
 
				-              "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				             }
			
 
				           ]
			
 
				         }
			
@@ -787,7 +701,7 @@
 
				         "description": "This service-level alert is triggered if the NN heap usage deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 480,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -820,64 +734,6 @@
 
				               "type": "STRING",
			
 
				               "description": "The metric to monitor."
			
 
				             },
			
 
				-            {
			
 
				-              "name": "metric.deviation.warning.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a warning is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 20,
			
 
				-              "threshold": "WARNING"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metric.deviation.critical.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a critical alert is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 50,
			
 
				-              "threshold": "CRITICAL"
			
 
				-            }
			
 
				-          ]
			
 
				-        }
			
 
				-      },
			
 
				-      {
			
 
				-        "name": "namenode_service_rpc_processing_latency_daily",
			
 
				-        "label": "NameNode Service RPC Processing Latency (Daily)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC latency on datanode port has grown beyond the specified threshold within a given time interval.",
			
 
				-        "interval": 480,
			
 
				-        "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				-        "source": {
			
 
				-          "type": "SCRIPT",
			
 
				-          "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
 
				-          "parameters": [
			
 
				-            {
			
 
				-              "name": "mergeHaMetrics",
			
 
				-              "display_name": "Whether active and stanby NameNodes metrics should be merged",
			
 
				-              "value": "false",
			
 
				-              "type": "STRING",
			
 
				-              "description": "Whether active and stanby NameNodes metrics should be merged."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "interval",
			
 
				-              "display_name": "Time interval in minutes",
			
 
				-              "value": 1440,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Time interval in minutes."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "appId",
			
 
				-              "display_name": "AMS application id",
			
 
				-              "value": "NAMENODE",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The application id used to retrieve the metric."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metricName",
			
 
				-              "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.datanode.RpcProcessingTimeAvgTime",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The metric to monitor."
			
 
				-            },
			
 
				             {
			
 
				               "name": "metric.deviation.warning.threshold",
			
 
				               "display_name": "The standard deviation threshold above which a warning is produced.",
			
@@ -893,24 +749,17 @@
 
				               "units": "%",
			
 
				               "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				             }
			
 
				           ]
			
 
				         }
			
 
				       },
			
 
				       {
			
 
				-        "name": "namenode_client_rpc_processing_latency_daily",
			
 
				-        "label": "NameNode Client RPC Processing Latency (Daily)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC latency on client port has grown beyond the specified threshold within a given time interval.",
			
 
				+        "name": "namenode_service_rpc_latency_daily",
			
 
				+        "label": "NameNode RPC Latency (Daily)",
			
 
				+        "description": "This service-level alert is triggered if the Service-RPC latency deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 480,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -939,7 +788,7 @@
 
				             {
			
 
				               "name": "metricName",
			
 
				               "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.client.RpcProcessingTimeAvgTime",
			
 
				+              "value": "rpc.rpc.RpcProcessingTimeAvgTime",
			
 
				               "type": "STRING",
			
 
				               "description": "The metric to monitor."
			
 
				             },
			
@@ -958,143 +807,6 @@
 
				               "units": "%",
			
 
				               "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				-            }
			
 
				-          ]
			
 
				-        }
			
 
				-      },
			
 
				-      {
			
 
				-        "name": "namenode_service_rpc_queue_latency_daily",
			
 
				-        "label": "NameNode Service RPC Queue Latency (Daily)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC latency on datanode port has grown beyond the specified threshold within a given time interval.",
			
 
				-        "interval": 480,
			
 
				-        "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				-        "source": {
			
 
				-          "type": "SCRIPT",
			
 
				-          "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
 
				-          "parameters": [
			
 
				-            {
			
 
				-              "name": "mergeHaMetrics",
			
 
				-              "display_name": "Whether active and stanby NameNodes metrics should be merged",
			
 
				-              "value": "false",
			
 
				-              "type": "STRING",
			
 
				-              "description": "Whether active and stanby NameNodes metrics should be merged."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "interval",
			
 
				-              "display_name": "Time interval in minutes",
			
 
				-              "value": 1440,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Time interval in minutes."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "appId",
			
 
				-              "display_name": "AMS application id",
			
 
				-              "value": "NAMENODE",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The application id used to retrieve the metric."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metricName",
			
 
				-              "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.datanode.RpcQueueTimeAvgTime",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The metric to monitor."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metric.deviation.warning.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a warning is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 100,
			
 
				-              "threshold": "WARNING"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metric.deviation.critical.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a critical alert is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 200,
			
 
				-              "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				-            }
			
 
				-          ]
			
 
				-        }
			
 
				-      },
			
 
				-      {
			
 
				-        "name": "namenode_client_rpc_queue_latency_daily",
			
 
				-        "label": "NameNode Client RPC Queue Latency (Daily)",
			
 
				-        "description": "This service-level alert is triggered if the deviation of RPC latency on client port has grown beyond the specified threshold within a given time interval.",
			
 
				-        "interval": 480,
			
 
				-        "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				-        "source": {
			
 
				-          "type": "SCRIPT",
			
 
				-          "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
 
				-          "parameters": [
			
 
				-            {
			
 
				-              "name": "mergeHaMetrics",
			
 
				-              "display_name": "Whether active and stanby NameNodes metrics should be merged",
			
 
				-              "value": "false",
			
 
				-              "type": "STRING",
			
 
				-              "description": "Whether active and stanby NameNodes metrics should be merged."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "interval",
			
 
				-              "display_name": "Time interval in minutes",
			
 
				-              "value": 1440,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Time interval in minutes."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "appId",
			
 
				-              "display_name": "AMS application id",
			
 
				-              "value": "NAMENODE",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The application id used to retrieve the metric."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metricName",
			
 
				-              "display_name": "Metric Name",
			
 
				-              "value": "rpc.rpc.client.RpcQueueTimeAvgTime",
			
 
				-              "type": "STRING",
			
 
				-              "description": "The metric to monitor."
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metric.deviation.warning.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a warning is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 100,
			
 
				-              "threshold": "WARNING"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "metric.deviation.critical.threshold",
			
 
				-              "display_name": "The standard deviation threshold above which a critical alert is produced.",
			
 
				-              "type": "PERCENT",
			
 
				-              "units": "%",
			
 
				-              "value": 200,
			
 
				-              "threshold": "CRITICAL"
			
 
				-            },
			
 
				-            {
			
 
				-              "name": "minimumValue",
			
 
				-              "display_name": "Minimum Latency (in seconds)",
			
 
				-              "value": 30,
			
 
				-              "type": "NUMERIC",
			
 
				-              "description": "Minimum latency time to measure (in seconds)."
			
 
				             }
			
 
				           ]
			
 
				         }
			
@@ -1105,7 +817,7 @@
 
				         "description": "This service-level alert is triggered if the increase in storage capacity usage deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 480,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -1113,7 +825,7 @@
 
				             {
			
 
				               "name": "mergeHaMetrics",
			
 
				               "display_name": "Whether active and stanby NameNodes metrics should be merged",
			
 
				-              "value": "false",
			
 
				+              "value": "true",
			
 
				               "type": "STRING",
			
 
				               "description": "Whether active and stanby NameNodes metrics should be merged."
			
 
				             },
			
@@ -1143,7 +855,7 @@
 
				               "display_name": "The standard deviation threshold above which a warning is produced.",
			
 
				               "type": "PERCENT",
			
 
				               "units": "%",
			
 
				-              "value": 30,
			
 
				+              "value": 100,
			
 
				               "threshold": "WARNING"
			
 
				             },
			
 
				             {
			
@@ -1151,7 +863,7 @@
 
				               "display_name": "The standard deviation threshold above which a critical alert is produced.",
			
 
				               "type": "PERCENT",
			
 
				               "units": "%",
			
 
				-              "value": 50,
			
 
				+              "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				             }
			
 
				           ]
			
@@ -1163,7 +875,7 @@
 
				         "description": "This service-level alert is triggered if the NN heap usage deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 1440,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -1201,7 +913,7 @@
 
				               "display_name": "The standard deviation threshold above which a warning is produced.",
			
 
				               "type": "PERCENT",
			
 
				               "units": "%",
			
 
				-              "value": 20,
			
 
				+              "value": 100,
			
 
				               "threshold": "WARNING"
			
 
				             },
			
 
				             {
			
@@ -1209,7 +921,7 @@
 
				               "display_name": "The standard deviation threshold above which a critical alert is produced.",
			
 
				               "type": "PERCENT",
			
 
				               "units": "%",
			
 
				-              "value": 50,
			
 
				+              "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				             }
			
 
				           ]
			
@@ -1221,7 +933,7 @@
 
				         "description": "This service-level alert is triggered if the increase in storage capacity usage deviation has grown beyond the specified threshold within a given time interval.",
			
 
				         "interval": 1440,
			
 
				         "scope": "ANY",
			
 
				-        "enabled": true,
			
 
				+        "enabled": false,
			
 
				         "source": {
			
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py",
			
@@ -1229,7 +941,7 @@
 
				             {
			
 
				               "name": "mergeHaMetrics",
			
 
				               "display_name": "Whether active and stanby NameNodes metrics should be merged",
			
 
				-              "value": "false",
			
 
				+              "value": "true",
			
 
				               "type": "STRING",
			
 
				               "description": "Whether active and stanby NameNodes metrics should be merged."
			
 
				             },
			
@@ -1259,7 +971,7 @@
 
				               "display_name": "The standard deviation threshold above which a warning is produced.",
			
 
				               "type": "PERCENT",
			
 
				               "units": "%",
			
 
				-              "value": 10,
			
 
				+              "value": 100,
			
 
				               "threshold": "WARNING"
			
 
				             },
			
 
				             {
			
@@ -1267,7 +979,7 @@
 
				               "display_name": "The standard deviation threshold above which a critical alert is produced.",
			
 
				               "type": "PERCENT",
			
 
				               "units": "%",
			
 
				-              "value": 20,
			
 
				+              "value": 200,
			
 
				               "threshold": "CRITICAL"
			
 
				             }
			
 
				           ]
			
@@ -1449,11 +1161,11 @@
 
				             },
			
 
				             "warning": {
			
 
				               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:.0f}% Used, {1}]",
			
 
				-              "value": 75
			
 
				+              "value": 80
			
 
				             },
			
 
				             "critical": {
			
 
				               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:.0f}% Used, {1}]",
			
 
				-              "value": 80
			
 
				+              "value": 90
			
 
				             },
			
 
				             "units" : "%"
			
 
				           },
			
@@ -1477,47 +1189,6 @@
 
				           "type": "SCRIPT",
			
 
				           "path": "HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py"
			
 
				         }
			
 
				-      },
			
 
				-      {
			
 
				-        "name": "datanode_heap_usage",
			
 
				-        "label": "DataNode Heap Usage",
			
 
				-        "description": "This host-level alert is triggered if heap usage goes past thresholds on the DataNode. It checks the DataNode JMXServlet for the MemHeapUsedM and MemHeapMaxM properties. The threshold values are in percent.",
			
 
				-        "interval": 2,
			
 
				-        "scope": "HOST",
			
 
				-        "enabled": true,
			
 
				-        "source": {
			
 
				-          "type": "METRIC",
			
 
				-          "uri": {
			
 
				-            "http": "{{hdfs-site/dfs.datanode.http.address}}",
			
 
				-            "https": "{{hdfs-site/dfs.datanode.https.address}}",
			
 
				-            "kerberos_keytab": "{{hdfs-site/dfs.web.authentication.kerberos.keytab}}",
			
 
				-            "kerberos_principal": "{{hdfs-site/dfs.web.authentication.kerberos.principal}}",
			
 
				-            "https_property": "{{hdfs-site/dfs.http.policy}}",
			
 
				-            "https_property_value": "HTTPS_ONLY",
			
 
				-            "connection_timeout": 5.0
			
 
				-          },
			
 
				-          "reporting": {
			
 
				-            "ok": {
			
 
				-              "text": "Used Heap:[{2:.0f}%, {0} MB], Max Heap: {1} MB"
			
 
				-            },
			
 
				-            "warning": {
			
 
				-              "text": "Used Heap:[{2:.0f}%, {0} MB], Max Heap: {1} MB",
			
 
				-              "value": 80
			
 
				-            },
			
 
				-            "critical": {
			
 
				-              "text": "Used Heap:[{2:.0f}%, {0} MB], Max Heap: {1} MB",
			
 
				-              "value": 90
			
 
				-            },
			
 
				-            "units" : "%"
			
 
				-          },
			
 
				-          "jmx": {
			
 
				-            "property_list": [
			
 
				-              "Hadoop:service=DataNode,name=JvmMetrics/MemHeapUsedM",
			
 
				-              "Hadoop:service=DataNode,name=JvmMetrics/MemHeapMaxM"
			
 
				-            ],
			
 
				-            "value": "100.0 - (({1} - {0})/{1} * 100.0)"
			
 
				-          }
			
 
				-        }
			
 
				       }
			
 
				     ],
			
 
				     "ZKFC": [
			
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py
@@ -70,9 +70,6 @@ DEVIATION_CRITICAL_THRESHOLD_KEY = 'metric.deviation.critical.threshold'
 
				 DEVIATION_CRITICAL_THRESHOLD_DEFAULT = 10
			
 
				 DEVIATION_WARNING_THRESHOLD_KEY = 'metric.deviation.warning.threshold'
			
 
				 DEVIATION_WARNING_THRESHOLD_DEFAULT = 5
			
 
				-NAMENODE_SERVICE_RPC_PORT_KEY = ''
			
 
				-
			
 
				-MINIMUM_VALUE_THRESHOLD_KEY = 'minimumValue'
			
 
				 
			
 
				 AMS_METRICS_GET_URL = "/ws/v1/timeline/metrics?%s"
			
 
				 
			
@@ -133,10 +130,6 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				   if DEVIATION_CRITICAL_THRESHOLD_KEY in parameters:
			
 
				     critical_threshold = int(parameters[DEVIATION_CRITICAL_THRESHOLD_KEY])
			
 
				 
			
 
				-  minimum_value_threshold = None
			
 
				-  if MINIMUM_VALUE_THRESHOLD_KEY in parameters:
			
 
				-    minimum_value_threshold = int(parameters[MINIMUM_VALUE_THRESHOLD_KEY])
			
 
				-
			
 
				   #parse configuration
			
 
				   if configurations is None:
			
 
				     return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
			
@@ -156,16 +149,6 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				     else:
			
 
				       return (RESULT_STATE_UNKNOWN, ['{0} value should be set as "fqdn_hostname:port", but set to {1}'.format(METRICS_COLLECTOR_WEBAPP_ADDRESS_KEY, configurations[METRICS_COLLECTOR_WEBAPP_ADDRESS_KEY])])
			
 
				 
			
 
				-  namenode_service_rpc_address = None
			
 
				-  # hdfs-site is required
			
 
				-  if not HDFS_SITE_KEY in configurations:
			
 
				-    return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
			
 
				-
			
 
				-  hdfs_site = configurations[HDFS_SITE_KEY]
			
 
				-
			
 
				-  if 'dfs.namenode.servicerpc-address' in hdfs_site:
			
 
				-    namenode_service_rpc_address = hdfs_site['dfs.namenode.servicerpc-address']
			
 
				-
			
 
				   # if namenode alert and HA mode
			
 
				   if NAMESERVICE_KEY in configurations and app_id.lower() == 'namenode':
			
 
				     # hdfs-site is required
			
@@ -203,6 +186,7 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				     kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS)
			
 
				 
			
 
				     name_service = configurations[NAMESERVICE_KEY]
			
 
				+    hdfs_site = configurations[HDFS_SITE_KEY]
			
 
				 
			
 
				     # look for dfs.ha.namenodes.foo
			
 
				     nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
			
@@ -223,7 +207,7 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				     active_namenodes = []
			
 
				     nn_unique_ids = hdfs_site[nn_unique_ids_key].split(',')
			
 
				     for nn_unique_id in nn_unique_ids:
			
 
				-      key = namenode_http_fragment.format(name_service, nn_unique_id)
			
 
				+      key = namenode_http_fragment.format(name_service,nn_unique_id)
			
 
				 
			
 
				       if key in hdfs_site:
			
 
				         # use str() to ensure that unicode strings do not have the u' in them
			
@@ -250,32 +234,21 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				 
			
 
				           if state == HDFS_NN_STATE_ACTIVE:
			
 
				             active_namenodes.append(namenode)
			
 
				-
			
 
				-            # Only check active NN
			
 
				-            nn_service_rpc_address_key = 'dfs.namenode.servicerpc-address.{0}.{1}'.format(name_service, nn_unique_id)
			
 
				-            if nn_service_rpc_address_key in hdfs_site:
			
 
				-              namenode_service_rpc_address = hdfs_site[nn_service_rpc_address_key]
			
 
				-          pass
			
 
				         except:
			
 
				           logger.exception("Unable to determine active NameNode")
			
 
				-    pass
			
 
				+
			
 
				 
			
 
				     if merge_ha_metrics:
			
 
				       hostnames = ",".join(namenodes)
			
 
				-      # run only on active NN, no need to run the same requests from the standby
			
 
				+      # run only on active NN, no need to run the same requests from the
			
 
				       if host_name not in active_namenodes:
			
 
				         return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
			
 
				-    pass
			
 
				-
			
 
				-  # Skip service rpc alert if port is not enabled
			
 
				-  if not namenode_service_rpc_address and 'rpc.rpc.datanode' in metric_name:
			
 
				-    return (RESULT_STATE_SKIPPED, ['Service RPC port is not enabled.'])
			
 
				 
			
 
				   get_metrics_parameters = {
			
 
				     "metricNames": metric_name,
			
 
				     "appId": app_id,
			
 
				     "hostname": hostnames,
			
 
				-    "startTime": current_time - interval * 60 * 1000,
			
 
				+    "startTime": current_time - interval*60*1000,
			
 
				     "endTime": current_time,
			
 
				     "grouped": "true",
			
 
				     }
			
@@ -301,25 +274,15 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				   # if host1 reports small local values, but host2 reports large local values
			
 
				   for metrics_data in data_json["metrics"]:
			
 
				     metrics += metrics_data["metrics"].values()
			
 
				-  pass
			
 
				 
			
 
				   if not metrics or len(metrics) < 2:
			
 
				     return (RESULT_STATE_UNKNOWN, ["Unable to calculate the standard deviation for {0} datapoints".format(len(metrics))])
			
 
				 
			
 
				-  # Filter out points below min threshold
			
 
				-  for metric in metrics:
			
 
				-    if metric <= minimum_value_threshold:
			
 
				-      metrics.remove(metric)
			
 
				-  pass
			
 
				-
			
 
				-  if len(metrics) < 2:
			
 
				-    return (RESULT_STATE_SKIPPED, ['No datapoints found above the minimum threshold of {0}'.format(minimum_value_threshold)])
			
 
				-
			
 
				   mean = calculate_mean(metrics)
			
 
				   stddev = calulate_sample_std_deviation(metrics)
			
 
				 
			
 
				   try:
			
 
				-    deviation_percent = stddev / mean * 100
			
 
				+    deviation_percent = stddev/mean*100
			
 
				   except ZeroDivisionError:
			
 
				     # should not be a case for this alert
			
 
				     return (RESULT_STATE_UNKNOWN, ["Unable to calculate the standard deviation percentage. The mean value is 0"])