Переглянути джерело

AMBARI-8729 - Alerts: Add a Units Label to Metric Types (jonathanhurley)

Jonathan Hurley 10 роки тому
батько
коміт
56ad13d96e

+ 28 - 0
ambari-server/src/main/java/org/apache/ambari/server/state/alert/Reporting.java

@@ -46,6 +46,13 @@ public class Reporting {
   @SerializedName("critical")
   @SerializedName("critical")
   private ReportTemplate m_critical;
   private ReportTemplate m_critical;
 
 
+  /**
+   * A label that identifies what units the value is in. For example, this could
+   * be "s" for seconds or "GB" for gigabytes.
+   */
+  @SerializedName("units")
+  private String m_units;
+
   /**
   /**
    * @return the WARNING structure or {@code null} if none.
    * @return the WARNING structure or {@code null} if none.
    */
    */
@@ -91,6 +98,27 @@ public class Reporting {
     m_ok = ok;
     m_ok = ok;
   }
   }
 
 
+  /**
+   * Gets a label identifying the units that the values are in. For example,
+   * this could be "s" for seconds or "GB" for gigabytes.
+   *
+   * @return the units, or {@code null} for none.
+   */
+  public String getUnits() {
+    return m_units;
+  }
+
+  /**
+   * Sets the label that identifies the units that the threshold values are in.
+   * For example, this could be "s" for seconds or "GB" for gigabytes.
+   *
+   * @param units
+   *          the units, or {@code null} for none.
+   */
+  public void setUnits(String units) {
+    m_units = units;
+  }
+
   /**
   /**
    *
    *
    */
    */

+ 3 - 2
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HBASE/alerts.json

@@ -56,7 +56,7 @@
       {
       {
         "name": "hbase_master_cpu",
         "name": "hbase_master_cpu",
         "label": "HBase Master CPU Utilization",
         "label": "HBase Master CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -80,7 +80,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 18 - 12
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json

@@ -109,7 +109,7 @@
       {
       {
         "name": "namenode_cpu",
         "name": "namenode_cpu",
         "label": "NameNode Host CPU Utilization",
         "label": "NameNode Host CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the NameNode exceeds certain warning and critical thresholds. It checks the NameNode JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the NameNode exceeds certain warning and critical thresholds. It checks the NameNode JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -132,7 +132,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -146,7 +147,7 @@
       {
       {
         "name": "namenode_hdfs_blocks_health",
         "name": "namenode_hdfs_blocks_health",
         "label": "NameNode Blocks Health",
         "label": "NameNode Blocks Health",
-        "description": "This service-level alert is triggered if the number of corrupt or missing blocks exceeds the configured critical threshold.",
+        "description": "This service-level alert is triggered if the number of corrupt or missing blocks exceeds the configured critical threshold. The threshold values are in blocks.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -169,7 +170,8 @@
             "critical": {
             "critical": {
               "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
               "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
               "value": 1
               "value": 1
-            }
+            },
+            "units" : "Blocks"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -183,7 +185,7 @@
       {
       {
         "name": "namenode_hdfs_capacity_utilization",
         "name": "namenode_hdfs_capacity_utilization",
         "label": "HDFS Capacity Utilization",
         "label": "HDFS Capacity Utilization",
-        "description": "This service-level alert is triggered if the HDFS capacity utilization exceeds the configured warning and critical thresholds. It checks the NameNode JMX Servlet for the CapacityUsed and CapacityRemaining properties.",
+        "description": "This service-level alert is triggered if the HDFS capacity utilization exceeds the configured warning and critical thresholds. It checks the NameNode JMX Servlet for the CapacityUsed and CapacityRemaining properties. The threshold values are in percent.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -206,7 +208,8 @@
             "critical": {
             "critical": {
               "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
               "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
               "value": 90
               "value": 90
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -220,7 +223,7 @@
       {
       {
         "name": "namenode_rpc_latency",
         "name": "namenode_rpc_latency",
         "label": "NameNode RPC Latency",
         "label": "NameNode RPC Latency",
-        "description": "This host-level alert is triggered if the NameNode RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for NameNode operations.",
+        "description": "This host-level alert is triggered if the NameNode RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for NameNode operations. The threshold values are in milliseconds.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -243,7 +246,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -257,7 +261,7 @@
       {
       {
         "name": "namenode_directory_status",
         "name": "namenode_directory_status",
         "label": "NameNode Directory Status",
         "label": "NameNode Directory Status",
-        "description": "This host-level alert is triggered if any of the the NameNode's NameDirStatuses metric reports a failed directory.",
+        "description": "This host-level alert is triggered if any of the NameNode's NameDirStatuses metric reports a failed directory. The threshold values are in the number of directories that are not healthy.",
         "interval": 1,
         "interval": 1,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -280,7 +284,8 @@
             "critical": {
             "critical": {
               "text": "Failed directory count: {1}",
               "text": "Failed directory count: {1}",
               "value": 1
               "value": 1
-            }
+            },
+            "units" : "Dirs"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -456,7 +461,7 @@
       {
       {
         "name": "datanode_storage",
         "name": "datanode_storage",
         "label": "DataNode Storage",
         "label": "DataNode Storage",
-        "description": "This host-level alert is triggered if storage capacity if full on the DataNode. It checks the DataNode JMX Servlet for the Capacity and Remaining properties.",
+        "description": "This host-level alert is triggered if storage capacity is full on the DataNode. It checks the DataNode JMX Servlet for the Capacity and Remaining properties. The threshold values are in percent.",
         "interval": 2,
         "interval": 2,
         "scope": "HOST",
         "scope": "HOST",
         "enabled": true,
         "enabled": true,
@@ -479,7 +484,8 @@
             "critical": {
             "critical": {
               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
               "value": 90
               "value": 90
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 12 - 8
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json

@@ -32,7 +32,7 @@
       {
       {
         "name": "mapreduce_history_server_cpu",
         "name": "mapreduce_history_server_cpu",
         "label": "History Server CPU Utilization",
         "label": "History Server CPU Utilization",
-        "description": "This host-level alert is triggered if the percent of CPU utilization on the History Server exceeds the configured critical threshold.",
+        "description": "This host-level alert is triggered if the percent of CPU utilization on the History Server exceeds the configured critical threshold. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -55,7 +55,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -69,7 +70,7 @@
       {
       {
         "name": "mapreduce_history_server_rpc_latency",
         "name": "mapreduce_history_server_rpc_latency",
         "label": "History Server RPC Latency",
         "label": "History Server RPC Latency",
-        "description": "This host-level alert is triggered if the History Server operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for operations.",
+        "description": "This host-level alert is triggered if the History Server operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for operations. The threshold values are in milliseconds.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -92,7 +93,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -232,7 +234,7 @@
       {
       {
         "name": "yarn_resourcemanager_cpu",
         "name": "yarn_resourcemanager_cpu",
         "label": "ResourceManager CPU Utilization",
         "label": "ResourceManager CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the ResourceManager exceeds certain warning and critical thresholds. It checks the ResourceManager JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the ResourceManager exceeds certain warning and critical thresholds. It checks the ResourceManager JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -255,7 +257,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -269,7 +272,7 @@
       {
       {
         "name": "yarn_resourcemanager_rpc_latency",
         "name": "yarn_resourcemanager_rpc_latency",
         "label": "ResourceManager RPC Latency",
         "label": "ResourceManager RPC Latency",
-        "description": "This host-level alert is triggered if the ResourceManager operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for ResourceManager operations.",
+        "description": "This host-level alert is triggered if the ResourceManager operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for ResourceManager operations. The threshold values are in milliseconds.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -292,7 +295,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 3 - 2
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HBASE/alerts.json

@@ -56,7 +56,7 @@
       {
       {
         "name": "hbase_master_cpu",
         "name": "hbase_master_cpu",
         "label": "HBase Master CPU Utilization",
         "label": "HBase Master CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -80,7 +80,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 18 - 12
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HDFS/alerts.json

@@ -109,7 +109,7 @@
       {
       {
         "name": "namenode_cpu",
         "name": "namenode_cpu",
         "label": "NameNode Host CPU Utilization",
         "label": "NameNode Host CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the NameNode exceeds certain warning and critical thresholds. It checks the NameNode JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the NameNode exceeds certain warning and critical thresholds. It checks the NameNode JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -132,7 +132,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -146,7 +147,7 @@
       {
       {
         "name": "namenode_hdfs_blocks_health",
         "name": "namenode_hdfs_blocks_health",
         "label": "NameNode Blocks Health",
         "label": "NameNode Blocks Health",
-        "description": "This service-level alert is triggered if the number of corrupt or missing blocks exceeds the configured critical threshold.",
+        "description": "This service-level alert is triggered if the number of corrupt or missing blocks exceeds the configured critical threshold. The threshold values are in blocks.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -169,7 +170,8 @@
             "critical": {
             "critical": {
               "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
               "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
               "value": 1
               "value": 1
-            }
+            },
+            "units" : "Blocks"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -183,7 +185,7 @@
       {
       {
         "name": "namenode_hdfs_capacity_utilization",
         "name": "namenode_hdfs_capacity_utilization",
         "label": "HDFS Capacity Utilization",
         "label": "HDFS Capacity Utilization",
-        "description": "This service-level alert is triggered if the HDFS capacity utilization exceeds the configured warning and critical thresholds. It checks the NameNode JMX Servlet for the CapacityUsed and CapacityRemaining properties.",
+        "description": "This service-level alert is triggered if the HDFS capacity utilization exceeds the configured warning and critical thresholds. It checks the NameNode JMX Servlet for the CapacityUsed and CapacityRemaining properties. The threshold values are in percent.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -206,7 +208,8 @@
             "critical": {
             "critical": {
               "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
               "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
               "value": 90
               "value": 90
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -220,7 +223,7 @@
       {
       {
         "name": "namenode_rpc_latency",
         "name": "namenode_rpc_latency",
         "label": "NameNode RPC Latency",
         "label": "NameNode RPC Latency",
-        "description": "This host-level alert is triggered if the NameNode RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for NameNode operations.",
+        "description": "This host-level alert is triggered if the NameNode RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for NameNode operations. The threshold values are in milliseconds.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -243,7 +246,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -257,7 +261,7 @@
       {
       {
         "name": "namenode_directory_status",
         "name": "namenode_directory_status",
         "label": "NameNode Directory Status",
         "label": "NameNode Directory Status",
-        "description": "This host-level alert is triggered if any of the the NameNode's NameDirStatuses metric reports a failed directory.",
+        "description": "This host-level alert is triggered if any of the NameNode's NameDirStatuses metric reports a failed directory. The threshold values are in the number of directories that are not healthy.",
         "interval": 1,
         "interval": 1,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -280,7 +284,8 @@
             "critical": {
             "critical": {
               "text": "Failed directory count: {1}",
               "text": "Failed directory count: {1}",
               "value": 1
               "value": 1
-            }
+            },
+            "units" : "Dirs"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -431,7 +436,7 @@
       {
       {
         "name": "datanode_storage",
         "name": "datanode_storage",
         "label": "DataNode Storage",
         "label": "DataNode Storage",
-        "description": "This host-level alert is triggered if storage capacity if full on the DataNode. It checks the DataNode JMX Servlet for the Capacity and Remaining properties.",
+        "description": "This host-level alert is triggered if storage capacity is full on the DataNode. It checks the DataNode JMX Servlet for the Capacity and Remaining properties. The threshold values are in percent.",
         "interval": 2,
         "interval": 2,
         "scope": "HOST",
         "scope": "HOST",
         "enabled": true,
         "enabled": true,
@@ -454,7 +459,8 @@
             "critical": {
             "critical": {
               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
               "value": 90
               "value": 90
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 6 - 4
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/alerts.json

@@ -56,7 +56,7 @@
       {
       {
         "name": "mapreduce_jobtracker_cpu",
         "name": "mapreduce_jobtracker_cpu",
         "label": "JobTracker Host CPU Utilization",
         "label": "JobTracker Host CPU Utilization",
-        "description": "This host-level alert is triggered if the percent of CPU utilization on the JobTracker exceeds the configured critical threshold.",
+        "description": "This host-level alert is triggered if the percent of CPU utilization on the JobTracker exceeds the configured critical threshold. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -76,7 +76,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -90,7 +91,7 @@
       {
       {
         "name": "mapreduce_jobtracker_rpc_latency",
         "name": "mapreduce_jobtracker_rpc_latency",
         "label": "JobTracker RPC Latency",
         "label": "JobTracker RPC Latency",
-        "description": "This host-level alert is triggered if the JobTracker operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for operations.",
+        "description": "This host-level alert is triggered if the JobTracker operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for operations. The threshold values are in milliseconds.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -110,7 +111,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 3 - 2
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HBASE/alerts.json

@@ -56,7 +56,7 @@
       {
       {
         "name": "hbase_master_cpu",
         "name": "hbase_master_cpu",
         "label": "HBase Master CPU Utilization",
         "label": "HBase Master CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -80,7 +80,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 18 - 12
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json

@@ -109,7 +109,7 @@
       {
       {
         "name": "namenode_cpu",
         "name": "namenode_cpu",
         "label": "NameNode Host CPU Utilization",
         "label": "NameNode Host CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the NameNode exceeds certain warning and critical thresholds. It checks the NameNode JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the NameNode exceeds certain warning and critical thresholds. It checks the NameNode JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -132,7 +132,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -146,7 +147,7 @@
       {
       {
         "name": "namenode_hdfs_blocks_health",
         "name": "namenode_hdfs_blocks_health",
         "label": "NameNode Blocks Health",
         "label": "NameNode Blocks Health",
-        "description": "This service-level alert is triggered if the number of corrupt or missing blocks exceeds the configured critical threshold.",
+        "description": "This service-level alert is triggered if the number of corrupt or missing blocks exceeds the configured critical threshold. The threshold values are in blocks.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -169,7 +170,8 @@
             "critical": {
             "critical": {
               "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
               "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
               "value": 1
               "value": 1
-            }
+            },
+            "units" : "Blocks"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -183,7 +185,7 @@
       {
       {
         "name": "namenode_hdfs_capacity_utilization",
         "name": "namenode_hdfs_capacity_utilization",
         "label": "HDFS Capacity Utilization",
         "label": "HDFS Capacity Utilization",
-        "description": "This service-level alert is triggered if the HDFS capacity utilization exceeds the configured warning and critical thresholds. It checks the NameNode JMX Servlet for the CapacityUsed and CapacityRemaining properties.",
+        "description": "This service-level alert is triggered if the HDFS capacity utilization exceeds the configured warning and critical thresholds. It checks the NameNode JMX Servlet for the CapacityUsed and CapacityRemaining properties. The threshold values are in percent.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -206,7 +208,8 @@
             "critical": {
             "critical": {
               "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
               "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
               "value": 90
               "value": 90
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -220,7 +223,7 @@
       {
       {
         "name": "namenode_rpc_latency",
         "name": "namenode_rpc_latency",
         "label": "NameNode RPC Latency",
         "label": "NameNode RPC Latency",
-        "description": "This host-level alert is triggered if the NameNode RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for NameNode operations.",
+        "description": "This host-level alert is triggered if the NameNode RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for NameNode operations. The threshold values are in milliseconds.",
         "interval": 2,
         "interval": 2,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -243,7 +246,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -257,7 +261,7 @@
       {
       {
         "name": "namenode_directory_status",
         "name": "namenode_directory_status",
         "label": "NameNode Directory Status",
         "label": "NameNode Directory Status",
-        "description": "This host-level alert is triggered if any of the the NameNode's NameDirStatuses metric reports a failed directory.",
+        "description": "This host-level alert is triggered if any of the NameNode's NameDirStatuses metric reports a failed directory. The threshold values are in the number of directories that are not healthy.",
         "interval": 1,
         "interval": 1,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -280,7 +284,8 @@
             "critical": {
             "critical": {
               "text": "Failed directory count: {1}",
               "text": "Failed directory count: {1}",
               "value": 1
               "value": 1
-            }
+            },
+            "units" : "Dirs"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -456,7 +461,7 @@
       {
       {
         "name": "datanode_storage",
         "name": "datanode_storage",
         "label": "DataNode Storage",
         "label": "DataNode Storage",
-        "description": "This host-level alert is triggered if storage capacity if full on the DataNode. It checks the DataNode JMX Servlet for the Capacity and Remaining properties.",
+        "description": "This host-level alert is triggered if storage capacity is full on the DataNode. It checks the DataNode JMX Servlet for the Capacity and Remaining properties. The threshold values are in percent.",
         "interval": 2,
         "interval": 2,
         "scope": "HOST",
         "scope": "HOST",
         "enabled": true,
         "enabled": true,
@@ -479,7 +484,8 @@
             "critical": {
             "critical": {
               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
               "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
               "value": 90
               "value": 90
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 12 - 8
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/alerts.json

@@ -32,7 +32,7 @@
       {
       {
         "name": "mapreduce_history_server_cpu",
         "name": "mapreduce_history_server_cpu",
         "label": "History Server CPU Utilization",
         "label": "History Server CPU Utilization",
-        "description": "This host-level alert is triggered if the percent of CPU utilization on the History Server exceeds the configured critical threshold.",
+        "description": "This host-level alert is triggered if the percent of CPU utilization on the History Server exceeds the configured critical threshold. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -55,7 +55,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -69,7 +70,7 @@
       {
       {
         "name": "mapreduce_history_server_rpc_latency",
         "name": "mapreduce_history_server_rpc_latency",
         "label": "History Server RPC Latency",
         "label": "History Server RPC Latency",
-        "description": "This host-level alert is triggered if the History Server operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for operations.",
+        "description": "This host-level alert is triggered if the History Server operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for operations. The threshold values are in milliseconds.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -92,7 +93,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -232,7 +234,7 @@
       {
       {
         "name": "yarn_resourcemanager_cpu",
         "name": "yarn_resourcemanager_cpu",
         "label": "ResourceManager CPU Utilization",
         "label": "ResourceManager CPU Utilization",
-        "description": "This host-level alert is triggered if CPU utilization of the ResourceManager exceeds certain warning and critical thresholds. It checks the ResourceManager JMX Servlet for the SystemCPULoad property.",
+        "description": "This host-level alert is triggered if CPU utilization of the ResourceManager exceeds certain warning and critical thresholds. It checks the ResourceManager JMX Servlet for the SystemCPULoad property. The threshold values are in percent.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -255,7 +257,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [
@@ -269,7 +272,7 @@
       {
       {
         "name": "yarn_resourcemanager_rpc_latency",
         "name": "yarn_resourcemanager_rpc_latency",
         "label": "ResourceManager RPC Latency",
         "label": "ResourceManager RPC Latency",
-        "description": "This host-level alert is triggered if the ResourceManager operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for ResourceManager operations.",
+        "description": "This host-level alert is triggered if the ResourceManager operations RPC latency exceeds the configured critical threshold. Typically an increase in the RPC processing time increases the RPC queue length, causing the average queue wait time to increase for ResourceManager operations. The threshold values are in milliseconds.",
         "interval": 5,
         "interval": 5,
         "scope": "ANY",
         "scope": "ANY",
         "enabled": true,
         "enabled": true,
@@ -292,7 +295,8 @@
             "critical": {
             "critical": {
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
               "value": 5000
               "value": 5000
-            }
+            },
+            "units" : "ms"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 2 - 1
ambari-server/src/main/resources/stacks/HDP/2.2/services/AMS/alerts.json

@@ -102,7 +102,8 @@
             "critical": {
             "critical": {
               "text": "{1} CPU, load {0:.1%}",
               "text": "{1} CPU, load {0:.1%}",
               "value": 250
               "value": 250
-            }
+            },
+            "units" : "%"
           },
           },
           "jmx": {
           "jmx": {
             "property_list": [
             "property_list": [

+ 3 - 1
ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java

@@ -350,7 +350,7 @@ public class AlertDefinitionResourceProviderTest {
         source.getReporting().getWarning().getText());
         source.getReporting().getWarning().getText());
     requestProps.put("AlertDefinition/source/reporting/warning/value",
     requestProps.put("AlertDefinition/source/reporting/warning/value",
         source.getReporting().getWarning().getValue());
         source.getReporting().getWarning().getValue());
-
+    requestProps.put("AlertDefinition/source/reporting/units", "Gigabytes");
 
 
     Request request = PropertyHelper.getCreateRequest(Collections.singleton(requestProps), null);
     Request request = PropertyHelper.getCreateRequest(Collections.singleton(requestProps), null);
     provider.createResources(request);
     provider.createResources(request);
@@ -388,6 +388,8 @@ public class AlertDefinitionResourceProviderTest {
     assertEquals(source.getReporting().getCritical().getText(),
     assertEquals(source.getReporting().getCritical().getText(),
         actualSource.getReporting().getCritical().getText());
         actualSource.getReporting().getCritical().getText());
 
 
+    assertEquals("Gigabytes", actualSource.getReporting().getUnits());
+
     Assert.assertNotNull(source.getUri().getHttpUri());
     Assert.assertNotNull(source.getUri().getHttpUri());
     Assert.assertNotNull(source.getUri().getHttpsUri());
     Assert.assertNotNull(source.getUri().getHttpsUri());