Sfoglia il codice sorgente

AMBARI-7889 - Alerts: Convert HDFS Nagios Alerts Into Ambari (jonathanhurley)

Jonathan Hurley 10 anni fa
parent
commit
463dae91a1

+ 4 - 0
ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py

@@ -32,6 +32,7 @@ from alerts.collector import AlertCollector
 from alerts.metric_alert import MetricAlert
 from alerts.port_alert import PortAlert
 from alerts.script_alert import ScriptAlert
+from alerts.web_alert import WebAlert
 
 logger = logging.getLogger()
 
@@ -43,6 +44,7 @@ class AlertSchedulerHandler():
   TYPE_PORT = 'PORT'
   TYPE_METRIC = 'METRIC'
   TYPE_SCRIPT = 'SCRIPT'
+  TYPE_WEB = 'WEB'
 
   APS_CONFIG = { 
     'threadpool.core_threads': 3,
@@ -222,6 +224,8 @@ class AlertSchedulerHandler():
     elif source_type == AlertSchedulerHandler.TYPE_SCRIPT:
       source['stacks_dir'] = self.stacks_dir
       alert = ScriptAlert(json_definition, source)
+    elif source_type == AlertSchedulerHandler.TYPE_WEB:
+      alert = WebAlert(json_definition, source)
 
     if alert is not None:
       alert.set_cluster(clusterName, hostName)

+ 112 - 1
ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py

@@ -22,6 +22,7 @@ import logging
 import re
 import time
 import traceback
+from collections import namedtuple
 
 logger = logging.getLogger()
 
@@ -91,10 +92,18 @@ class BaseAlert(object):
       res = self._collect()
       res_base_text = self.alert_source_meta['reporting'][res[0].lower()]['text']
     except Exception as e:
-      traceback.print_exc()
+      message = "Unable to run alert {0}".format(str(self.alert_meta['name']))
+      
+      # print the exception if in DEBUG, otherwise just log the warning
+      if logger.isEnabledFor(logging.DEBUG):
+        logger.exception(message)
+      else:
+        logger.warning(message)
+
       res = (BaseAlert.RESULT_UNKNOWN, [str(e)])
       res_base_text = "Unknown {0}"
     
+    
     if logger.isEnabledFor(logging.DEBUG):
       logger.debug("debug alert result: {0}".format(str(res)))
       
@@ -155,6 +164,108 @@ class BaseAlert(object):
     else:
       return None
 
+    
+  def _lookup_uri_property_keys(self, uri_structure):
+    '''
+    Loads the configuration lookup keys that the URI structure needs. This
+    will return a named tuple that contains the keys needed to lookup
+    parameterized URI values from the URI structure. The URI structure looks 
+    something like:
+    
+    "uri":{ 
+      "http": foo,
+      "https": bar,
+      ...
+    }
+    '''
+    
+    if uri_structure is None:
+      return None
+    
+    http_key = None
+    https_key = None
+    https_property_key = None
+    https_property_value_key = None
+    
+    if 'http' in uri_structure:
+      http_key = self._find_lookup_property(uri_structure['http'])
+    
+    if 'https' in uri_structure:
+      https_key = self._find_lookup_property(uri_structure['https'])
+      
+    if 'https_property' in uri_structure:
+      https_property_key = self._find_lookup_property(uri_structure['https_property'])
+      
+    if 'https_property_value' in uri_structure:
+      https_property_value_key = uri_structure['https_property_value']
+
+    AlertUriLookupKeys = namedtuple('AlertUriLookupKeys', 
+        'http https https_property https_property_value')
+    
+    alert_uri_lookup_keys = AlertUriLookupKeys(http=http_key, https=https_key, 
+        https_property=https_property_key, https_property_value=https_property_value_key)
+    
+    return alert_uri_lookup_keys
+
+    
+  def _get_uri_from_structure(self, alert_uri_lookup_keys):
+    '''
+    Gets the URI to use by examining the URI structure from the definition.
+    This will return a named tuple that has the uri and the SSL flag. The
+    URI structure looks something like:
+    
+    "uri":{ 
+      "http": foo,
+      "https": bar,
+      ...
+    }
+    '''
+    
+    if alert_uri_lookup_keys is None:
+      return None
+    
+    http_uri = None
+    https_uri = None
+    https_property = None
+    https_property_value = None
+
+    # attempt to parse and parameterize the various URIs; properties that
+    # do not exist in the lookup map are returned as None
+    if alert_uri_lookup_keys.http is not None:
+      http_uri = self._lookup_property_value(alert_uri_lookup_keys.http)
+    
+    if alert_uri_lookup_keys.https is not None:
+      https_uri = self._lookup_property_value(alert_uri_lookup_keys.https)
+
+    if alert_uri_lookup_keys.https_property is not None:
+      https_property = self._lookup_property_value(alert_uri_lookup_keys.https_property)
+
+    if alert_uri_lookup_keys.https_property_value is not None:
+      https_property_value = self._lookup_property_value(alert_uri_lookup_keys.https_property_value)
+
+    # without a URI, there's no way to create the structure we need    
+    if http_uri is None and https_uri is None:
+      raise Exception("Could not determine result. Either the http or https URI must be specified.")
+
+    # start out assuming plaintext
+    uri = http_uri
+    is_ssl_enabled = False
+    
+    if https_uri is not None:
+      # https without http implies SSL
+      if http_uri is None:
+        is_ssl_enabled = True
+        uri = https_uri
+      elif https_property is not None and https_property == https_property_value:
+        is_ssl_enabled = True
+        uri = https_uri
+    
+    # create a named tuple to return both the concrete URI and SSL flag
+    AlertUri = namedtuple('AlertUri', 'uri is_ssl_enabled')
+    alert_uri = AlertUri(uri=uri, is_ssl_enabled=is_ssl_enabled)
+    
+    return alert_uri
+
 
   def _collect(self):
     '''

+ 23 - 10
ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py

@@ -33,26 +33,34 @@ class MetricAlert(BaseAlert):
   
   def __init__(self, alert_meta, alert_source_meta):
     super(MetricAlert, self).__init__(alert_meta, alert_source_meta)
-
-    self.uri = self._find_lookup_property(alert_source_meta['uri'])
-    self.metric_info = None
-    
+ 
+    self.metric_info = None    
     if 'jmx' in alert_source_meta:
       self.metric_info = JmxMetric(alert_source_meta['jmx'])
+
+    # extract any lookup keys from the URI structure
+    self.uri_property_keys = self._lookup_uri_property_keys(alert_source_meta['uri'])
       
   def _collect(self):
     if self.metric_info is None:
-      raise Exception("Could not determine result.  Specific metric collector is not defined.")
+      raise Exception("Could not determine result. Specific metric collector is not defined.")
+    
+    if self.uri_property_keys is None:
+      raise Exception("Could not determine result. URL(s) were not defined.")
 
-    uri = self._lookup_property_value(self.uri)
+    # use the URI lookup keys to get a final URI value to query
+    alert_uri = self._get_uri_from_structure(self.uri_property_keys)      
     
-    host = BaseAlert.get_host_from_url(uri)
+    logger.debug("Calculated metric URI to be {0} (ssl={1})".format(alert_uri.uri, 
+        str(alert_uri.is_ssl_enabled)))
+
+    host = BaseAlert.get_host_from_url(alert_uri.uri)
     if host is None:
       host = self.host_name
 
     port = 80 # probably not very realistic
     try:      
-      port = int(get_port_from_url(uri))
+      port = int(get_port_from_url(alert_uri.uri))
     except:
       pass
 
@@ -61,7 +69,7 @@ class MetricAlert(BaseAlert):
     value_list = []
 
     if isinstance(self.metric_info, JmxMetric):
-      value_list.extend(self._load_jmx(False, host, port, self.metric_info))
+      value_list.extend(self._load_jmx(alert_uri.is_ssl_enabled, host, port, self.metric_info))
       check_value = self.metric_info.calculate(value_list)
       value_list.append(check_value)
       
@@ -70,6 +78,7 @@ class MetricAlert(BaseAlert):
     logger.debug("Resolved value list is: {0}".format(str(value_list)))
     
     return ((collect_result, value_list))
+
   
   def __get_result(self, value):
     ok_value = self.__find_threshold('ok')
@@ -77,7 +86,7 @@ class MetricAlert(BaseAlert):
     crit_value = self.__find_threshold('critical')
     
     # critical values are higher
-    crit_direction_up = crit_value > warn_value
+    crit_direction_up = crit_value >= warn_value
     
     if crit_direction_up: 
       # critical values are higher
@@ -109,6 +118,7 @@ class MetricAlert(BaseAlert):
           return self.RESULT_OK
 
     return None
+
     
   def __find_threshold(self, reporting_type):
     ''' find the defined thresholds for alert values '''
@@ -123,6 +133,7 @@ class MetricAlert(BaseAlert):
       return None
       
     return self.alert_source_meta['reporting'][reporting_type]['value']
+
     
   def _load_jmx(self, ssl, host, port, jmx_metric):
     ''' creates a JmxMetric object that holds info about jmx-based metrics '''
@@ -143,6 +154,7 @@ class MetricAlert(BaseAlert):
         value_list.append(json_data[attr])
         
     return value_list
+
     
 class JmxMetric:
   def __init__(self, jmx_info):
@@ -162,6 +174,7 @@ class JmxMetric:
       if not parts[0] in self.property_map:
         self.property_map[parts[0]] = []
       self.property_map[parts[0]].append(parts[1])
+
       
   def calculate(self, args):
     if self.custom_module is not None:

+ 90 - 0
ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py

@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import logging
+import urllib2
+from alerts.base_alert import BaseAlert
+from resource_management.libraries.functions.get_port_from_url import get_port_from_url
+
+logger = logging.getLogger()
+
+class WebAlert(BaseAlert):
+  
+  def __init__(self, alert_meta, alert_source_meta):
+    super(WebAlert, self).__init__(alert_meta, alert_source_meta)
+    
+    # extract any lookup keys from the URI structure
+    self.uri_property_keys = self._lookup_uri_property_keys(alert_source_meta['uri'])
+
+      
+  def _collect(self):
+    if self.uri_property_keys is None:
+      raise Exception("Could not determine result. URL(s) were not defined.")
+
+    # use the URI lookup keys to get a final URI value to query
+    alert_uri = self._get_uri_from_structure(self.uri_property_keys)      
+
+    logger.debug("Calculated web URI to be {0} (ssl={1})".format(alert_uri.uri, 
+        str(alert_uri.is_ssl_enabled)))
+
+    host = BaseAlert.get_host_from_url(alert_uri.uri)
+    if host is None:
+      host = self.host_name
+
+    # maybe slightly realistic
+    port = 80 
+    if alert_uri.is_ssl_enabled:
+      port = 443
+      
+    try:      
+      port = int(get_port_from_url(alert_uri.uri))
+    except:
+      pass
+
+    status_code = self._make_web_request(host, port, alert_uri.is_ssl_enabled)
+
+    if status_code == 0:
+      return (self.RESULT_CRITICAL, [status_code, host, port])
+    
+    if status_code <= 401:
+      return (self.RESULT_OK, [status_code, host, port])
+    
+    return (self.RESULT_WARNING, [status_code, host, port])
+
+
+  def _make_web_request(self, host, port, ssl):
+    '''
+    Makes an http(s) request to a web resource and returns the http code. If
+    there was an error making the request, return 0 for the status code.
+    '''    
+    url = "{0}://{1}:{2}".format(
+        "https" if ssl else "http", host, str(port))
+    
+    try:
+      response = urllib2.urlopen(url)
+    except:
+      if logger.isEnabledFor(logging.DEBUG):
+        logger.exception("Unable to make a web request.")
+      
+      return 0
+    
+    return response.getcode()
+  
+  

+ 165 - 4
ambari-agent/src/test/python/ambari_agent/TestAlerts.py

@@ -25,7 +25,9 @@ from ambari_agent.alerts.collector import AlertCollector
 from ambari_agent.alerts.metric_alert import MetricAlert
 from ambari_agent.alerts.port_alert import PortAlert
 from ambari_agent.alerts.script_alert import ScriptAlert
+from ambari_agent.alerts.web_alert import WebAlert
 from ambari_agent.apscheduler.scheduler import Scheduler
+
 from mock.mock import patch
 from unittest import TestCase
 
@@ -168,7 +170,9 @@ class TestAlerts(TestCase):
       "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
       "source": {
         "type": "METRIC",
-        "uri": "http://myurl:8633",
+        "uri": {
+          "http": "{{hdfs-site/dfs.datanode.http.address}}"
+        },
         "jmx": {
           "property_list": [
             "someJmxObject/value",
@@ -196,7 +200,7 @@ class TestAlerts(TestCase):
 
     collector = AlertCollector()
     ma = MetricAlert(json, json['source'])
-    ma.set_helpers(collector, '')
+    ma.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'})
     ma.collect()
 
     self.assertEquals('CRITICAL', collector.alerts()[0]['state'])
@@ -205,12 +209,168 @@ class TestAlerts(TestCase):
     del json['source']['jmx']['value']
     collector = AlertCollector()
     ma = MetricAlert(json, json['source'])
-    ma.set_helpers(collector, '')
+    ma.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'})
     ma.collect()
 
     self.assertEquals('OK', collector.alerts()[0]['state'])
     self.assertEquals('ok_arr: 1 3 None', collector.alerts()[0]['text'])
+
+
+  @patch.object(MetricAlert, "_load_jmx")
+  def test_alert_uri_structure(self, ma_load_jmx_mock):
+    json = {
+      "name": "cpu_check",
+      "service": "HDFS",
+      "component": "NAMENODE",
+      "label": "NameNode process",
+      "interval": 6,
+      "scope": "host",
+      "enabled": True,
+      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+      "source": {
+        "type": "METRIC",
+        "uri": {
+          "http": "{{hdfs-site/dfs.datanode.http.address}}",
+          "https": "{{hdfs-site/dfs.datanode.https.address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "jmx": {
+          "property_list": [
+            "someJmxObject/value",
+            "someOtherJmxObject/value"
+          ],
+          "value": "{0}"
+        },
+        "reporting": {
+          "ok": {
+            "text": "ok_arr: {0} {1} {2}",
+          },
+          "warning": {
+            "text": "",
+            "value": 10
+          },
+          "critical": {
+            "text": "crit_arr: {0} {1} {2}",
+            "value": 20
+          }
+        }
+      }
+    }
+
+    ma_load_jmx_mock.return_value = [1,1]
+    
+    # run the alert without specifying any keys; an exception should be thrown
+    # indicating that there was no URI and the result is UNKNOWN
+    collector = AlertCollector()
+    ma = MetricAlert(json, json['source'])
+    ma.set_helpers(collector, '')
+    ma.collect()
+
+    self.assertEquals('UNKNOWN', collector.alerts()[0]['state'])
+
+    # set only the HTTP policy property, which makes no sense without the main URI properties
+    collector = AlertCollector()
+    ma = MetricAlert(json, json['source'])
+    ma.set_helpers(collector, {'hdfs-site/dfs.http.policy': 'HTTP_ONLY'})
+    ma.collect()
+    
+    self.assertEquals('UNKNOWN', collector.alerts()[0]['state'])
     
+    # set an actual property key (http)
+    collector = AlertCollector()
+    ma = MetricAlert(json, json['source'])
+    ma.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80', 
+        'hdfs-site/dfs.http.policy': 'HTTP_ONLY'})
+    ma.collect()
+    
+    self.assertEquals('OK', collector.alerts()[0]['state'])
+    
+    # set an actual property key (https)
+    collector = AlertCollector()
+    ma = MetricAlert(json, json['source'])
+    ma.set_helpers(collector, {'hdfs-site/dfs.datanode.https.address': '1.2.3.4:443', 
+        'hdfs-site/dfs.http.policy': 'HTTP_ONLY'})
+    ma.collect()
+    
+    self.assertEquals('OK', collector.alerts()[0]['state'])    
+
+    # set both (http and https)
+    collector = AlertCollector()
+    ma = MetricAlert(json, json['source'])
+    ma.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80', 
+        'hdfs-site/dfs.datanode.https.address': '1.2.3.4:443', 
+        'hdfs-site/dfs.http.policy': 'HTTP_ONLY'})
+    ma.collect()
+    
+    self.assertEquals('OK', collector.alerts()[0]['state'])    
+
+
+  @patch.object(WebAlert, "_make_web_request")
+  def test_web_alert(self, wa_make_web_request_mock):
+    json = {
+      "name": "webalert_test",
+      "service": "HDFS",
+      "component": "DATANODE",
+      "label": "WebAlert Test",
+      "interval": 1,
+      "scope": "HOST",
+      "enabled": True,
+      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+      "source": {
+        "type": "WEB",
+        "uri": {
+          "http": "{{hdfs-site/dfs.datanode.http.address}}",
+          "https": "{{hdfs-site/dfs.datanode.https.address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "ok: {0}",
+          },
+          "warning": {
+            "text": "warning: {0}",
+          },
+          "critical": {
+            "text": "critical: {1}:{2}",
+          }
+        }
+      }
+    }
+
+    wa_make_web_request_mock.return_value = 200
+
+    # run the alert and check HTTP 200    
+    collector = AlertCollector()
+    alert = WebAlert(json, json['source'])
+    alert.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'})
+    alert.collect()
+
+    self.assertEquals('OK', collector.alerts()[0]['state'])
+    self.assertEquals('ok: 200', collector.alerts()[0]['text'])
+
+    # run the alert and check HTTP 500
+    wa_make_web_request_mock.return_value = 500
+    collector = AlertCollector()
+    alert = WebAlert(json, json['source'])
+    alert.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'})
+    alert.collect()
+    
+    self.assertEquals('WARNING', collector.alerts()[0]['state'])
+    self.assertEquals('warning: 500', collector.alerts()[0]['text'])
+
+    # run the alert and check critical
+    wa_make_web_request_mock.return_value = 0
+     
+    collector = AlertCollector()
+    alert = WebAlert(json, json['source'])
+    alert.set_helpers(collector, {'hdfs-site/dfs.datanode.http.address': '1.2.3.4:80'})
+    alert.collect()
+    
+    self.assertEquals('CRITICAL', collector.alerts()[0]['state'])
+    self.assertEquals('critical: 1.2.3.4:80', collector.alerts()[0]['text'])
+
 
   def test_reschedule(self):
     test_file_path = os.path.join('ambari_agent', 'dummy_files')
@@ -313,7 +473,8 @@ class TestAlerts(TestCase):
     
     # verify enabled alert was scheduled
     self.assertEquals(3, ash.get_job_count())
-    
+
+
   def test_immediate_alert(self):
     test_file_path = os.path.join('ambari_agent', 'dummy_files')
     test_stack_path = os.path.join('ambari_agent', 'dummy_files')

+ 4 - 1
ambari-agent/src/test/python/ambari_agent/TestHostInfo.py

@@ -187,8 +187,11 @@ class TestHostInfo(TestCase):
                                               "koji-override-0/$releasever"])
       self.assertFalse(package['repoName'] in ["AMBARI.dev-1.x"])
 
+  @patch.object(OSCheck, 'get_os_family')
   @patch.object(PackagesAnalyzer, 'subprocessWithTimeout')
-  def test_analyze_yum_output_err(self, subprocessWithTimeout_mock):
+  def test_analyze_yum_output_err(self, subprocessWithTimeout_mock, get_os_family_mock):
+    get_os_family_mock.return_value = OSConst.REDHAT_FAMILY
+    
     packageAnalyzer = PackagesAnalyzer()
 
     result = {}

+ 4 - 0
ambari-server/src/main/java/org/apache/ambari/server/state/alert/AlertDefinitionFactory.java

@@ -267,6 +267,10 @@ public class AlertDefinitionFactory {
           clazz = PercentSource.class;
           break;
         }
+        case WEB: {
+          clazz = WebSource.class;
+          break;
+        }
         default:
           break;
       }

+ 175 - 0
ambari-server/src/main/java/org/apache/ambari/server/state/alert/AlertUri.java

@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ambari.server.state.alert;
+
+import com.google.gson.annotations.SerializedName;
+
+/**
+ * The {@link AlertUri} class is used to represent a complex URI structure where
+ * there can be both a plaintext and SSL URI. This is used in cases where the
+ * alert definition needs a way to expose which URL (http or https) should be
+ * used to gather data. Currently, {@link MetricSource} and {@link WebSource}
+ * use this, but it can also replace the plain string URI used by other source
+ * types.
+ */
+public class AlertUri {
+
+  /**
+   * The HTTP URI to use.
+   */
+  @SerializedName("http")
+  private String m_httpUri;
+
+  /**
+   * The HTTPS URI to use.
+   */
+  @SerializedName("https")
+  private String m_httpsUri;
+
+  /**
+   * The configuration property to check to determine if HTTP or HTTPS should be
+   * used.
+   */
+  @SerializedName("https_property")
+  private String m_httpsProperty;
+
+  /**
+   * The value to check {@link #m_httpsProperty} against to determine if HTTPS
+   * should be used.
+   */
+  @SerializedName("https_property_value")
+  private String m_httpsPropertyValue;
+
+  /**
+   * Gets the plaintext (HTTP) URI that can be used to retrieve alert
+   * information.
+   *
+   * @return the HTTP URI (or {@code null} to always use the secure URL).
+   */
+  public String getHttpUri() {
+    return m_httpUri;
+  }
+
+  /**
+   * Gets the secure (HTTPS) URI that can be used to retrieve alert information.
+   *
+   * @return the HTTPS URI (or {@code null} to always use the insecure
+   *         URL).
+   */
+  public String getHttpsUri() {
+    return m_httpsUri;
+  }
+
+  /**
+   * The configuration property that can be used to determine if the secure URL
+   * should be used.
+   *
+   * @return the configuration property, or {@code null} for
+   *         none.
+   */
+  public String getHttpsProperty() {
+    return m_httpsProperty;
+  }
+
+  /**
+   * The literal value to use when comparing to the result from
+   * {@link #getHttpsProperty()}.
+   *
+   * @return the literal value that indicates SSL mode is
+   *         enabled, or {@code null} for none.
+   */
+  public String getHttpsPropertyValue() {
+    return m_httpsPropertyValue;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+
+    result = prime * result + ((m_httpUri == null) ? 0 : m_httpUri.hashCode());
+
+    result = prime * result
+        + ((m_httpsProperty == null) ? 0 : m_httpsProperty.hashCode());
+
+    result = prime
+        * result
+        + ((m_httpsPropertyValue == null) ? 0 : m_httpsPropertyValue.hashCode());
+
+    result = prime * result
+        + ((m_httpsUri == null) ? 0 : m_httpsUri.hashCode());
+
+    return result;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+
+    if (obj == null) {
+      return false;
+    }
+
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+
+    AlertUri other = (AlertUri) obj;
+    if (m_httpUri == null) {
+      if (other.m_httpUri != null) {
+        return false;
+      }
+    } else if (!m_httpUri.equals(other.m_httpUri)) {
+      return false;
+    }
+
+    if (m_httpsProperty == null) {
+      if (other.m_httpsProperty != null) {
+        return false;
+      }
+    } else if (!m_httpsProperty.equals(other.m_httpsProperty)) {
+      return false;
+    }
+
+    if (m_httpsPropertyValue == null) {
+      if (other.m_httpsPropertyValue != null) {
+        return false;
+      }
+    } else if (!m_httpsPropertyValue.equals(other.m_httpsPropertyValue)) {
+      return false;
+    }
+
+    if (m_httpsUri == null) {
+      if (other.m_httpsUri != null) {
+        return false;
+      }
+    } else if (!m_httpsUri.equals(other.m_httpsUri)) {
+      return false;
+    }
+
+    return true;
+  }
+}

+ 18 - 19
ambari-server/src/main/java/org/apache/ambari/server/state/alert/MetricSource.java

@@ -18,7 +18,6 @@
 package org.apache.ambari.server.state.alert;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 
 import com.google.gson.annotations.SerializedName;
@@ -32,8 +31,8 @@ import com.google.gson.annotations.SerializedName;
 public class MetricSource extends Source {
 
   @SerializedName("uri")
-  private String m_uri = null;
-  
+  private AlertUri uri = null;
+
   @SerializedName("jmx")
   private JmxInfo jmxInfo = null;
 
@@ -57,8 +56,8 @@ public class MetricSource extends Source {
   /**
    * @return the uri info, which may include port information
    */
-  public String getUri() {
-    return m_uri;
+  public AlertUri getUri() {
+    return uri;
   }
 
   /**
@@ -70,7 +69,7 @@ public class MetricSource extends Source {
     int result = super.hashCode();
     result = prime * result
         + ((gangliaInfo == null) ? 0 : gangliaInfo.hashCode());
-    result = prime * result + ((m_uri == null) ? 0 : m_uri.hashCode());
+    result = prime * result + ((uri == null) ? 0 : uri.hashCode());
     result = prime * result + ((jmxInfo == null) ? 0 : jmxInfo.hashCode());
 
     return result;
@@ -102,11 +101,11 @@ public class MetricSource extends Source {
       return false;
     }
 
-    if (m_uri == null) {
-      if (other.m_uri != null) {
+    if (uri == null) {
+      if (other.uri != null) {
         return false;
       }
-    } else if (!m_uri.equals(other.m_uri)) {
+    } else if (!uri.equals(other.uri)) {
       return false;
     }
 
@@ -120,42 +119,42 @@ public class MetricSource extends Source {
 
     return true;
   }
-  
+
   /**
    * Represents the {@code jmx} element in a Metric alert.
    */
   public static class JmxInfo {
     @SerializedName("property_list")
     private List<String> propertyList;
-    
+
     private String value;
-    
+
     public List<String> getPropertyList() {
       return propertyList;
     }
-    
+
     public String getValue() {
       return value;
     }
-    
+
     @Override
     public boolean equals(Object object) {
       if (!JmxInfo.class.isInstance(object)) {
         return false;
       }
-      
+
       JmxInfo other = (JmxInfo)object;
-      
+
       List<String> list1 = new ArrayList<String>(propertyList);
       List<String> list2 = new ArrayList<String>(other.propertyList);
-      
+
       if ((null == list1 && null != list2) || (null != list1 && null == list2)) {
         return false;
       }
-      
+
       // !!! even if out of order, this is enough to fail
       return list1.equals(list2);
-      
+
     }
   }
 }

+ 6 - 1
ambari-server/src/main/java/org/apache/ambari/server/state/alert/SourceType.java

@@ -41,5 +41,10 @@ public enum SourceType {
   /**
    * Source is a ratio of two {@link #METRIC} values.
    */
-  PERCENT;
+  PERCENT,
+
+  /**
+   * Source is an http(s)-style request.
+   */
+  WEB;
 }

+ 79 - 0
ambari-server/src/main/java/org/apache/ambari/server/state/alert/WebSource.java

@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ambari.server.state.alert;
+
+import com.google.gson.annotations.SerializedName;
+
+/**
+ * Alert when the source type is defined as {@link SourceType#WEB}
+ * <p/>
+ * Equality checking for instances of this class should be executed on every
+ * member to ensure that reconciling stack differences is correct.
+ */
+public class WebSource extends Source {
+
+  @SerializedName("uri")
+  private AlertUri uri = null;
+
+  /**
+   * @return the uri info, which may include port information
+   */
+  public AlertUri getUri() {
+    return uri;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = super.hashCode();
+    result = prime * result + ((uri == null) ? 0 : uri.hashCode());
+    return result;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+
+    if (!super.equals(obj)) {
+      return false;
+    }
+
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+
+    WebSource other = (WebSource) obj;
+    if (uri == null) {
+      if (other.uri != null) {
+        return false;
+      }
+    } else if (!uri.equals(other.uri)) {
+      return false;
+    }
+
+    return true;
+  }
+}

+ 305 - 19
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json

@@ -1,8 +1,8 @@
 {
   "service": [
     {
-      "name": "percent_datanode",
-      "label": "Percent DataNodes Live",
+      "name": "datanode_process_percent",
+      "label": "Percent DataNodes Available",
       "interval": 1,
       "scope": "SERVICE",
       "enabled": true,
@@ -23,38 +23,98 @@
           }
         }
       }
+    },
+    {
+      "name": "datanode_storage_percent",
+      "label": "Percent DataNodes With Available Space",
+      "interval": 1,
+      "scope": "SERVICE",
+      "enabled": true,
+      "source": {
+        "type": "AGGREGATE",
+        "alert_name": "datanode_storage",
+        "reporting": {
+          "ok": {
+            "text": "OK: total: <{0}>, affected: <{1}>"
+          },
+          "warning": {
+            "text": "WARNING: total: <{0}>, affected: <{1}>",
+            "value": 0.1
+          },
+          "critical": {
+            "text": "CRITICAL: total: <{0}>, affected: <{1}>",
+            "value": 0.3
+          }
+        }
+      }
+    },
+    {
+      "name": "journalnode_process_percent",
+      "label": "Percent JournalNodes Available",
+      "interval": 1,
+      "scope": "SERVICE",
+      "enabled": true,
+      "source": {
+        "type": "AGGREGATE",
+        "alert_name": "journalnode_process",
+        "reporting": {
+          "ok": {
+            "text": "OK: total: <{0}>, affected: <{1}>"
+          },
+          "warning": {
+            "text": "WARNING: total: <{0}>, affected: <{1}>",
+            "value": 0.33
+          },
+          "critical": {
+            "text": "CRITICAL: total: <{0}>, affected: <{1}>",
+            "value": 0.50
+          }
+        }
+      }
     }
   ],
   "NAMENODE": [
     {
-      "name": "namenode_process",
-      "label": "NameNode Process",
+      "name": "namenode_webui",
+      "label": "NameNode Web UI",
       "interval": 1,
       "scope": "ANY",
       "enabled": true,
       "source": {
-        "type": "PORT",
-        "uri": "{{hdfs-site/dfs.namenode.http-address}}",
-        "default_port": 50070,
+        "type": "WEB",
+        "uri": {
+          "http": "{{hdfs-site/dfs.namenode.http-address}}",
+          "https": "{{hdfs-site/dfs.namenode.https-address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
         "reporting": {
           "ok": {
-            "text": "TCP OK - {0:.4f} response on port {1}"
+            "text": "The UI returned a response code of {0}"
+          },
+          "warning":{
+            "text": "The UI returned a response code of {0}"
           },
           "critical": {
-            "text": "Connection failed: {0} on host {1}:{2}"
+            "text": "Connection failed to {1}:{2}"
           }
         }        
       }
     },
     {
-      "name": "check_cpu",
-      "label": "NameNode host CPU utilization",
-      "interval": 2,
+      "name": "namenode_cpu",
+      "label": "NameNode Host CPU Utilization",
+      "interval": 5,
       "scope": "ANY",
       "enabled": true,
       "source": {
         "type": "METRIC",
-        "uri": "{{hdfs-site/dfs.namenode.http-address}}",
+        "uri": {
+          "http": "{{hdfs-site/dfs.namenode.http-address}}",
+          "https": "{{hdfs-site/dfs.namenode.https-address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
         "reporting": {
           "ok": {
             "text": "{1} CPU, load {0:.1%}"
@@ -76,14 +136,177 @@
           "value": "{0} * 100"
         }
       }
-    }
+    },
+    {
+      "name": "namenode_hdfs_blocks_health",
+      "label": "NameNode Blocks Health",
+      "interval": 2,
+      "scope": "ANY",
+      "enabled": true,
+      "source": {
+        "type": "METRIC",
+        "uri": {
+          "http": "{{hdfs-site/dfs.namenode.http-address}}",
+          "https": "{{hdfs-site/dfs.namenode.https-address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "Total Blocks:[{1}], Missing Blocks:[{0}]"
+          },
+          "warning": {
+            "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
+            "value": 1
+          },          
+          "critical": {
+            "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
+            "value": 1
+          }
+        },
+        "jmx": {
+          "property_list": [
+            "Hadoop:service=NameNode,name=FSNamesystem/MissingBlocks",
+            "Hadoop:service=NameNode,name=FSNamesystem/BlocksTotal"
+          ],
+          "value": "{0}"
+        }
+      }
+    },
+    {
+      "name": "namenode_hdfs_capacity_utilization",
+      "label": "HDFS Capacity Utilization",
+      "interval": 2,
+      "scope": "ANY",
+      "enabled": true,
+      "source": {
+        "type": "METRIC",
+        "uri": {
+          "http": "{{hdfs-site/dfs.namenode.http-address}}",
+          "https": "{{hdfs-site/dfs.namenode.https-address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]"
+          },
+          "warning": {
+            "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
+            "value": 80
+          },          
+          "critical": {
+            "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
+            "value": 90
+          }
+        },
+        "jmx": {
+          "property_list": [
+            "Hadoop:service=NameNode,name=FSNamesystemState/CapacityUsed",
+            "Hadoop:service=NameNode,name=FSNamesystemState/CapacityRemaining"
+          ],
+          "value": "{0}/({0} + {1}) * 100"
+        }
+      }
+    },
+    {
+      "name": "namenode_rpc_latency",
+      "label": "NameNode RPC Latency",
+      "interval": 2,
+      "scope": "ANY",
+      "enabled": true,
+      "source": {
+        "type": "METRIC",
+        "uri": {
+          "http": "{{hdfs-site/dfs.namenode.http-address}}",
+          "https": "{{hdfs-site/dfs.namenode.https-address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]"
+          },
+          "warning": {
+            "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
+            "value": 3000
+          },          
+          "critical": {
+            "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]",
+            "value": 5000
+          }
+        },
+        "jmx": {
+          "property_list": [
+            "Hadoop:service=NameNode,name=RpcActivityForPort*/RpcQueueTimeAvgTime",
+            "Hadoop:service=NameNode,name=RpcActivityForPort*/RpcProcessingTimeAvgTime"
+          ],
+          "value": "{0}"
+        }
+      }
+    },
+    {
+      "name": "namenode_directory_status",
+      "label": "NameNode Directory Status",
+      "interval": 1,
+      "scope": "ANY",
+      "enabled": true,
+      "source": {
+        "type": "METRIC",
+        "uri": {
+          "http": "{{hdfs-site/dfs.namenode.http-address}}",
+          "https": "{{hdfs-site/dfs.namenode.https-address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "Directories are healthy"
+          },
+          "warning": {
+            "text": "Failed directory count: {1}",
+            "value": 1
+          },          
+          "critical": {
+            "text": "Failed directory count: {1}",
+            "value": 1
+          }
+        },
+        "jmx": {
+          "property_list": [
+            "Hadoop:service=NameNode,name=NameNodeInfo/NameDirStatuses"
+          ],
+          "value": "calculate(args)\ndef calculate(args):\n  import json\n  json_statuses = json.loads({0})\n  return len(json_statuses['failed']) if 'failed' in json_statuses else 0"
+        }
+      }
+    },
+    {
+      "name": "namenode_process",
+      "label": "NameNode Process",
+      "interval": 1,
+      "scope": "ANY",
+      "enabled": true,
+      "source": {
+        "type": "PORT",        
+        "uri": "{{hdfs-site/dfs.namenode.http-address}}",
+        "default_port": 50070,
+        "reporting": {
+          "ok": {
+            "text": "TCP OK - {0:.4f} response on port {1}"
+          },
+          "critical": {
+            "text": "Connection failed: {0} on host {1}:{2}"
+          }
+        }        
+      }
+    }    
   ],
   "SECONDARY_NAMENODE": [
     {
       "name": "secondary_namenode_process",
       "label": "Secondary NameNode Process",
       "interval": 1,
-      "scope": "any",
+      "scope": "ANY",
       "enabled": true,
       "source": {
         "type": "PORT",        
@@ -105,7 +328,7 @@
       "name": "journalnode_process",
       "label": "JournalNode Process",
       "interval": 1,
-      "scope": "host",
+      "scope": "HOST",
       "enabled": true,
       "source": {
         "type": "PORT",        
@@ -127,12 +350,12 @@
       "name": "datanode_process",
       "label": "DataNode Process",
       "interval": 1,
-      "scope": "host",
+      "scope": "HOST",
       "enabled": true,
       "source": {
         "type": "PORT",        
-        "uri": "{{hdfs-site/dfs.datanode.http.address}}",
-        "default_port": 50075,
+        "uri": "{{hdfs-site/dfs.datanode.address}}",
+        "default_port": 50010,
         "reporting": {
           "ok": {
             "text": "TCP OK - {0:.4f} response on port {1}"
@@ -142,6 +365,69 @@
           }
         }        
       }
+    },
+    {
+      "name": "datanode_webui",
+      "label": "DataNode Web UI",
+      "interval": 1,
+      "scope": "HOST",
+      "enabled": true,
+      "source": {
+        "type": "WEB",
+        "uri": {
+          "http": "{{hdfs-site/dfs.datanode.http.address}}",
+          "https": "{{hdfs-site/dfs.datanode.https.address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "The UI returned a response code of {0}"
+          },
+          "warning":{
+            "text": "The UI returned a response code of {0}"
+          },
+          "critical": {
+            "text": "Connection failed to {1}:{2}"
+          }
+        }        
+      }
+    },    
+    {
+      "name": "datanode_storage",
+      "label": "DataNode Storage",
+      "interval": 2,
+      "scope": "HOST",
+      "enabled": true,
+      "source": {
+        "type": "METRIC",
+        "uri": {
+          "http": "{{hdfs-site/dfs.datanode.http.address}}",
+          "https": "{{hdfs-site/dfs.datanode.https.address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]"
+          },
+          "warning": {
+            "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
+            "value": 80
+          },
+          "critical": {
+            "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
+            "value": 90
+          }
+        },
+        "jmx": {
+          "property_list": [
+            "Hadoop:service=DataNode,name=FSDatasetState-*/Remaining",
+            "Hadoop:service=DataNode,name=FSDatasetState-*/Capacity"
+          ],
+          "value": "({1} - {0})/{1} * 100"
+        }
+      }
     }    
   ]
 }

+ 18 - 3
ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java

@@ -64,6 +64,7 @@ import org.apache.ambari.server.state.Stack;
 import org.apache.ambari.server.state.StackId;
 import org.apache.ambari.server.state.StackInfo;
 import org.apache.ambari.server.state.alert.AlertDefinition;
+import org.apache.ambari.server.state.alert.MetricSource;
 import org.apache.ambari.server.state.alert.PortSource;
 import org.apache.ambari.server.state.alert.Reporting;
 import org.apache.ambari.server.state.alert.Source;
@@ -1575,6 +1576,7 @@ public class AmbariMetaInfoTest {
     // find two different definitions and test each one
     AlertDefinition nameNodeProcess = null;
     AlertDefinition nameNodeCpu = null;
+    AlertDefinition datanodeStorage = null;
 
     Iterator<AlertDefinition> iterator = set.iterator();
     while (iterator.hasNext()) {
@@ -1586,12 +1588,16 @@ public class AmbariMetaInfoTest {
       if (definition.getName().equals("namenode_cpu")) {
         nameNodeCpu = definition;
       }
+
+      if (definition.getName().equals("datanode_storage")) {
+        datanodeStorage = definition;
+      }
     }
 
     assertNotNull(nameNodeProcess);
     assertNotNull(nameNodeCpu);
 
-    assertEquals("NameNode host CPU Utilization", nameNodeCpu.getLabel());
+    assertEquals("NameNode Host CPU Utilization", nameNodeCpu.getLabel());
 
     // test namenode_process
     Source source = nameNodeProcess.getSource();
@@ -1621,6 +1627,15 @@ public class AmbariMetaInfoTest {
     assertNotNull(reporting.getWarning());
     assertNotNull(reporting.getWarning().getText());
     assertNotNull(reporting.getWarning().getValue());
+
+    // test a metric alert
+    assertNotNull(datanodeStorage);
+    MetricSource metricSource = (MetricSource) datanodeStorage.getSource();
+    assertNotNull( metricSource.getUri() );
+    assertNotNull( metricSource.getUri().getHttpsProperty() );
+    assertNotNull( metricSource.getUri().getHttpsPropertyValue() );
+    assertNotNull( metricSource.getUri().getHttpsUri() );
+    assertNotNull( metricSource.getUri().getHttpUri() );
   }
 
   /**
@@ -1642,7 +1657,7 @@ public class AmbariMetaInfoTest {
 
     AlertDefinitionDAO dao = injector.getInstance(AlertDefinitionDAO.class);
     List<AlertDefinitionEntity> definitions = dao.findAll();
-    assertEquals(4, definitions.size());
+    assertEquals(5, definitions.size());
 
     for (AlertDefinitionEntity definition : definitions) {
       definition.setScheduleInterval(28);
@@ -1652,7 +1667,7 @@ public class AmbariMetaInfoTest {
     metaInfo.reconcileAlertDefinitions(clusters);
 
     definitions = dao.findAll();
-    assertEquals(4, definitions.size());
+    assertEquals(5, definitions.size());
 
     for (AlertDefinitionEntity definition : definitions) {
       assertEquals(28, definition.getScheduleInterval().intValue());

+ 1 - 1
ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertEventPublisherTest.java

@@ -141,7 +141,7 @@ public class AlertEventPublisherTest {
   public void testAlertDefinitionInsertion() throws Exception {
     Assert.assertEquals(0, definitionDao.findAll().size());
     installHdfsService();
-    Assert.assertEquals(4, definitionDao.findAll().size());
+    Assert.assertEquals(5, definitionDao.findAll().size());
   }
 
   /**

+ 59 - 13
ambari-server/src/test/resources/stacks/HDP/2.0.5/services/HDFS/alerts.json

@@ -4,28 +4,37 @@
   "NAMENODE": [
     {
       "name": "namenode_cpu",
-      "label": "NameNode host CPU Utilization",
+      "label": "NameNode Host CPU Utilization",
+      "interval": 2,
       "scope": "ANY",
+      "enabled": true,
       "source": {
         "type": "METRIC",
-        "jmx": {
-          "property_list": [
-            "java.lang:type=OperatingSystem/SystemCpuLoad"
-          ]
+        "uri": {
+          "http": "{{hdfs-site/dfs.namenode.http-address}}",
+          "https": "{{hdfs-site/dfs.namenode.https-address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
         },
-        "host": "{{hdfs-site/dfs.namenode.secondary.http-address}}",
         "reporting": {
           "ok": {
-            "text": "System CPU Load is OK"
+            "text": "{1} CPU, load {0:.1%}"
           },
           "warning": {
-            "text": "System CPU Load is Nearing Critical",
-            "value": 70
-          },          
+            "text": "{1} CPU, load {0:.1%}",
+            "value": 200
+          },
           "critical": {
-            "text": "System CPU Load is Critical",
-            "value": 80
+            "text": "{1} CPU, load {0:.1%}",
+            "value": 250
           }
+        },
+        "jmx": {
+          "property_list": [
+            "java.lang:type=OperatingSystem/SystemCpuLoad",
+            "java.lang:type=OperatingSystem/AvailableProcessors"
+          ],
+          "value": "{0} * 100"
         }
       }
     },
@@ -73,5 +82,42 @@
       }
     }
   ],  
-  "DATANODE": []
+  "DATANODE": [
+    {
+      "name": "datanode_storage",
+      "label": "DataNode Storage",
+      "interval": 2,
+      "scope": "HOST",
+      "enabled": true,
+      "source": {
+        "type": "METRIC",
+        "uri": {
+          "http": "{{hdfs-site/dfs.datanode.http.address}}",
+          "https": "{{hdfs-site/dfs.datanode.https.address}}",
+          "https_property": "{{hdfs-site/dfs.http.policy}}",
+          "https_property_value": "HTTPS_ONLY"
+        },
+        "reporting": {
+          "ok": {
+            "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]"
+          },
+          "warning": {
+            "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
+            "value": 80
+          },
+          "critical": {
+            "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, {1}]",
+            "value": 90
+          }
+        },
+        "jmx": {
+          "property_list": [
+            "Hadoop:service=DataNode,name=FSDatasetState-*/Remaining",
+            "Hadoop:service=DataNode,name=FSDatasetState-*/Capacity"
+          ],
+          "value": "({1} - {0})/{1} * 100"
+        }
+      }
+    }  
+  ]
 }