Browse Source

AMBARI-10816 - Expose Customizable Parameters For SCRIPT Alerts (jonathanhurley)

Jonathan Hurley 10 năm trước cách đây
mục cha
commit
6727c1dc8c
31 tập tin đã thay đổi với 1096 bổ sung và 373 xóa
  1. 18 4
      ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
  2. 55 0
      ambari-agent/src/test/python/ambari_agent/TestAlerts.py
  3. 12 7
      ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py
  4. 195 2
      ambari-server/src/main/java/org/apache/ambari/server/state/alert/ScriptSource.java
  5. 30 1
      ambari-server/src/main/resources/alerts.json
  6. 7 6
      ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
  7. 10 1
      ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
  8. 15 8
      ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
  9. 42 2
      ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
  10. 44 24
      ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
  11. 22 14
      ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
  12. 60 3
      ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
  13. 43 23
      ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py
  14. 51 30
      ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
  15. 30 19
      ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
  16. 18 17
      ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
  17. 3 2
      ambari-server/src/main/resources/common-services/STORM/0.9.1.2.1/package/alerts/check_supervisor_process_win.py
  18. 24 2
      ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/alerts.json
  19. 21 12
      ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
  20. 23 15
      ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
  21. 43 12
      ambari-server/src/main/resources/host_scripts/alert_disk_space.py
  22. 10 1
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
  23. 16 9
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
  24. 42 2
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
  25. 44 24
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
  26. 24 16
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
  27. 51 31
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
  28. 76 54
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
  29. 30 19
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
  30. 12 1
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
  31. 25 12
      ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py

+ 18 - 4
ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py

@@ -46,6 +46,7 @@ class ScriptAlert(BaseAlert):
     self.common_services_dir = None
     self.common_services_dir = None
     self.host_scripts_dir = None
     self.host_scripts_dir = None
     self.path_to_script = None
     self.path_to_script = None
+    self.parameters = {}
     
     
     if 'path' in alert_source_meta:
     if 'path' in alert_source_meta:
       self.path = alert_source_meta['path']
       self.path = alert_source_meta['path']
@@ -59,11 +60,24 @@ class ScriptAlert(BaseAlert):
     if 'host_scripts_directory' in alert_source_meta:
     if 'host_scripts_directory' in alert_source_meta:
       self.host_scripts_dir = alert_source_meta['host_scripts_directory']
       self.host_scripts_dir = alert_source_meta['host_scripts_directory']
 
 
+    # convert a list of script parameters, like timeouts, into a dictionary
+    # so that the scripts can easily look up the data
+    if 'parameters' in alert_source_meta:
+      parameters = alert_source_meta['parameters']
+      for parameter in parameters:
+        if 'name' not in parameter or 'value' not in parameter:
+          continue
+
+        # create the dictionary value
+        parameter_name = parameter['name']
+        parameter_value = parameter['value']
+        self.parameters[parameter_name] = parameter_value
+
   def _collect(self):
   def _collect(self):
     cmd_module = self._load_source()
     cmd_module = self._load_source()
 
 
     if cmd_module is not None:
     if cmd_module is not None:
-      parameters = {}
+      configurations = {}
 
 
       try:
       try:
         tokens = cmd_module.get_tokens()
         tokens = cmd_module.get_tokens()
@@ -73,7 +87,7 @@ class ScriptAlert(BaseAlert):
           for token in tokens:
           for token in tokens:
             value = self._get_configuration_value(token)
             value = self._get_configuration_value(token)
             if value is not None:
             if value is not None:
-              parameters[token] = value
+              configurations[token] = value
       except AttributeError:
       except AttributeError:
         # it's OK if the module doesn't have get_tokens() ; no tokens will
         # it's OK if the module doesn't have get_tokens() ; no tokens will
         # be passed in so hopefully the script doesn't need any
         # be passed in so hopefully the script doesn't need any
@@ -85,9 +99,9 @@ class ScriptAlert(BaseAlert):
       if matchObj:
       if matchObj:
         basedir = matchObj.group(1)
         basedir = matchObj.group(1)
         with Environment(basedir, tmp_dir=self.config.get('agent', 'tmp_dir')) as env:
         with Environment(basedir, tmp_dir=self.config.get('agent', 'tmp_dir')) as env:
-          return cmd_module.execute(parameters, self.host_name)
+          return cmd_module.execute(configurations, self.parameters, self.host_name)
       else:
       else:
-        return cmd_module.execute(parameters, self.host_name)
+        return cmd_module.execute(configurations, self.parameters, self.host_name)
     else:
     else:
       return (self.RESULT_UNKNOWN, ["Unable to execute script {0}".format(self.path)])
       return (self.RESULT_UNKNOWN, ["Unable to execute script {0}".format(self.path)])
     
     

+ 55 - 0
ambari-agent/src/test/python/ambari_agent/TestAlerts.py

@@ -210,6 +210,40 @@ class TestAlerts(TestCase):
     self.assertEquals('bar is rendered-bar, baz is rendered-baz', alerts[0]['text'])
     self.assertEquals('bar is rendered-bar, baz is rendered-baz', alerts[0]['text'])
 
 
 
 
+  def test_script_alert_with_parameters(self):
+    definition_json = self._get_script_alert_definition_with_parameters()
+
+    # normally set by AlertSchedulerHandler
+    definition_json['source']['stacks_directory'] = os.path.join('ambari_agent', 'dummy_files')
+    definition_json['source']['common_services_directory'] = os.path.join('ambari_agent', 'common-services')
+    definition_json['source']['host_scripts_directory'] = os.path.join('ambari_agent', 'host_scripts')
+
+    configuration = {'foo-site' :
+      { 'bar': 'rendered-bar', 'baz' : 'rendered-baz' }
+    }
+
+    collector = AlertCollector()
+    cluster_configuration = self.__get_cluster_configuration()
+    self.__update_cluster_configuration(cluster_configuration, configuration)
+
+    alert = ScriptAlert(definition_json, definition_json['source'], MagicMock())
+    alert.set_helpers(collector, cluster_configuration )
+    alert.set_cluster("c1", "c6401.ambari.apache.org")
+
+    self.assertEquals(definition_json['source']['path'], alert.path)
+    self.assertEquals(definition_json['source']['stacks_directory'], alert.stacks_dir)
+    self.assertEquals(definition_json['source']['common_services_directory'], alert.common_services_dir)
+    self.assertEquals(definition_json['source']['host_scripts_directory'], alert.host_scripts_dir)
+
+    alert.collect()
+
+    alerts = collector.alerts()
+    self.assertEquals(0, len(collector.alerts()))
+
+    self.assertEquals('OK', alerts[0]['state'])
+    self.assertEquals('Script parameter detected: foo bar baz', alerts[0]['text'])
+
+
   @patch.object(MetricAlert, "_load_jmx")
   @patch.object(MetricAlert, "_load_jmx")
   def test_metric_alert(self, ma_load_jmx_mock):
   def test_metric_alert(self, ma_load_jmx_mock):
     definition_json = self._get_metric_alert_definition()
     definition_json = self._get_metric_alert_definition()
@@ -969,6 +1003,27 @@ class TestAlerts(TestCase):
       }
       }
     }
     }
 
 
+  def _get_script_alert_definition_with_parameters(self):
+    return {
+      "name": "namenode_process",
+      "service": "HDFS",
+      "component": "NAMENODE",
+      "label": "NameNode process",
+      "interval": 6,
+      "scope": "host",
+      "enabled": True,
+      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+      "source": {
+        "type": "SCRIPT",
+        "path": "test_script.py",
+        "parameters": [
+          {
+          "name": "script.parameter.foo",
+          "value": "foo bar baz"
+          }
+        ]
+      }
+    }
 
 
   def _get_port_alert_definition(self):
   def _get_port_alert_definition(self):
     return { "name": "namenode_process",
     return { "name": "namenode_process",

+ 12 - 7
ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py

@@ -26,18 +26,23 @@ def get_tokens():
   return ('{{foo-site/bar}}','{{foo-site/baz}}')
   return ('{{foo-site/bar}}','{{foo-site/baz}}')
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   '''
   '''
   returns a tuple containing the result code and a pre-formatted result label
   returns a tuple containing the result code and a pre-formatted result label
   '''
   '''
-  if parameters is not None:
-    if '{{foo-site/bar}}' in parameters:
-      bar = parameters['{{foo-site/bar}}']
+
+  # short circuit the script when a parameter is present
+  if "script.parameter.foo" in parameters:
+    return "OK", ["Script parameter detected: " + parameters["script.parameter.foo"]]
+
+  if configurations is not None:
+    if '{{foo-site/bar}}' in configurations:
+      bar = configurations['{{foo-site/bar}}']
     
     
-    if '{{foo-site/baz}}' in parameters:
-      baz = parameters['{{foo-site/baz}}']
+    if '{{foo-site/baz}}' in configurations:
+      baz = configurations['{{foo-site/baz}}']
 
 
-    if '{{foo-site/skip}}' in parameters:
+    if '{{foo-site/skip}}' in configurations:
       return ('SKIPPED', ['This alert is skipped and will not be in the collector'])
       return ('SKIPPED', ['This alert is skipped and will not be in the collector'])
   
   
   label = "bar is {0}, baz is {1}".format(bar, baz)  
   label = "bar is {0}, baz is {1}".format(bar, baz)  

+ 195 - 2
ambari-server/src/main/java/org/apache/ambari/server/state/alert/ScriptSource.java

@@ -17,6 +17,10 @@
  */
  */
 package org.apache.ambari.server.state.alert;
 package org.apache.ambari.server.state.alert;
 
 
+import java.util.List;
+
+import org.apache.ambari.server.state.AlertState;
+
 import com.google.gson.annotations.SerializedName;
 import com.google.gson.annotations.SerializedName;
 
 
 /**
 /**
@@ -30,6 +34,12 @@ public class ScriptSource extends Source {
   @SerializedName("path")
   @SerializedName("path")
   private String m_path = null;
   private String m_path = null;
 
 
+  /**
+   * A list of all of the script parameters, if any.
+   */
+  @SerializedName("parameters")
+  private List<ScriptParameter> m_parameters;
+
   /**
   /**
    * @return the path to the script file.
    * @return the path to the script file.
    */
    */
@@ -38,7 +48,7 @@ public class ScriptSource extends Source {
   }
   }
 
 
   /**
   /**
-   *
+   * {@inheritDoc}
    */
    */
   @Override
   @Override
   public int hashCode() {
   public int hashCode() {
@@ -50,7 +60,7 @@ public class ScriptSource extends Source {
   }
   }
 
 
   /**
   /**
-   *
+   * {@inheritDoc}
    */
    */
   @Override
   @Override
   public boolean equals(Object obj) {
   public boolean equals(Object obj) {
@@ -78,4 +88,187 @@ public class ScriptSource extends Source {
 
 
     return true;
     return true;
   }
   }
+
+  /**
+   * The {@link ScriptParameter} class represents a single parameter that can be
+   * passed into a script alert.
+   */
+  public static class ScriptParameter {
+    @SerializedName("name")
+    private String m_name;
+
+    @SerializedName("display_name")
+    private String m_displayName;
+
+    @SerializedName("units")
+    private String m_units;
+
+    @SerializedName("value")
+    private Object m_value;
+
+    @SerializedName("description")
+    private String m_description;
+
+    @SerializedName("type")
+    private ScriptParameterType m_type;
+
+    /**
+     * If this script parameter controls a threshold, then it's specified here,
+     * otherwise it's {@code null}.
+     */
+    @SerializedName("threshold")
+    private AlertState m_threshold;
+
+    /**
+     * Gets the unique name of the parameter.
+     *
+     * @return the name
+     */
+    public String getName() {
+      return m_name;
+    }
+
+    /**
+     * Gets the human readable name of the parameter.
+     *
+     * @return the displayName
+     */
+    public String getDisplayName() {
+      return m_displayName;
+    }
+
+    /**
+     * Gets the display units of the parameter.
+     *
+     * @return the units
+     */
+    public String getUnits() {
+      return m_units;
+    }
+
+    /**
+     * Gets the value of the parameter.
+     *
+     * @return the value
+     */
+    public Object getValue() {
+      return m_value;
+    }
+
+    /**
+     * Gets the description of the parameter.
+     *
+     * @return the description
+     */
+    public String getDescription() {
+      return m_description;
+    }
+
+    /**
+     * Gets the threshold that this parameter directly controls, or {@code null}
+     * for none.
+     *
+     * @return the threshold, or {@code null}.
+     */
+    public AlertState getThreshold() {
+      return m_threshold;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + ((m_description == null) ? 0 : m_description.hashCode());
+      result = prime * result + ((m_displayName == null) ? 0 : m_displayName.hashCode());
+      result = prime * result + ((m_name == null) ? 0 : m_name.hashCode());
+      result = prime * result + ((m_threshold == null) ? 0 : m_threshold.hashCode());
+      result = prime * result + ((m_type == null) ? 0 : m_type.hashCode());
+      result = prime * result + ((m_units == null) ? 0 : m_units.hashCode());
+      result = prime * result + ((m_value == null) ? 0 : m_value.hashCode());
+      return result;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (obj == null) {
+        return false;
+      }
+      if (getClass() != obj.getClass()) {
+        return false;
+      }
+      ScriptParameter other = (ScriptParameter) obj;
+      if (m_description == null) {
+        if (other.m_description != null) {
+          return false;
+        }
+      } else if (!m_description.equals(other.m_description)) {
+        return false;
+      }
+      if (m_displayName == null) {
+        if (other.m_displayName != null) {
+          return false;
+        }
+      } else if (!m_displayName.equals(other.m_displayName)) {
+        return false;
+      }
+      if (m_name == null) {
+        if (other.m_name != null) {
+          return false;
+        }
+      } else if (!m_name.equals(other.m_name)) {
+        return false;
+      }
+      if (m_threshold != other.m_threshold) {
+        return false;
+      }
+      if (m_type != other.m_type) {
+        return false;
+      }
+      if (m_units == null) {
+        if (other.m_units != null) {
+          return false;
+        }
+      } else if (!m_units.equals(other.m_units)) {
+        return false;
+      }
+      if (m_value == null) {
+        if (other.m_value != null) {
+          return false;
+        }
+      } else if (!m_value.equals(other.m_value)) {
+        return false;
+      }
+      return true;
+    }
+
+
+    /**
+     * The {@link ScriptParameterType} enum represents the value type.
+     */
+    public enum ScriptParameterType {
+      /**
+       * String
+       */
+      STRING,
+
+      /**
+       * Integers, longs, floats, etc.
+       */
+      NUMERIC,
+
+      /**
+       * A percent value, expressed as a float.
+       */
+      PERCENT
+    }
+  }
 }
 }

+ 30 - 1
ambari-server/src/main/resources/alerts.json

@@ -38,7 +38,36 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "alert_disk_space.py"
+          "path": "alert_disk_space.py",
+          "parameters": [
+            {
+              "name": "minimum.free.space",
+              "display_name": "Minimum Free Space",
+              "value": 5000000000,
+              "type": "NUMERIC",
+              "description": "The overall amount of free disk space left before an alert is triggered.",
+              "units": "bytes",
+              "threshold": "WARNING"
+            },
+            {
+              "name": "percent.used.space.warning.threshold",
+              "display_name": "Warning",
+              "value": 0.5,
+              "type": "PERCENT",
+              "description": "The percent of disk space consumed before a warning is triggered.",
+              "units": "%",
+              "threshold": "WARNING"
+            },
+            {
+              "name": "percent.free.space.critical.threshold",
+              "display_name": "Critical",
+              "value": 0.8,
+              "type": "PERCENT",
+              "description": "The percent of disk space consumed before a critical alert is triggered.",
+              "units": "%",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
         }
       }
       }
     ]
     ]

+ 7 - 6
ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py

@@ -73,20 +73,21 @@ def is_monitor_process_live(pid_file):
   return live
   return live
 
 
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
 
 
-  if set([AMS_MONITOR_PID_DIR]).issubset(parameters):
-    AMS_MONITOR_PID_PATH = os.path.join(parameters[AMS_MONITOR_PID_DIR], 'ambari-metrics-monitor.pid')
+  if set([AMS_MONITOR_PID_DIR]).issubset(configurations):
+    AMS_MONITOR_PID_PATH = os.path.join(configurations[AMS_MONITOR_PID_DIR], 'ambari-metrics-monitor.pid')
   else:
   else:
     return (RESULT_CODE_UNKNOWN, ['The ams_monitor_pid_dir is a required parameter.'])
     return (RESULT_CODE_UNKNOWN, ['The ams_monitor_pid_dir is a required parameter.'])
 
 

+ 10 - 1
ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json

@@ -10,7 +10,16 @@
         "scope": "ANY",
         "scope": "ANY",
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py"
+          "path": "FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py",
+          "parameters": [
+            {
+              "name": "run.directory",
+              "display_name": "Run Directory",
+              "value": "/var/run/flume",
+              "type": "STRING",
+              "description": "The directory where flume agent processes will place their PID files."
+            }
+          ]
         }
         }
       }
       }
     ]
     ]

+ 15 - 8
ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py

@@ -29,7 +29,8 @@ RESULT_CODE_UNKNOWN = 'UNKNOWN'
 
 
 FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
 FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
 
 
-FLUME_RUN_DIR = '/var/run/flume'
+FLUME_RUN_DIR_KEY = "run.directory"
+FLUME_RUN_DIR_DEFAULT = '/var/run/flume'
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -39,21 +40,22 @@ def get_tokens():
   return (FLUME_CONF_DIR_KEY,)
   return (FLUME_CONF_DIR_KEY,)
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
 
 
   flume_conf_directory = None
   flume_conf_directory = None
-  if FLUME_CONF_DIR_KEY in parameters:
-    flume_conf_directory = parameters[FLUME_CONF_DIR_KEY]
+  if FLUME_CONF_DIR_KEY in configurations:
+    flume_conf_directory = configurations[FLUME_CONF_DIR_KEY]
 
 
   if flume_conf_directory is None:
   if flume_conf_directory is None:
     return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
     return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
@@ -61,7 +63,12 @@ def execute(parameters=None, host_name=None):
   if host_name is None:
   if host_name is None:
     host_name = socket.getfqdn()
     host_name = socket.getfqdn()
 
 
-  processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
+  # parse script arguments
+  flume_run_directory = FLUME_RUN_DIR_DEFAULT
+  if FLUME_RUN_DIR_KEY in parameters:
+    flume_run_directory = parameters[FLUME_RUN_DIR_KEY]
+
+  processes = get_flume_status(flume_conf_directory, flume_run_directory)
   expected_agents = find_expected_agent_names(flume_conf_directory)
   expected_agents = find_expected_agent_names(flume_conf_directory)
 
 
   alert_label = ''
   alert_label = ''

+ 42 - 2
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json

@@ -387,7 +387,36 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py"
+          "path": "HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            },
+            {
+              "name": "checkpoint.time.warning.threshold",
+              "display_name": "Checkpoint Warning",
+              "value": 2.0,
+              "type": "PERCENT",
+              "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a warning alert.",
+              "units": "%",
+              "threshold": "WARNING"
+            },
+            {
+              "name": "checkpoint.time.critical.threshold",
+              "display_name": "Checkpoint Critical",
+              "value": 2.0,
+              "type": "PERCENT",
+              "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a critical alert.",
+              "units": "%",
+              "threshold": "CRITICAL"
+            }
+	        ]
         }
         }
       },
       },
       {
       {
@@ -400,7 +429,18 @@
         "ignore_host": true,
         "ignore_host": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py"
+          "path": "HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
         }
       }
       }
     ],
     ],

+ 44 - 24
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py

@@ -30,13 +30,17 @@ NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
 NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
 NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
 NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
 
 
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
+PERCENT_WARNING_KEY = 'checkpoint.time.warning.threshold'
+PERCENT_WARNING_DEFAULT = 200
+
+PERCENT_CRITICAL_KEY = 'checkpoint.time.critical.threshold'
+PERCENT_CRITICAL_DEFAULT = 200
 
 
 CHECKPOINT_TX_DEFAULT = 1000000
 CHECKPOINT_TX_DEFAULT = 1000000
 CHECKPOINT_PERIOD_DEFAULT = 21600
 CHECKPOINT_PERIOD_DEFAULT = 21600
 
 
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -47,43 +51,55 @@ def get_tokens():
       NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)      
       NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)      
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+  if configurations is None:
+    return (('UNKNOWN', ['There were no configurations supplied to the script.']))
   
   
   uri = None
   uri = None
   scheme = 'http'  
   scheme = 'http'  
   http_uri = None
   http_uri = None
   https_uri = None
   https_uri = None
   http_policy = 'HTTP_ONLY'
   http_policy = 'HTTP_ONLY'
-  percent_warning = PERCENT_WARNING
-  percent_critical = PERCENT_CRITICAL
   checkpoint_tx = CHECKPOINT_TX_DEFAULT
   checkpoint_tx = CHECKPOINT_TX_DEFAULT
   checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
   checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
   
   
-  if NN_HTTP_ADDRESS_KEY in parameters:
-    http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+  if NN_HTTP_ADDRESS_KEY in configurations:
+    http_uri = configurations[NN_HTTP_ADDRESS_KEY]
 
 
-  if NN_HTTPS_ADDRESS_KEY in parameters:
-    https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+  if NN_HTTPS_ADDRESS_KEY in configurations:
+    https_uri = configurations[NN_HTTPS_ADDRESS_KEY]
 
 
-  if NN_HTTP_POLICY_KEY in parameters:
-    http_policy = parameters[NN_HTTP_POLICY_KEY]
+  if NN_HTTP_POLICY_KEY in configurations:
+    http_policy = configurations[NN_HTTP_POLICY_KEY]
 
 
-  if NN_CHECKPOINT_TX_KEY in parameters:
-    checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+  if NN_CHECKPOINT_TX_KEY in configurations:
+    checkpoint_tx = configurations[NN_CHECKPOINT_TX_KEY]
+
+  if NN_CHECKPOINT_PERIOD_KEY in configurations:
+    checkpoint_period = configurations[NN_CHECKPOINT_PERIOD_KEY]
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+  percent_warning = PERCENT_WARNING_DEFAULT
+  if PERCENT_WARNING_KEY in parameters:
+    percent_warning = float(parameters[PERCENT_WARNING_KEY]) * 100
+
+  percent_critical = PERCENT_CRITICAL_DEFAULT
+  if PERCENT_CRITICAL_KEY in parameters:
+    percent_critical = float(parameters[PERCENT_CRITICAL_KEY]) * 100
 
 
-  if NN_CHECKPOINT_PERIOD_KEY in parameters:
-    checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
-    
   # determine the right URI and whether to use SSL
   # determine the right URI and whether to use SSL
   uri = http_uri
   uri = http_uri
   if http_policy == 'HTTPS_ONLY':
   if http_policy == 'HTTPS_ONLY':
@@ -102,8 +118,12 @@ def execute(parameters=None, host_name=None):
   result_code = "OK"
   result_code = "OK"
 
 
   try:
   try:
-    last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
-    journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+    last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,
+      "LastCheckpointTime", connection_timeout))
+
+    journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,
+      "JournalTransactionInfo", connection_timeout)
+
     journal_transaction_info_dict = json.loads(journal_transaction_info)
     journal_transaction_info_dict = json.loads(journal_transaction_info)
   
   
     last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
     last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
@@ -131,11 +151,11 @@ def get_time(delta):
   return {'h':h, 'm':m}
   return {'h':h, 'm':m}
 
 
 
 
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
   response = None
   response = None
   
   
   try:
   try:
-    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+    response = urllib2.urlopen(query, timeout=connection_timeout)
     data = response.read()
     data = response.read()
 
 
     data_dict = json.loads(data)
     data_dict = json.loads(data)

+ 22 - 14
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py

@@ -35,7 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
 NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
 NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
 DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 
 
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -46,34 +47,41 @@ def get_tokens():
   NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
   NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
-  if parameters is None:
-    return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
 
 
   # if not in HA mode, then SKIP
   # if not in HA mode, then SKIP
-  if not NAMESERVICE_KEY in parameters:
+  if not NAMESERVICE_KEY in configurations:
     return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
     return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
 
 
   # hdfs-site is required
   # hdfs-site is required
-  if not HDFS_SITE_KEY in parameters:
+  if not HDFS_SITE_KEY in configurations:
     return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
     return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
 
 
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+
   # determine whether or not SSL is enabled
   # determine whether or not SSL is enabled
   is_ssl_enabled = False
   is_ssl_enabled = False
-  if DFS_POLICY_KEY in parameters:
-    dfs_policy = parameters[DFS_POLICY_KEY]
+  if DFS_POLICY_KEY in configurations:
+    dfs_policy = configurations[DFS_POLICY_KEY]
     if dfs_policy == "HTTPS_ONLY":
     if dfs_policy == "HTTPS_ONLY":
       is_ssl_enabled = True
       is_ssl_enabled = True
 
 
-  name_service = parameters[NAMESERVICE_KEY]
-  hdfs_site = parameters[HDFS_SITE_KEY]
+  name_service = configurations[NAMESERVICE_KEY]
+  hdfs_site = configurations[HDFS_SITE_KEY]
 
 
   # look for dfs.ha.namenodes.foo
   # look for dfs.ha.namenodes.foo
   nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
   nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
@@ -105,7 +113,7 @@ def execute(parameters=None, host_name=None):
 
 
       try:
       try:
         jmx_uri = jmx_uri_fragment.format(value)
         jmx_uri = jmx_uri_fragment.format(value)
-        state = get_value_from_jmx(jmx_uri,'State')
+        state = get_value_from_jmx(jmx_uri, 'State', connection_timeout)
 
 
         if state == HDFS_NN_STATE_ACTIVE:
         if state == HDFS_NN_STATE_ACTIVE:
           active_namenodes.append(value)
           active_namenodes.append(value)
@@ -161,11 +169,11 @@ def execute(parameters=None, host_name=None):
       return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
       return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
 
 
 
 
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
   response = None
   response = None
   
   
   try:
   try:
-    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+    response = urllib2.urlopen(query, timeout=connection_timeout)
     data = response.read()
     data = response.read()
 
 
     data_dict = json.loads(data)
     data_dict = json.loads(data)

+ 60 - 3
ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json

@@ -11,7 +11,30 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py"
+          "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py",
+          "parameters": [
+            {
+              "name": "default.smoke.user",
+              "display_name": "Default Smoke User",
+              "value": "ambari-qa",
+              "type": "STRING",
+              "description": "The user that will run the Hive commands if not specified in cluster-env/smokeuser"
+            },
+            {
+              "name": "default.smoke.principal",
+              "display_name": "Default Smoke Principal",
+              "value": "ambari-qa@EXAMPLE.COM",
+              "type": "STRING",
+              "description": "The principal to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_principal_name"
+            },
+            {
+              "name": "default.smoke.keytab",
+              "display_name": "Default Smoke Keytab",
+              "value": "/etc/security/keytabs/smokeuser.headless.keytab",
+              "type": "STRING",
+              "description": "The keytab to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_keytab"
+            }
+          ]
         }
         }
       }
       }
     ],
     ],
@@ -25,7 +48,30 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py"
+          "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py",
+          "parameters": [
+            {
+              "name": "default.smoke.user",
+              "display_name": "Default Smoke User",
+              "value": "ambari-qa",
+              "type": "STRING",
+              "description": "The user that will run the Hive commands if not specified in cluster-env/smokeuser"
+            },
+            {
+              "name": "default.smoke.principal",
+              "display_name": "Default Smoke Principal",
+              "value": "ambari-qa@EXAMPLE.COM",
+              "type": "STRING",
+              "description": "The principal to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_principal_name"
+            },
+            {
+              "name": "default.smoke.keytab",
+              "display_name": "Default Smoke Keytab",
+              "value": "/etc/security/keytabs/smokeuser.headless.keytab",
+              "type": "STRING",
+              "description": "The keytab to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_keytab"
+            }
+          ]
         }
         }
       }
       }
     ],
     ],
@@ -39,7 +85,18 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py"
+          "path": "HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
         }
       }    
       }    
     ]
     ]

+ 43 - 23
ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py

@@ -33,15 +33,20 @@ SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
 SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}'
 SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}'
 SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
 SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
 HIVE_METASTORE_URIS_KEY = '{{hive-site/hive.metastore.uris}}'
 HIVE_METASTORE_URIS_KEY = '{{hive-site/hive.metastore.uris}}'
+
 # The configured Kerberos executable search paths, if any
 # The configured Kerberos executable search paths, if any
 KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
 KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
 
 
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
-
+# default keytab location
+SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab'
 SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
 SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+
+# default smoke principal
+SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal'
 SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
 SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
+
+# default smoke user
+SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 
 
 def get_tokens():
 def get_tokens():
@@ -53,46 +58,61 @@ def get_tokens():
     HIVE_METASTORE_URIS_KEY, SMOKEUSER_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
     HIVE_METASTORE_URIS_KEY, SMOKEUSER_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
 
 
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+  if configurations is None:
+    return (('UNKNOWN', ['There were no configurations supplied to the script.']))
 
 
-  if not HIVE_METASTORE_URIS_KEY in parameters:
+  if not HIVE_METASTORE_URIS_KEY in configurations:
     return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.']))
     return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.']))
-  metastore_uris = parameters[HIVE_METASTORE_URIS_KEY].split(',')
+
+  metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')
 
 
   security_enabled = False
   security_enabled = False
-  if SECURITY_ENABLED_KEY in parameters:
-    security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+  if SECURITY_ENABLED_KEY in configurations:
+    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
 
 
+  # defaults
+  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
   smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
   smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
-  if SMOKEUSER_PRINCIPAL_KEY in parameters:
-    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]
-
   smokeuser = SMOKEUSER_DEFAULT
   smokeuser = SMOKEUSER_DEFAULT
-  if SMOKEUSER_KEY in parameters:
-    smokeuser = parameters[SMOKEUSER_KEY]
+
+  # check script params
+  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
+    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]
+
+  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
+    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]
+
+  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
+    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
+
+
+  # check configurations last as they should always take precedence
+  if SMOKEUSER_PRINCIPAL_KEY in configurations:
+    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
+
+  if SMOKEUSER_KEY in configurations:
+    smokeuser = configurations[SMOKEUSER_KEY]
 
 
   result_code = None
   result_code = None
 
 
   try:
   try:
     if security_enabled:
     if security_enabled:
-      smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
-
-      if SMOKEUSER_KEYTAB_KEY in parameters:
-        smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+      if SMOKEUSER_KEYTAB_KEY in configurations:
+        smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]
 
 
       # Get the configured Kerberos executable search paths, if any
       # Get the configured Kerberos executable search paths, if any
-      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
-        kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
       else:
       else:
         kerberos_executable_search_paths = None
         kerberos_executable_search_paths = None
              
              

+ 51 - 30
ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py

@@ -36,18 +36,25 @@ HIVE_SERVER_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.pr
 SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
 SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
 SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}'
 SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}'
 SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
 SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
+
 # The configured Kerberos executable search paths, if any
 # The configured Kerberos executable search paths, if any
 KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
 KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
 
 
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
 THRIFT_PORT_DEFAULT = 10000
 THRIFT_PORT_DEFAULT = 10000
 HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary'
 HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary'
 HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
 HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
 HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
 HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
+
+# default keytab location
+SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab'
 SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
 SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+
+# default smoke principal
+SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal'
 SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
 SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
+
+# default smoke user
+SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 
 
 def get_tokens():
 def get_tokens():
@@ -61,59 +68,73 @@ def get_tokens():
     HIVE_SERVER_TRANSPORT_MODE_KEY,KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
     HIVE_SERVER_TRANSPORT_MODE_KEY,KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
 
 
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return ('UNKNOWN', ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return ('UNKNOWN', ['There were no configurations supplied to the script.'])
 
 
   transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
   transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
-  if HIVE_SERVER_TRANSPORT_MODE_KEY in parameters:
-    transport_mode = parameters[HIVE_SERVER_TRANSPORT_MODE_KEY]
+  if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
+    transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]
 
 
   port = THRIFT_PORT_DEFAULT
   port = THRIFT_PORT_DEFAULT
-  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in parameters:
-    port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
-  elif  transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in parameters:
-    port = int(parameters[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
+  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
+    port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
+  elif  transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
+    port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
 
 
   security_enabled = False
   security_enabled = False
-  if SECURITY_ENABLED_KEY in parameters:
-    security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+  if SECURITY_ENABLED_KEY in configurations:
+    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
 
 
   hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
   hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
-  if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
-    hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
+  if HIVE_SERVER2_AUTHENTICATION_KEY in configurations:
+    hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY]
 
 
+  # defaults
+  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
   smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
   smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
-  if SMOKEUSER_PRINCIPAL_KEY in parameters:
-    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]
-
   smokeuser = SMOKEUSER_DEFAULT
   smokeuser = SMOKEUSER_DEFAULT
-  if SMOKEUSER_KEY in parameters:
-    smokeuser = parameters[SMOKEUSER_KEY]
+
+  # check script params
+  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
+    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]
+
+  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
+    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]
+
+  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
+    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
+
+
+  # check configurations last as they should always take precedence
+  if SMOKEUSER_PRINCIPAL_KEY in configurations:
+    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
+
+  if SMOKEUSER_KEY in configurations:
+    smokeuser = configurations[SMOKEUSER_KEY]
 
 
   result_code = None
   result_code = None
 
 
   if security_enabled:
   if security_enabled:
     hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
     hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
-    if HIVE_SERVER_PRINCIPAL_KEY in parameters:
-      hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
-
-    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
+    if HIVE_SERVER_PRINCIPAL_KEY in configurations:
+      hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY]
 
 
-    if SMOKEUSER_KEYTAB_KEY in parameters:
-      smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+    if SMOKEUSER_KEYTAB_KEY in configurations:
+      smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]
 
 
     # Get the configured Kerberos executable search paths, if any
     # Get the configured Kerberos executable search paths, if any
-    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
-      kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
     else:
     else:
       kerberos_executable_search_paths = None
       kerberos_executable_search_paths = None
 
 

+ 30 - 19
ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py

@@ -53,8 +53,10 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
 WEBHCAT_OK_RESPONSE = 'ok'
 WEBHCAT_OK_RESPONSE = 'ok'
 WEBHCAT_PORT_DEFAULT = 50111
 WEBHCAT_PORT_DEFAULT = 50111
 
 
-CURL_CONNECTION_TIMEOUT = '5'
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
+CURL_CONNECTION_TIMEOUT_DEFAULT = str(int(CONNECTION_TIMEOUT_DEFAULT))
+
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -64,27 +66,36 @@ def get_tokens():
   return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, WEBHCAT_KEYTAB_KEY, WEBHCAT_PRINCIPAL_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
   return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, WEBHCAT_KEYTAB_KEY, WEBHCAT_PRINCIPAL_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
   result_code = RESULT_CODE_UNKNOWN
   result_code = RESULT_CODE_UNKNOWN
 
 
-  if parameters is None:
-    return (result_code, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (result_code, ['There were no configurations supplied to the script.'])
 
 
   webhcat_port = WEBHCAT_PORT_DEFAULT
   webhcat_port = WEBHCAT_PORT_DEFAULT
-  if TEMPLETON_PORT_KEY in parameters:
-    webhcat_port = int(parameters[TEMPLETON_PORT_KEY])
+  if TEMPLETON_PORT_KEY in configurations:
+    webhcat_port = int(configurations[TEMPLETON_PORT_KEY])
 
 
   security_enabled = False
   security_enabled = False
-  if SECURITY_ENABLED_KEY in parameters:
-    security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
+  if SECURITY_ENABLED_KEY in configurations:
+    security_enabled = configurations[SECURITY_ENABLED_KEY].lower() == 'true'
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  curl_connection_timeout = CURL_CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+    curl_connection_timeout = str(int(connection_timeout))
+
 
 
   # the alert will always run on the webhcat host
   # the alert will always run on the webhcat host
   if host_name is None:
   if host_name is None:
@@ -98,12 +109,12 @@ def execute(parameters=None, host_name=None):
   json_response = {}
   json_response = {}
 
 
   if security_enabled:
   if security_enabled:
-    if WEBHCAT_KEYTAB_KEY not in parameters or WEBHCAT_PRINCIPAL_KEY not in parameters:
-      return (RESULT_CODE_UNKNOWN, [str(parameters)])
+    if WEBHCAT_KEYTAB_KEY not in configurations or WEBHCAT_PRINCIPAL_KEY not in configurations:
+      return (RESULT_CODE_UNKNOWN, [str(configurations)])
 
 
     try:
     try:
-      webhcat_keytab = parameters[WEBHCAT_KEYTAB_KEY]
-      webhcat_principal = parameters[WEBHCAT_PRINCIPAL_KEY]
+      webhcat_keytab = configurations[WEBHCAT_KEYTAB_KEY]
+      webhcat_principal = configurations[WEBHCAT_PRINCIPAL_KEY]
 
 
       # substitute _HOST in kerberos principal with actual fqdn
       # substitute _HOST in kerberos principal with actual fqdn
       webhcat_principal = webhcat_principal.replace('_HOST', host_name)
       webhcat_principal = webhcat_principal.replace('_HOST', host_name)
@@ -115,8 +126,8 @@ def execute(parameters=None, host_name=None):
       kerberos_env = {'KRB5CCNAME': ccache_file}
       kerberos_env = {'KRB5CCNAME': ccache_file}
 
 
       # Get the configured Kerberos executable search paths, if any
       # Get the configured Kerberos executable search paths, if any
-      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
-        kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
       else:
       else:
         kerberos_executable_search_paths = None
         kerberos_executable_search_paths = None
 
 
@@ -136,7 +147,7 @@ def execute(parameters=None, host_name=None):
 
 
       # make a single curl call to get just the http code
       # make a single curl call to get just the http code
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w',
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w',
-        '%{http_code}', '--connect-timeout', CURL_CONNECTION_TIMEOUT,
+        '%{http_code}', '--connect-timeout', curl_connection_timeout,
         '-o', '/dev/null', query_url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
         '-o', '/dev/null', query_url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
 
 
       stdout, stderr = curl.communicate()
       stdout, stderr = curl.communicate()
@@ -160,7 +171,7 @@ def execute(parameters=None, host_name=None):
       # now that we have the http status and it was 200, get the content
       # now that we have the http status and it was 200, get the content
       start_time = time.time()
       start_time = time.time()
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL',
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL',
-        '--connect-timeout', CURL_CONNECTION_TIMEOUT, query_url, ],
+        '--connect-timeout', curl_connection_timeout, query_url, ],
         stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
         stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
 
 
       stdout, stderr = curl.communicate()
       stdout, stderr = curl.communicate()
@@ -178,7 +189,7 @@ def execute(parameters=None, host_name=None):
     try:
     try:
       # execute the query for the JSON that includes WebHCat status
       # execute the query for the JSON that includes WebHCat status
       start_time = time.time()
       start_time = time.time()
-      url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
+      url_response = urllib2.urlopen(query_url, timeout=connection_timeout)
       total_time = time.time() - start_time
       total_time = time.time() - start_time
 
 
       json_response = json.loads(url_response.read())
       json_response = json.loads(url_response.read())

+ 18 - 17
ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py

@@ -59,7 +59,7 @@ def get_tokens():
   return (OOZIE_URL_KEY, OOZIE_PRINCIPAL, SECURITY_ENABLED, OOZIE_KEYTAB, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
   return (OOZIE_URL_KEY, OOZIE_PRINCIPAL, SECURITY_ENABLED, OOZIE_KEYTAB, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
 
 
 @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
 @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def get_check_command(oozie_url, host_name, parameters):
+def get_check_command(oozie_url, host_name, configurations):
   from resource_management.libraries.functions import reload_windows_env
   from resource_management.libraries.functions import reload_windows_env
   reload_windows_env()
   reload_windows_env()
   oozie_home = os.environ['OOZIE_HOME']
   oozie_home = os.environ['OOZIE_HOME']
@@ -67,20 +67,20 @@ def get_check_command(oozie_url, host_name, parameters):
   return (command, None)
   return (command, None)
 
 
 @OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 @OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
-def get_check_command(oozie_url, host_name, parameters):
+def get_check_command(oozie_url, host_name, configurations):
   security_enabled = False
   security_enabled = False
-  if SECURITY_ENABLED in parameters:
-    security_enabled = str(parameters[SECURITY_ENABLED]).upper() == 'TRUE'
+  if SECURITY_ENABLED in configurations:
+    security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'
   kerberos_env = None
   kerberos_env = None
   if security_enabled:
   if security_enabled:
-    if OOZIE_KEYTAB in parameters and OOZIE_PRINCIPAL in parameters:
-      oozie_keytab = parameters[OOZIE_KEYTAB]
-      oozie_principal = parameters[OOZIE_PRINCIPAL]
+    if OOZIE_KEYTAB in configurations and OOZIE_PRINCIPAL in configurations:
+      oozie_keytab = configurations[OOZIE_KEYTAB]
+      oozie_principal = configurations[OOZIE_PRINCIPAL]
 
 
       # substitute _HOST in kerberos principal with actual fqdn
       # substitute _HOST in kerberos principal with actual fqdn
       oozie_principal = oozie_principal.replace('_HOST', host_name)
       oozie_principal = oozie_principal.replace('_HOST', host_name)
     else:
     else:
-      raise KerberosPropertiesNotFound('The Oozie keytab and principal are required parameters when security is enabled.')
+      raise KerberosPropertiesNotFound('The Oozie keytab and principal are required configurations when security is enabled.')
 
 
     # Create the kerberos credentials cache (ccache) file and set it in the environment to use
     # Create the kerberos credentials cache (ccache) file and set it in the environment to use
     # when executing curl
     # when executing curl
@@ -89,8 +89,8 @@ def get_check_command(oozie_url, host_name, parameters):
     kerberos_env = {'KRB5CCNAME': ccache_file}
     kerberos_env = {'KRB5CCNAME': ccache_file}
 
 
     # Get the configured Kerberos executable search paths, if any
     # Get the configured Kerberos executable search paths, if any
-    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
-      kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
     else:
     else:
       kerberos_executable_search_paths = None
       kerberos_executable_search_paths = None
 
 
@@ -110,30 +110,31 @@ def get_check_command(oozie_url, host_name, parameters):
   command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")
   command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")
   return (command, kerberos_env)
   return (command, kerberos_env)
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
 
 
-  if not OOZIE_URL_KEY in parameters:
+  if not OOZIE_URL_KEY in configurations:
     return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
     return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
 
 
   # use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all
   # use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all
   # interfaces, which doesn't work on Windows
   # interfaces, which doesn't work on Windows
   localhost_address = 'localhost' if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else '0.0.0.0'
   localhost_address = 'localhost' if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else '0.0.0.0'
 
 
-  oozie_url = parameters[OOZIE_URL_KEY]
+  oozie_url = configurations[OOZIE_URL_KEY]
   oozie_url = oozie_url.replace(urlparse(oozie_url).hostname,localhost_address)
   oozie_url = oozie_url.replace(urlparse(oozie_url).hostname,localhost_address)
 
 
   try:
   try:
-    command, env = get_check_command(oozie_url, host_name, parameters)
+    command, env = get_check_command(oozie_url, host_name, configurations)
     # execute the command
     # execute the command
     Execute(command, environment=env)
     Execute(command, environment=env)
 
 

+ 3 - 2
ambari-server/src/main/resources/common-services/STORM/0.9.1.2.1/package/alerts/check_supervisor_process_win.py

@@ -33,12 +33,13 @@ def get_tokens():
   """
   """
   return ()
   return ()
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 

+ 24 - 2
ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/alerts.json

@@ -203,7 +203,18 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py"
+          "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
         }
       }
       }
     ],
     ],
@@ -337,7 +348,18 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py"
+          "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
         }
       }
       }
     ],
     ],

+ 21 - 12
ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py

@@ -40,7 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
 
 
 NODEMANAGER_DEFAULT_PORT = 8042
 NODEMANAGER_DEFAULT_PORT = 8042
 
 
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -51,32 +52,40 @@ def get_tokens():
   YARN_HTTP_POLICY_KEY)
   YARN_HTTP_POLICY_KEY)
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
   result_code = RESULT_CODE_UNKNOWN
   result_code = RESULT_CODE_UNKNOWN
 
 
-  if parameters is None:
-    return (result_code, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (result_code, ['There were no configurations supplied to the script.'])
 
 
   scheme = 'http'
   scheme = 'http'
   http_uri = None
   http_uri = None
   https_uri = None
   https_uri = None
   http_policy = 'HTTP_ONLY'
   http_policy = 'HTTP_ONLY'
 
 
-  if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
-    http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+  if NODEMANAGER_HTTP_ADDRESS_KEY in configurations:
+    http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY]
 
 
-  if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
-    https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+  if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations:
+    https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+  if YARN_HTTP_POLICY_KEY in configurations:
+    http_policy = configurations[YARN_HTTP_POLICY_KEY]
+
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
 
 
-  if YARN_HTTP_POLICY_KEY in parameters:
-    http_policy = parameters[YARN_HTTP_POLICY_KEY]
 
 
   # determine the right URI and whether to use SSL
   # determine the right URI and whether to use SSL
   uri = http_uri
   uri = http_uri
@@ -108,7 +117,7 @@ def execute(parameters=None, host_name=None):
 
 
   try:
   try:
     # execute the query for the JSON that includes the NodeManager health status
     # execute the query for the JSON that includes the NodeManager health status
-    url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+    url_response = urllib2.urlopen(query, timeout=connection_timeout)
   except urllib2.HTTPError, httpError:
   except urllib2.HTTPError, httpError:
     label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
     label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
       str(httpError))
       str(httpError))

+ 23 - 15
ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py

@@ -30,7 +30,8 @@ NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.address}
 NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}'
 NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}'
 YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
 YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
 
 
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
   
   
 def get_tokens():
 def get_tokens():
   """
   """
@@ -41,32 +42,38 @@ def get_tokens():
     YARN_HTTP_POLICY_KEY
     YARN_HTTP_POLICY_KEY
 
 
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+  if configurations is None:
+    return (('UNKNOWN', ['There were no configurations supplied to the script.']))
 
 
   scheme = 'http'  
   scheme = 'http'  
   http_uri = None
   http_uri = None
   https_uri = None
   https_uri = None
   http_policy = 'HTTP_ONLY'
   http_policy = 'HTTP_ONLY'
   
   
-  if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
-    http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+  if NODEMANAGER_HTTP_ADDRESS_KEY in configurations:
+    http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY]
 
 
-  if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
-    https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+  if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations:
+    https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+  if YARN_HTTP_POLICY_KEY in configurations:
+    http_policy = configurations[YARN_HTTP_POLICY_KEY]
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
 
 
-  if YARN_HTTP_POLICY_KEY in parameters:
-    http_policy = parameters[YARN_HTTP_POLICY_KEY]
-    
   # determine the right URI and whether to use SSL
   # determine the right URI and whether to use SSL
   uri = http_uri
   uri = http_uri
   if http_policy == 'HTTPS_ONLY':
   if http_policy == 'HTTPS_ONLY':
@@ -78,7 +85,8 @@ def execute(parameters=None, host_name=None):
   live_nodemanagers_qry = "{0}://{1}/jmx?qry=Hadoop:service=ResourceManager,name=RMNMInfo".format(scheme, uri)
   live_nodemanagers_qry = "{0}://{1}/jmx?qry=Hadoop:service=ResourceManager,name=RMNMInfo".format(scheme, uri)
 
 
   try:
   try:
-    live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry, "LiveNodeManagers"))
+    live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry,
+      "LiveNodeManagers", connection_timeout))
 
 
     unhealthy_count = 0
     unhealthy_count = 0
 
 
@@ -104,14 +112,14 @@ def execute(parameters=None, host_name=None):
   return (result_code, [label])
   return (result_code, [label])
 
 
 
 
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
   response = None
   response = None
   
   
   try:
   try:
     # use a custom header processor that will look for the non-standard
     # use a custom header processor that will look for the non-standard
     # "Refresh" header and attempt to follow the redirect
     # "Refresh" header and attempt to follow the redirect
     url_opener = urllib2.build_opener(RefreshHeaderProcessor())
     url_opener = urllib2.build_opener(RefreshHeaderProcessor())
-    response = url_opener.open(query, timeout=CONNECTION_TIMEOUT)
+    response = url_opener.open(query, timeout=connection_timeout)
 
 
     data = response.read()
     data = response.read()
     data_dict = json.loads(data)
     data_dict = json.loads(data)

+ 43 - 12
ambari-server/src/main/resources/host_scripts/alert_disk_space.py

@@ -25,7 +25,16 @@ from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
 from ambari_commons import OSConst
 from ambari_commons import OSConst
 
 
 DiskInfo = collections.namedtuple('DiskInfo', 'total used free path')
 DiskInfo = collections.namedtuple('DiskInfo', 'total used free path')
-MIN_FREE_SPACE = 5000000000L   # 5GB
+
+# script parameter keys
+MIN_FREE_SPACE_KEY = "minimum.free.space"
+PERCENT_USED_WARNING_KEY = "percent.used.space.warning.threshold"
+PERCENT_USED_CRITICAL_KEY = "percent.free.space.critical.threshold"
+
+# defaults in case no script parameters are passed
+MIN_FREE_SPACE_DEFAULT = 5000000000L
+PERCENT_USED_WARNING_DEFAULT = 50
+PERCENT_USED_CRITICAL_DEFAULT = 80
 
 
 # the location where HDP installs components when using HDP 2.2+
 # the location where HDP installs components when using HDP 2.2+
 HDP_HOME_DIR = "/usr/hdp"
 HDP_HOME_DIR = "/usr/hdp"
@@ -40,8 +49,9 @@ def get_tokens():
   """
   """
   return None
   return None
 
 
+
 @OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 @OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Performs advanced disk checks under Linux. This will first attempt to
   Performs advanced disk checks under Linux. This will first attempt to
   check the HDP installation directories if they exist. If they do not exist,
   check the HDP installation directories if they exist. If they do not exist,
@@ -50,7 +60,8 @@ def execute(parameters=None, host_name=None):
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
@@ -66,21 +77,40 @@ def execute(parameters=None, host_name=None):
 
 
   try:
   try:
     disk_usage = _get_disk_usage(path)
     disk_usage = _get_disk_usage(path)
-    result_code, label = _get_warnings_for_partition(disk_usage)
+    result_code, label = _get_warnings_for_partition(parameters, disk_usage)
   except NotImplementedError, platform_error:
   except NotImplementedError, platform_error:
     return 'CRITICAL', [str(platform_error)]
     return 'CRITICAL', [str(platform_error)]
 
 
   return result_code, [label]
   return result_code, [label]
 
 
-def _get_warnings_for_partition(disk_usage):
+
+def _get_warnings_for_partition(parameters, disk_usage):
+
+  # start with hard coded defaults
+  min_free_space = MIN_FREE_SPACE_DEFAULT
+  warning_percent = PERCENT_USED_WARNING_DEFAULT
+  critical_percent = PERCENT_USED_CRITICAL_DEFAULT
+
+  # parse script parameters
+  if MIN_FREE_SPACE_KEY in parameters:
+    # the value may arrive in scientific notation (e.g. "5.0E9", as gson appears
+    # to serialize large numbers), so parse it as a float before converting to long
+    min_free_space = long(float(parameters[MIN_FREE_SPACE_KEY]))
+
+  if PERCENT_USED_WARNING_KEY in parameters:
+    warning_percent = float(parameters[PERCENT_USED_WARNING_KEY]) * 100
+
+  if PERCENT_USED_CRITICAL_KEY in parameters:
+    critical_percent = float(parameters[PERCENT_USED_CRITICAL_KEY]) * 100
+
+
   if disk_usage is None or disk_usage.total == 0:
   if disk_usage is None or disk_usage.total == 0:
     return 'CRITICAL', ['Unable to determine the disk usage']
     return 'CRITICAL', ['Unable to determine the disk usage']
 
 
   result_code = 'OK'
   result_code = 'OK'
   percent = disk_usage.used / float(disk_usage.total) * 100
   percent = disk_usage.used / float(disk_usage.total) * 100
-  if percent > 80:
+  if percent > critical_percent:
     result_code = 'CRITICAL'
     result_code = 'CRITICAL'
-  elif percent > 50:
+  elif percent > warning_percent:
     result_code = 'WARNING'
     result_code = 'WARNING'
 
 
   label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format(
   label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format(
@@ -92,26 +122,27 @@ def _get_warnings_for_partition(disk_usage):
 
 
   if result_code == 'OK':
   if result_code == 'OK':
     # Check absolute disk space value
     # Check absolute disk space value
-    if disk_usage.free < MIN_FREE_SPACE:
+    if disk_usage.free < min_free_space:
       result_code = 'WARNING'
       result_code = 'WARNING'
-      label += '. Total free space is less than {0}'.format(_get_formatted_size(MIN_FREE_SPACE))
+      label += '. Total free space is less than {0}'.format(_get_formatted_size(min_free_space))
 
 
   return result_code, label
   return result_code, label
 
 
 
 
 @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
 @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Performs simplified disk checks under Windows
   Performs simplified disk checks under Windows
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
   try:
   try:
     disk_usage = _get_disk_usage()
     disk_usage = _get_disk_usage()
-    result = _get_warnings_for_partition(disk_usage)
+    result = _get_warnings_for_partition(parameters, disk_usage)
   except NotImplementedError, platform_error:
   except NotImplementedError, platform_error:
     result = ('CRITICAL', [str(platform_error)])
     result = ('CRITICAL', [str(platform_error)])
   return result
   return result

+ 10 - 1
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json

@@ -10,7 +10,16 @@
         "scope": "ANY",
         "scope": "ANY",
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py"
+          "path": "BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py",
+          "parameters": [
+            {
+              "name": "run.directory",
+              "display_name": "Run Directory",
+              "value": "/var/run/flume",
+              "type": "STRING",
+              "description": "The directory where flume agent processes will place their PID files."
+            }
+          ]
         }
         }
       }
       }
     ]
     ]

+ 16 - 9
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py

@@ -29,7 +29,8 @@ RESULT_CODE_UNKNOWN = 'UNKNOWN'
 
 
 FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
 FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
 
 
-FLUME_RUN_DIR = '/var/run/flume'
+FLUME_RUN_DIR_KEY = "run.directory"
+FLUME_RUN_DIR_DEFAULT = '/var/run/flume'
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -37,23 +38,24 @@ def get_tokens():
   to build the dictionary passed into execute
   to build the dictionary passed into execute
   """
   """
   return (FLUME_CONF_DIR_KEY,)
   return (FLUME_CONF_DIR_KEY,)
-  
 
 
-def execute(parameters=None, host_name=None):
+
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
 
 
   flume_conf_directory = None
   flume_conf_directory = None
-  if FLUME_CONF_DIR_KEY in parameters:
-    flume_conf_directory = parameters[FLUME_CONF_DIR_KEY]
+  if FLUME_CONF_DIR_KEY in configurations:
+    flume_conf_directory = configurations[FLUME_CONF_DIR_KEY]
 
 
   if flume_conf_directory is None:
   if flume_conf_directory is None:
     return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
     return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
@@ -61,7 +63,12 @@ def execute(parameters=None, host_name=None):
   if host_name is None:
   if host_name is None:
     host_name = socket.getfqdn()
     host_name = socket.getfqdn()
 
 
-  processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
+  # parse script arguments
+  flume_run_directory = FLUME_RUN_DIR_DEFAULT
+  if FLUME_RUN_DIR_KEY in parameters:
+    flume_run_directory = parameters[FLUME_RUN_DIR_KEY]
+
+  processes = get_flume_status(flume_conf_directory, flume_run_directory)
   expected_agents = find_expected_agent_names(flume_conf_directory)
   expected_agents = find_expected_agent_names(flume_conf_directory)
 
 
   alert_label = ''
   alert_label = ''

+ 42 - 2
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json

@@ -387,7 +387,36 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py"
+          "path": "BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            },
+            {
+              "name": "checkpoint.time.warning.threshold",
+              "display_name": "Checkpoint Warning",
+              "value": 2.0,
+              "type": "PERCENT",
+              "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a warning alert.",
+              "units": "%",
+              "threshold": "WARNING"
+            },
+            {
+              "name": "checkpoint.time.critical.threshold",
+              "display_name": "Checkpoint Critical",
+              "value": 2.0,
+              "type": "PERCENT",
+              "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a critical alert.",
+              "units": "%",
+              "threshold": "CRITICAL"
+            }           
+          ]
         }
         }
       },
       },
       {
       {
@@ -400,7 +429,18 @@
         "ignore_host": true,
         "ignore_host": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py"
+          "path": "BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
         }
       }
       }
     ],
     ],

+ 44 - 24
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py

@@ -30,13 +30,17 @@ NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
 NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
 NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
 NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
 
 
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
+PERCENT_WARNING_KEY = 'checkpoint.time.warning.threshold'
+PERCENT_WARNING_DEFAULT = 200
+
+PERCENT_CRITICAL_KEY = 'checkpoint.time.critical.threshold'
+PERCENT_CRITICAL_DEFAULT = 200
 
 
 CHECKPOINT_TX_DEFAULT = 1000000
 CHECKPOINT_TX_DEFAULT = 1000000
 CHECKPOINT_PERIOD_DEFAULT = 21600
 CHECKPOINT_PERIOD_DEFAULT = 21600
 
 
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -47,43 +51,55 @@ def get_tokens():
       NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)      
       NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)      
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+  if configurations is None:
+    return (('UNKNOWN', ['There were no configurations supplied to the script.']))
   
   
   uri = None
   uri = None
   scheme = 'http'  
   scheme = 'http'  
   http_uri = None
   http_uri = None
   https_uri = None
   https_uri = None
   http_policy = 'HTTP_ONLY'
   http_policy = 'HTTP_ONLY'
-  percent_warning = PERCENT_WARNING
-  percent_critical = PERCENT_CRITICAL
   checkpoint_tx = CHECKPOINT_TX_DEFAULT
   checkpoint_tx = CHECKPOINT_TX_DEFAULT
   checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
   checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
   
   
-  if NN_HTTP_ADDRESS_KEY in parameters:
-    http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+  if NN_HTTP_ADDRESS_KEY in configurations:
+    http_uri = configurations[NN_HTTP_ADDRESS_KEY]
 
 
-  if NN_HTTPS_ADDRESS_KEY in parameters:
-    https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+  if NN_HTTPS_ADDRESS_KEY in configurations:
+    https_uri = configurations[NN_HTTPS_ADDRESS_KEY]
 
 
-  if NN_HTTP_POLICY_KEY in parameters:
-    http_policy = parameters[NN_HTTP_POLICY_KEY]
+  if NN_HTTP_POLICY_KEY in configurations:
+    http_policy = configurations[NN_HTTP_POLICY_KEY]
 
 
-  if NN_CHECKPOINT_TX_KEY in parameters:
-    checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+  if NN_CHECKPOINT_TX_KEY in configurations:
+    checkpoint_tx = configurations[NN_CHECKPOINT_TX_KEY]
+
+  if NN_CHECKPOINT_PERIOD_KEY in configurations:
+    checkpoint_period = configurations[NN_CHECKPOINT_PERIOD_KEY]
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+  percent_warning = PERCENT_WARNING_DEFAULT
+  if PERCENT_WARNING_KEY in parameters:
+    percent_warning = float(parameters[PERCENT_WARNING_KEY]) * 100
+
+  percent_critical = PERCENT_CRITICAL_DEFAULT
+  if PERCENT_CRITICAL_KEY in parameters:
+    percent_critical = float(parameters[PERCENT_CRITICAL_KEY]) * 100
 
 
-  if NN_CHECKPOINT_PERIOD_KEY in parameters:
-    checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
-    
   # determine the right URI and whether to use SSL
   # determine the right URI and whether to use SSL
   uri = http_uri
   uri = http_uri
   if http_policy == 'HTTPS_ONLY':
   if http_policy == 'HTTPS_ONLY':
@@ -102,8 +118,12 @@ def execute(parameters=None, host_name=None):
   result_code = "OK"
   result_code = "OK"
 
 
   try:
   try:
-    last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
-    journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+    last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,
+      "LastCheckpointTime", connection_timeout))
+
+    journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,
+      "JournalTransactionInfo", connection_timeout)
+
     journal_transaction_info_dict = json.loads(journal_transaction_info)
     journal_transaction_info_dict = json.loads(journal_transaction_info)
   
   
     last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
     last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
@@ -131,11 +151,11 @@ def get_time(delta):
   return {'h':h, 'm':m}
   return {'h':h, 'm':m}
 
 
 
 
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
   response = None
   response = None
   
   
   try:
   try:
-    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+    response = urllib2.urlopen(query, timeout=connection_timeout)
     data = response.read()
     data = response.read()
 
 
     data_dict = json.loads(data)
     data_dict = json.loads(data)

+ 24 - 16
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py

@@ -35,7 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
 NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
 NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
 DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 
 
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -44,36 +45,43 @@ def get_tokens():
   """
   """
   return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
   return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
   NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
   NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
-  
 
 
-def execute(parameters=None, host_name=None):
+
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
-  if parameters is None:
-    return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
 
 
   # if not in HA mode, then SKIP
   # if not in HA mode, then SKIP
-  if not NAMESERVICE_KEY in parameters:
+  if not NAMESERVICE_KEY in configurations:
     return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
     return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
 
 
   # hdfs-site is required
   # hdfs-site is required
-  if not HDFS_SITE_KEY in parameters:
+  if not HDFS_SITE_KEY in configurations:
     return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
     return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
 
 
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+
   # determine whether or not SSL is enabled
   # determine whether or not SSL is enabled
   is_ssl_enabled = False
   is_ssl_enabled = False
-  if DFS_POLICY_KEY in parameters:
-    dfs_policy = parameters[DFS_POLICY_KEY]
+  if DFS_POLICY_KEY in configurations:
+    dfs_policy = configurations[DFS_POLICY_KEY]
     if dfs_policy == "HTTPS_ONLY":
     if dfs_policy == "HTTPS_ONLY":
       is_ssl_enabled = True
       is_ssl_enabled = True
 
 
-  name_service = parameters[NAMESERVICE_KEY]
-  hdfs_site = parameters[HDFS_SITE_KEY]
+  name_service = configurations[NAMESERVICE_KEY]
+  hdfs_site = configurations[HDFS_SITE_KEY]
 
 
   # look for dfs.ha.namenodes.foo
   # look for dfs.ha.namenodes.foo
   nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
   nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
@@ -105,7 +113,7 @@ def execute(parameters=None, host_name=None):
 
 
       try:
       try:
         jmx_uri = jmx_uri_fragment.format(value)
         jmx_uri = jmx_uri_fragment.format(value)
-        state = get_value_from_jmx(jmx_uri,'State')
+        state = get_value_from_jmx(jmx_uri, 'State', connection_timeout)
 
 
         if state == HDFS_NN_STATE_ACTIVE:
         if state == HDFS_NN_STATE_ACTIVE:
           active_namenodes.append(value)
           active_namenodes.append(value)
@@ -161,11 +169,11 @@ def execute(parameters=None, host_name=None):
       return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
       return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
 
 
 
 
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
   response = None
   response = None
-  
+
   try:
   try:
-    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+    response = urllib2.urlopen(query, timeout=connection_timeout)
     data = response.read()
     data = response.read()
 
 
     data_dict = json.loads(data)
     data_dict = json.loads(data)

+ 51 - 31
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py

@@ -40,15 +40,21 @@ SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
 # The configured Kerberos executable search paths, if any
 # The configured Kerberos executable search paths, if any
 KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
 KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
 
 
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
 THRIFT_PORT_DEFAULT = 10000
 THRIFT_PORT_DEFAULT = 10000
 HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary'
 HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary'
 HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
 HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
 HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
 HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
+
+# default keytab location
+SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab'
 SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
 SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+
+# default smoke principal
+SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal'
 SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
 SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
+
+# default smoke user
+SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 
 
 def get_tokens():
 def get_tokens():
@@ -59,62 +65,76 @@ def get_tokens():
   return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY, SMOKEUSER_KEY,
   return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY, SMOKEUSER_KEY,
     HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY,
     HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY,
     SMOKEUSER_KEYTAB_KEY,SMOKEUSER_PRINCIPAL_KEY,HIVE_SERVER_THRIFT_HTTP_PORT_KEY,
     SMOKEUSER_KEYTAB_KEY,SMOKEUSER_PRINCIPAL_KEY,HIVE_SERVER_THRIFT_HTTP_PORT_KEY,
-    HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
+    HIVE_SERVER_TRANSPORT_MODE_KEY,KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
 
 
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return ('UNKNOWN', ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return ('UNKNOWN', ['There were no configurations supplied to the script.'])
 
 
   transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
   transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
-  if HIVE_SERVER_TRANSPORT_MODE_KEY in parameters:
-    transport_mode = parameters[HIVE_SERVER_TRANSPORT_MODE_KEY]
+  if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
+    transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]
 
 
   port = THRIFT_PORT_DEFAULT
   port = THRIFT_PORT_DEFAULT
-  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in parameters:
-    port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
-  elif  transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in parameters:
-    port = int(parameters[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
+  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
+    port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
+  elif  transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
+    port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
 
 
   security_enabled = False
   security_enabled = False
-  if SECURITY_ENABLED_KEY in parameters:
-    security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+  if SECURITY_ENABLED_KEY in configurations:
+    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
 
 
   hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
   hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
-  if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
-    hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
+  if HIVE_SERVER2_AUTHENTICATION_KEY in configurations:
+    hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY]
 
 
+  # defaults
+  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
   smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
   smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
-  if SMOKEUSER_PRINCIPAL_KEY in parameters:
-    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]
-
   smokeuser = SMOKEUSER_DEFAULT
   smokeuser = SMOKEUSER_DEFAULT
-  if SMOKEUSER_KEY in parameters:
-    smokeuser = parameters[SMOKEUSER_KEY]
+
+  # check script params
+  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
+    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]
+
+  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
+    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]
+
+  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
+    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
+
+
+  # check configurations last as they should always take precedence
+  if SMOKEUSER_PRINCIPAL_KEY in configurations:
+    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
+
+  if SMOKEUSER_KEY in configurations:
+    smokeuser = configurations[SMOKEUSER_KEY]
 
 
   result_code = None
   result_code = None
 
 
   if security_enabled:
   if security_enabled:
     hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
     hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
-    if HIVE_SERVER_PRINCIPAL_KEY in parameters:
-      hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
-
-    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
+    if HIVE_SERVER_PRINCIPAL_KEY in configurations:
+      hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY]
 
 
-    if SMOKEUSER_KEYTAB_KEY in parameters:
-      smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+    if SMOKEUSER_KEYTAB_KEY in configurations:
+      smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]
 
 
     # Get the configured Kerberos executable search paths, if any
     # Get the configured Kerberos executable search paths, if any
-    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
-      kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
     else:
     else:
       kerberos_executable_search_paths = None
       kerberos_executable_search_paths = None
 
 

+ 76 - 54
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py

@@ -24,7 +24,8 @@ from resource_management.libraries.functions import format
 from resource_management.libraries.functions import get_kinit_path
 from resource_management.libraries.functions import get_kinit_path
 from resource_management.libraries.functions import get_klist_path
 from resource_management.libraries.functions import get_klist_path
 from ambari_commons.os_check import OSConst, OSCheck
 from ambari_commons.os_check import OSConst, OSCheck
-from os import getpid, sep
+from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
+import os
 from urlparse import urlparse
 from urlparse import urlparse
 
 
 RESULT_CODE_OK = 'OK'
 RESULT_CODE_OK = 'OK'
@@ -39,6 +40,17 @@ SECURITY_ENABLED = '{{cluster-env/security_enabled}}'
 OOZIE_PRINCIPAL = '{{oozie-site/oozie.authentication.kerberos.principal}}'
 OOZIE_PRINCIPAL = '{{oozie-site/oozie.authentication.kerberos.principal}}'
 OOZIE_KEYTAB = '{{oozie-site/oozie.authentication.kerberos.keytab}}'
 OOZIE_KEYTAB = '{{oozie-site/oozie.authentication.kerberos.keytab}}'
 
 
+class KerberosPropertiesNotFound(Exception): pass
+
+@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (OOZIE_URL_KEY,)
+
+@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 def get_tokens():
 def get_tokens():
   """
   """
   Returns a tuple of tokens in the format {{site/property}} that will be used
   Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -46,78 +58,88 @@ def get_tokens():
   """
   """
   return (OOZIE_URL_KEY, OOZIE_PRINCIPAL, SECURITY_ENABLED, OOZIE_KEYTAB, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
   return (OOZIE_URL_KEY, OOZIE_PRINCIPAL, SECURITY_ENABLED, OOZIE_KEYTAB, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
 
 
-def execute(parameters=None, host_name=None):
+@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
+def get_check_command(oozie_url, host_name, configurations):
+  from resource_management.libraries.functions import reload_windows_env
+  reload_windows_env()
+  oozie_home = os.environ['OOZIE_HOME']
+  command = format("{oozie_home}\\bin\\oozie.cmd admin -oozie {oozie_url} -status")
+  return (command, None)
+
+@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
+def get_check_command(oozie_url, host_name, configurations):
+  security_enabled = False
+  if SECURITY_ENABLED in configurations:
+    security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'
+  kerberos_env = None
+  if security_enabled:
+    if OOZIE_KEYTAB in configurations and OOZIE_PRINCIPAL in configurations:
+      oozie_keytab = configurations[OOZIE_KEYTAB]
+      oozie_principal = configurations[OOZIE_PRINCIPAL]
+
+      # substitute _HOST in kerberos principal with actual fqdn
+      oozie_principal = oozie_principal.replace('_HOST', host_name)
+    else:
+      raise KerberosPropertiesNotFound('The Oozie keytab and principal are required configurations when security is enabled.')
+
+    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
+    # when executing kinit and the oozie status command
+    env = Environment.get_instance()
+    ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid())
+    kerberos_env = {'KRB5CCNAME': ccache_file}
+
+    # Get the configured Kerberos executable search paths, if any
+    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+    else:
+      kerberos_executable_search_paths = None
+
+    klist_path_local = get_klist_path(kerberos_executable_search_paths)
+    klist_command = format("{klist_path_local} -s {ccache_file}")
+
+    # Determine if we need to kinit by testing to see if the relevant cache exists and has
+    # non-expired tickets.  Tickets are marked to expire after 5 minutes to help reduce the number
+    # of kinits we do but recover quickly when keytabs are regenerated
+    return_code, _ = call(klist_command)
+    if return_code != 0:
+      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
+      kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")
+
+      # kinit
+      Execute(kinit_command, environment=kerberos_env)
+  command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")
+  return (command, kerberos_env)
+
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
-  if parameters is None:
-    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
 
 
-  if not OOZIE_URL_KEY in parameters:
+  if not OOZIE_URL_KEY in configurations:
     return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
     return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
 
 
   # use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all
   # use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all
   # interfaces, which doesn't work on Windows
   # interfaces, which doesn't work on Windows
   localhost_address = 'localhost' if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else '0.0.0.0'
   localhost_address = 'localhost' if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else '0.0.0.0'
 
 
-  oozie_url = parameters[OOZIE_URL_KEY]
+  oozie_url = configurations[OOZIE_URL_KEY]
   oozie_url = oozie_url.replace(urlparse(oozie_url).hostname,localhost_address)
   oozie_url = oozie_url.replace(urlparse(oozie_url).hostname,localhost_address)
 
 
-  security_enabled = False
-  if SECURITY_ENABLED in parameters:
-    security_enabled = str(parameters[SECURITY_ENABLED]).upper() == 'TRUE'
-
-  command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")
-
   try:
   try:
-    # kinit if security is enabled so that oozie-env.sh can make the web request
-    kerberos_env = None
-
-    if security_enabled:
-      if OOZIE_KEYTAB in parameters and OOZIE_PRINCIPAL in parameters:
-        oozie_keytab = parameters[OOZIE_KEYTAB]
-        oozie_principal = parameters[OOZIE_PRINCIPAL]
-
-        # substitute _HOST in kerberos principal with actual fqdn
-        oozie_principal = oozie_principal.replace('_HOST', host_name)
-      else:
-        return (RESULT_CODE_UNKNOWN, ['The Oozie keytab and principal are required parameters when security is enabled.'])
-
-      # Create the kerberos credentials cache (ccache) file and set it in the environment to use
-      # when executing curl
-      env = Environment.get_instance()
-      ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, sep, getpid())
-      kerberos_env = {'KRB5CCNAME': ccache_file}
-
-      # Get the configured Kerberos executable search paths, if any
-      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
-        kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
-      else:
-        kerberos_executable_search_paths = None
-
-      klist_path_local = get_klist_path(kerberos_executable_search_paths)
-      klist_command = format("{klist_path_local} -s {ccache_file}")
-
-      # Determine if we need to kinit by testing to see if the relevant cache exists and has
-      # non-expired tickets.  Tickets are marked to expire after 5 minutes to help reduce the number
-      # it kinits we do but recover quickly when keytabs are regenerated
-      return_code, _ = call(klist_command)
-      if return_code != 0:
-        kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
-        kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")
-
-        # kinit
-        Execute(kinit_command, environment=kerberos_env)
-
+    command, env = get_check_command(oozie_url, host_name, configurations)
     # execute the command
     # execute the command
-    Execute(command, environment=kerberos_env)
+    Execute(command, environment=env)
 
 
     return (RESULT_CODE_OK, ["Successful connection to {0}".format(oozie_url)])
     return (RESULT_CODE_OK, ["Successful connection to {0}".format(oozie_url)])
-
+  except KerberosPropertiesNotFound, ex:
+    return (RESULT_CODE_UNKNOWN, [str(ex)])
   except Exception, ex:
   except Exception, ex:
     return (RESULT_CODE_CRITICAL, [str(ex)])
     return (RESULT_CODE_CRITICAL, [str(ex)])

+ 30 - 19
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py

@@ -53,8 +53,10 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
 WEBHCAT_OK_RESPONSE = 'ok'
 WEBHCAT_OK_RESPONSE = 'ok'
 WEBHCAT_PORT_DEFAULT = 50111
 WEBHCAT_PORT_DEFAULT = 50111
 
 
-CURL_CONNECTION_TIMEOUT = '5'
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
+CURL_CONNECTION_TIMEOUT_DEFAULT = str(int(CONNECTION_TIMEOUT_DEFAULT))
+
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -64,27 +66,36 @@ def get_tokens():
   return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, WEBHCAT_KEYTAB_KEY, WEBHCAT_PRINCIPAL_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
   return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, WEBHCAT_KEYTAB_KEY, WEBHCAT_PRINCIPAL_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
 
 
   result_code = RESULT_CODE_UNKNOWN
   result_code = RESULT_CODE_UNKNOWN
 
 
-  if parameters is None:
-    return (result_code, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (result_code, ['There were no configurations supplied to the script.'])
 
 
   webhcat_port = WEBHCAT_PORT_DEFAULT
   webhcat_port = WEBHCAT_PORT_DEFAULT
-  if TEMPLETON_PORT_KEY in parameters:
-    webhcat_port = int(parameters[TEMPLETON_PORT_KEY])
+  if TEMPLETON_PORT_KEY in configurations:
+    webhcat_port = int(configurations[TEMPLETON_PORT_KEY])
 
 
   security_enabled = False
   security_enabled = False
-  if SECURITY_ENABLED_KEY in parameters:
-    security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
+  if SECURITY_ENABLED_KEY in configurations:
+    security_enabled = configurations[SECURITY_ENABLED_KEY].lower() == 'true'
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  curl_connection_timeout = CURL_CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+    curl_connection_timeout = str(int(connection_timeout))
+
 
 
   # the alert will always run on the webhcat host
   # the alert will always run on the webhcat host
   if host_name is None:
   if host_name is None:
@@ -98,12 +109,12 @@ def execute(parameters=None, host_name=None):
   json_response = {}
   json_response = {}
 
 
   if security_enabled:
   if security_enabled:
-    if WEBHCAT_KEYTAB_KEY not in parameters or WEBHCAT_PRINCIPAL_KEY not in parameters:
-      return (RESULT_CODE_UNKNOWN, [str(parameters)])
+    if WEBHCAT_KEYTAB_KEY not in configurations or WEBHCAT_PRINCIPAL_KEY not in configurations:
+      return (RESULT_CODE_UNKNOWN, [str(configurations)])
 
 
     try:
     try:
-      webhcat_keytab = parameters[WEBHCAT_KEYTAB_KEY]
-      webhcat_principal = parameters[WEBHCAT_PRINCIPAL_KEY]
+      webhcat_keytab = configurations[WEBHCAT_KEYTAB_KEY]
+      webhcat_principal = configurations[WEBHCAT_PRINCIPAL_KEY]
 
 
       # substitute _HOST in kerberos principal with actual fqdn
       # substitute _HOST in kerberos principal with actual fqdn
       webhcat_principal = webhcat_principal.replace('_HOST', host_name)
       webhcat_principal = webhcat_principal.replace('_HOST', host_name)
@@ -115,8 +126,8 @@ def execute(parameters=None, host_name=None):
       kerberos_env = {'KRB5CCNAME': ccache_file}
       kerberos_env = {'KRB5CCNAME': ccache_file}
 
 
       # Get the configured Kerberos executable search paths, if any
       # Get the configured Kerberos executable search paths, if any
-      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
-        kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
       else:
       else:
         kerberos_executable_search_paths = None
         kerberos_executable_search_paths = None
 
 
@@ -136,7 +147,7 @@ def execute(parameters=None, host_name=None):
 
 
       # make a single curl call to get just the http code
       # make a single curl call to get just the http code
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w',
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w',
-        '%{http_code}', '--connect-timeout', CURL_CONNECTION_TIMEOUT,
+        '%{http_code}', '--connect-timeout', curl_connection_timeout,
         '-o', '/dev/null', query_url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
         '-o', '/dev/null', query_url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
 
 
       stdout, stderr = curl.communicate()
       stdout, stderr = curl.communicate()
@@ -160,7 +171,7 @@ def execute(parameters=None, host_name=None):
       # now that we have the http status and it was 200, get the content
       # now that we have the http status and it was 200, get the content
       start_time = time.time()
       start_time = time.time()
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL',
       curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL',
-        '--connect-timeout', CURL_CONNECTION_TIMEOUT, query_url, ],
+        '--connect-timeout', curl_connection_timeout, query_url, ],
         stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
         stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
 
 
       stdout, stderr = curl.communicate()
       stdout, stderr = curl.communicate()
@@ -178,7 +189,7 @@ def execute(parameters=None, host_name=None):
     try:
     try:
       # execute the query for the JSON that includes WebHCat status
       # execute the query for the JSON that includes WebHCat status
       start_time = time.time()
       start_time = time.time()
-      url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
+      url_response = urllib2.urlopen(query_url, timeout=connection_timeout)
       total_time = time.time() - start_time
       total_time = time.time() - start_time
 
 
       json_response = json.loads(url_response.read())
       json_response = json.loads(url_response.read())

+ 12 - 1
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json

@@ -203,7 +203,18 @@
         "enabled": true,
         "enabled": true,
         "source": {
         "source": {
           "type": "SCRIPT",
           "type": "SCRIPT",
-          "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py"
+          "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py",
+          "parameters": [
+            {
+              "name": "connection.timeout",
+              "display_name": "Connection Timeout",
+              "value": 5.0,
+              "type": "NUMERIC",
+              "description": "The maximum time before this alert is considered to be CRITICAL",
+              "units": "seconds",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
         }
       }
       }
     ],
     ],

+ 25 - 12
ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py

@@ -40,7 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
 
 
 NODEMANAGER_DEFAULT_PORT = 8042
 NODEMANAGER_DEFAULT_PORT = 8042
 
 
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -51,32 +52,40 @@ def get_tokens():
   YARN_HTTP_POLICY_KEY)
   YARN_HTTP_POLICY_KEY)
   
   
 
 
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
   """
   """
   Returns a tuple containing the result code and a pre-formatted result label
   Returns a tuple containing the result code and a pre-formatted result label
 
 
   Keyword arguments:
   Keyword arguments:
-  parameters (dictionary): a mapping of parameter key to value
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
   host_name (string): the name of this host where the alert is running
   host_name (string): the name of this host where the alert is running
   """
   """
   result_code = RESULT_CODE_UNKNOWN
   result_code = RESULT_CODE_UNKNOWN
 
 
-  if parameters is None:
-    return (result_code, ['There were no parameters supplied to the script.'])
+  if configurations is None:
+    return (result_code, ['There were no configurations supplied to the script.'])
 
 
   scheme = 'http'
   scheme = 'http'
   http_uri = None
   http_uri = None
   https_uri = None
   https_uri = None
   http_policy = 'HTTP_ONLY'
   http_policy = 'HTTP_ONLY'
 
 
-  if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
-    http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+  if NODEMANAGER_HTTP_ADDRESS_KEY in configurations:
+    http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY]
 
 
-  if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
-    https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+  if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations:
+    https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+  if YARN_HTTP_POLICY_KEY in configurations:
+    http_policy = configurations[YARN_HTTP_POLICY_KEY]
+
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
 
 
-  if YARN_HTTP_POLICY_KEY in parameters:
-    http_policy = parameters[YARN_HTTP_POLICY_KEY]
 
 
   # determine the right URI and whether to use SSL
   # determine the right URI and whether to use SSL
   uri = http_uri
   uri = http_uri
@@ -108,7 +117,7 @@ def execute(parameters=None, host_name=None):
 
 
   try:
   try:
     # execute the query for the JSON that includes the NodeManager health status
     # execute the query for the JSON that includes the NodeManager health status
-    url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+    url_response = urllib2.urlopen(query, timeout=connection_timeout)
   except urllib2.HTTPError, httpError:
   except urllib2.HTTPError, httpError:
     label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
     label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
       str(httpError))
       str(httpError))
@@ -122,6 +131,7 @@ def execute(parameters=None, host_name=None):
   try:
   try:
     json_response = json.loads(url_response.read())
     json_response = json.loads(url_response.read())
     node_healthy = json_response['nodeInfo']['nodeHealthy']
     node_healthy = json_response['nodeInfo']['nodeHealthy']
+    node_healthy_report = json_response['nodeInfo']['healthReport']
 
 
     # convert boolean to string
     # convert boolean to string
     node_healthy = str(node_healthy)
     node_healthy = str(node_healthy)
@@ -138,6 +148,9 @@ def execute(parameters=None, host_name=None):
   if node_healthy.lower() == 'true':
   if node_healthy.lower() == 'true':
     result_code = RESULT_CODE_OK
     result_code = RESULT_CODE_OK
     label = OK_MESSAGE
     label = OK_MESSAGE
+  elif node_healthy.lower() == 'false':
+    result_code = RESULT_CODE_CRITICAL
+    label = node_healthy_report
   else:
   else:
     result_code = RESULT_CODE_CRITICAL
     result_code = RESULT_CODE_CRITICAL
     label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)
     label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)