Browse Source

AMBARI-7791. HBase Master CPU utilization alert is not suppressed at MM (dlysnichenko)

Lisnichenko Dmitro 11 years ago
parent
commit
7092d80d32
19 changed files with 1632 additions and 274 deletions
  1. 1 1
      ambari-server/src/main/python/ambari-server.py
  2. 0 91
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_wrapper.sh
  3. 326 0
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/mm_wrapper.py
  4. 1 1
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py
  5. 19 19
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
  6. 0 13
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2
  7. 1 1
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/nagios.cfg.j2
  8. 1 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_checkpoint_time.py
  9. 0 94
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_wrapper.sh
  10. 326 0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/mm_wrapper.py
  11. 1 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
  12. 23 23
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
  13. 0 15
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
  14. 1 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/nagios.cfg.j2
  15. 459 0
      ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_mm_wrapper.py
  16. 4 4
      ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py
  17. 459 0
      ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_mm_wrapper.py
  18. 5 5
      ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
  19. 5 4
      ambari-server/src/test/python/unitTests.py

+ 1 - 1
ambari-server/src/main/python/ambari-server.py

@@ -2945,7 +2945,7 @@ def status(args):
   status, pid = is_server_runing()
   status, pid = is_server_runing()
   if status:
   if status:
     print "Ambari Server running"
     print "Ambari Server running"
-    print "Found Ambari Server PID: '" + str(pid) + " at: " + PID_DIR + os.sep + PID_NAME
+    print "Found Ambari Server PID: " + str(pid) + " at: " + PID_DIR + os.sep + PID_NAME
   else:
   else:
     print "Ambari Server not running. Stale PID File at: " + PID_DIR + os.sep + PID_NAME
     print "Ambari Server not running. Stale PID File at: " + PID_DIR + os.sep + PID_NAME
 
 

+ 0 - 91
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_wrapper.sh

@@ -1,91 +0,0 @@
-#!/bin/bash
-#
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#
-
-function real_service() {
-  desc=$NAGIOS_SERVICEGROUPNAME
-  eval "$1='$desc'"
-}
-
-function real_component() {
-  arrDesc=(${NAGIOS_SERVICEDESC//::/ })
-
-  compName="${arrDesc[0]}"
-
-  case "$compName" in
-    HBASEMASTER)
-      realCompName="HBASE_MASTER"
-    ;;
-    REGIONSERVER)
-      realCompName="HBASE_REGIONSERVER"
-    ;;
-    HIVE-METASTORE)
-      realCompName="HIVE_METASTORE"
-    ;;
-    HIVE-SERVER)
-      realCompName="HIVE_SERVER"
-    ;;
-    FLUME)
-      realCompName="FLUME_SERVER"
-    ;;
-    HUE)
-      realCompName="HUE_SERVER"
-    ;;
-    WEBHCAT)
-      realCompName="WEBHCAT_SERVER"
-    ;;
-    *)
-      realCompName=$compName
-    ;;
-  esac
-
-  eval "$1='$realCompName'"
-}
-
-real_service_var=""
-real_service real_service_var
-
-real_comp_var=""
-real_component real_comp_var
-
-
-wrapper_output=`exec "$@"`
-wrapper_result=$?
-
-if [ "$wrapper_result" == "0" ]; then
-  echo "$wrapper_output"
-  exit $wrapper_result
-fi
-
-if [ ! -f /var/nagios/ignore.dat ]; then
-  echo "$wrapper_output"
-  exit $wrapper_result
-else
-  count=$(grep $NAGIOS_HOSTNAME /var/nagios/ignore.dat | grep $real_service_var | grep $real_comp_var | wc -l)
-  if [ "$count" -ne "0" ]; then
-    echo "$wrapper_output\nAMBARIPASSIVE=${wrapper_result}" | sed 's/^[ \t]*//g'
-    exit 0
-  else
-    echo "$wrapper_output"
-    exit $wrapper_result
-  fi
-fi
-

+ 326 - 0
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/mm_wrapper.py

@@ -0,0 +1,326 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import sys
+import subprocess
+import os
+
+N_SGN = 'NAGIOS_SERVICEGROUPNAME'
+N_SD = 'NAGIOS_SERVICEDESC'
+N_HOST = 'NAGIOS_HOSTNAME'
+
+LIST_SEPARATOR = "--"
+HOSTNAME_PLACEHOLDER = "^^"
+IGNORE_DAT_FILE = "/var/nagios/ignore.dat"
+
+# Mode constants
+OR = 0
+AND = 1
+ENV_ONLY = 2
+FILTER_MM = 3
+LEGACY_CHECK_WRAPPER = 4
+MODES = ['or', 'and', 'env_only', 'filter_mm', 'legacy_check_wrapper']
+
+
+def ignored_host_list(service, component):
+  """
+  :param service: current service
+  :param component: current component
+  :return: all hosts where specified host component is in ignored state
+  """
+  try:
+    with open(IGNORE_DAT_FILE) as f:
+      lines = f.readlines()
+  except IOError:
+    return []
+  result = []
+  if lines:
+    for l in lines:
+      tokens = l.split(' ')
+      if len(tokens) == 3 and tokens[1] == service and tokens[2].strip() == component:
+        result.append(tokens[0])
+  return result
+
+
+def get_real_service():
+  try:
+    service = os.environ[N_SGN]  # e.g. 'HBASE'
+  except KeyError:
+    service = ''
+  return service
+
+
+def get_real_component():
+  try:
+    arr_desc = os.environ[N_SD]  # e.g. 'HBASE::Percent RegionServers live'
+    SEPARATOR = "::"
+    comp_name = arr_desc.replace(SEPARATOR, ' ').split(' ')[0]
+  except KeyError:
+    comp_name = ''
+  mapping = {
+    'HBASEMASTER': 'HBASE_MASTER',
+    'REGIONSERVER': 'HBASE_REGIONSERVER',
+    'JOBHISTORY': 'MAPREDUCE2',
+    'HIVE-METASTORE': 'HIVE_METASTORE',
+    'HIVE-SERVER': 'HIVE_SERVER',
+    'FLUME': 'FLUME_HANDLER',
+    'HUE': 'HUE_SERVER',
+    'WEBHCAT': 'WEBHCAT_SERVER',
+  }
+  if comp_name in mapping:
+    comp_name = mapping.get(comp_name)
+  return comp_name
+
+
+def check_output(*popenargs, **kwargs):
+  """
+  Imitate subprocess.check_output() for python 2.6
+  """
+  process = subprocess.Popen(stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                             *popenargs, **kwargs)
+  output, unused_err = process.communicate()
+  retcode = process.poll()
+  if retcode:
+    cmd = kwargs.get("args")
+    if cmd is None:
+      cmd = popenargs[0]
+    err = subprocess.CalledProcessError(retcode, cmd)
+    # Monkey-patching for python 2.6
+    err.output = output
+    raise err
+  return output
+
+
+def print_usage():
+  """
+  Prints usage and exits with a non-zero exit code
+  """
+  print "Usage: mm_wrapper.py MODE HOST1 HOST2 .. HOSTN %s command arg1 arg2 .. argN" % LIST_SEPARATOR
+  print "MODE is one of the following: or, and, env_only, filter_mm, legacy_check_wrapper"
+  print "%s is a separator between list of hostnames and command with args" % LIST_SEPARATOR
+  print "%s is used as a hostname placeholder at command args" % HOSTNAME_PLACEHOLDER
+  print "Also script provides $MM_HOSTS shell variable to commands"
+  print "NOTE: Script makes use of Nagios-populated env vars %s and %s" % (N_SGN, N_SD)
+  print "For more info, please see docstrings at %s" % os.path.realpath(__file__)
+  sys.exit(1)
+
+
+def parse_args(args):
+  if not args or not LIST_SEPARATOR in args or args[0] not in MODES:
+    print_usage()
+  else:
+    mode = MODES.index(args[0])  # identify operation mode
+    args = args[1:]  # Shift args left
+    hostnames = []
+    command_line = []
+    # Parse command line args
+    passed_separator = False  # True if met LIST_SEPARATOR
+    for arg in args:
+      if not passed_separator:
+        if arg != LIST_SEPARATOR:
+          hostnames.append(arg)
+        else:
+          passed_separator = True
+      else:
+        if arg != LIST_SEPARATOR:
+          command_line.append(arg)
+        else:  # Something definitely goes wrong
+          print "Could not parse arguments: " \
+                "There is more than one %s argument." % LIST_SEPARATOR
+          print_usage()
+
+    if not command_line:
+      print "No command provided."
+      print_usage()
+    return mode, hostnames, command_line
+
+
+def do_work(mode, hostnames, command_line):
+  # Execute commands
+  ignored_hosts = ignored_host_list(get_real_service(), get_real_component())
+  empty_check_result = {
+    'message': 'No checks have been run (no hostnames provided)',
+    'retcode': -1,
+    'real_retcode': None
+  }
+  custom_env = os.environ.copy()
+  if ignored_hosts:
+    custom_env['MM_HOSTS'] = \
+      reduce(lambda a, b: "%s %s" % (a, b), ignored_hosts)
+  if mode == OR:
+    check_result = work_in_or_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result)
+  elif mode == AND:
+    check_result = work_in_and_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result)
+  elif mode == ENV_ONLY:
+    check_result = work_in_env_only_mode(hostnames, command_line, custom_env)
+  elif mode == FILTER_MM:
+    check_result = work_in_filter_mm_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result)
+  else:  # mode == LEGACY_CHECK_WRAPPER:
+    check_result = work_in_legacy_check_wrapper_mode(ignored_hosts, command_line, custom_env)
+  # Build the final output
+  final_output = []
+  output = check_result.get('message')
+  if output is not None:
+    for string in output.splitlines():
+      final_output.append(string.strip())
+  real_retcode = check_result.get('real_retcode')
+  if real_retcode:
+    # This string is used at check_aggregate.php when aggregating alerts
+    final_output.append("AMBARIPASSIVE=%s" % real_retcode)
+  return final_output, check_result.get('retcode')
+
+
+def work_in_or_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result):
+  check_result = empty_check_result
+  for hostname in hostnames:
+    concrete_command_line = map(  # Substitute hostname where needed
+                                  lambda x: hostname if x == HOSTNAME_PLACEHOLDER else x,
+                                  command_line)
+    try:
+      returncode = 0
+      real_retcode = None
+      message = check_output(concrete_command_line, env=custom_env)
+    except subprocess.CalledProcessError, e:
+      if hostname not in ignored_hosts:
+        returncode = e.returncode
+      else:  # Host is in MM
+        real_retcode = e.returncode
+      message = e.output
+    really_positive_result = hostname not in ignored_hosts and returncode == 0
+    if check_result.get('retcode') <= returncode or really_positive_result:
+      check_result = {
+        'message': message,
+        'retcode': returncode,
+        'real_retcode': real_retcode  # Real (not suppressed) program retcode
+      }
+    if really_positive_result:
+      break  # Exit on first real success
+  return check_result
+
+
+def work_in_and_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result):
+  check_result = empty_check_result
+  for hostname in hostnames:
+    concrete_command_line = map(  # Substitute hostname where needed
+                                  lambda x: hostname if x == HOSTNAME_PLACEHOLDER else x,
+                                  command_line)
+    try:
+      returncode = 0
+      real_retcode = None
+      message = check_output(concrete_command_line, env=custom_env)
+    except subprocess.CalledProcessError, e:
+      if hostname not in ignored_hosts:
+        returncode = e.returncode
+      else:
+        real_retcode = e.returncode
+      message = e.output
+    if check_result.get('retcode') <= returncode:
+      check_result = {
+        'message': message,
+        'retcode': returncode,
+        'real_retcode': real_retcode  # Real (not suppressed) program retcode
+      }
+  return check_result
+
+
+def work_in_env_only_mode(hostnames, command_line, custom_env):
+  concrete_command_line = []
+  for item in command_line:
+    if item == HOSTNAME_PLACEHOLDER:
+      concrete_command_line.extend(hostnames)
+    else:
+      concrete_command_line.append(item)
+  try:
+    returncode = 0
+    message = check_output(concrete_command_line, env=custom_env)
+  except subprocess.CalledProcessError, e:
+    returncode = e.returncode
+    message = e.output
+  check_result = {
+    'message': message,
+    'retcode': returncode,
+    'real_retcode': None  # Real (not suppressed) program retcode
+  }
+  return check_result
+
+
+def work_in_filter_mm_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result):
+  not_mm_hosts = [hostname for hostname in hostnames if hostname not in ignored_hosts]
+  if not not_mm_hosts:  # All hosts have been filtered
+    return empty_check_result
+  else:
+    return work_in_env_only_mode(not_mm_hosts, command_line, custom_env)
+
+
+def work_in_legacy_check_wrapper_mode(ignored_hosts, command_line, custom_env):
+  host = os.environ[N_HOST]
+  result = work_in_env_only_mode([host], command_line, custom_env)
+  real_retcode = result['retcode']
+  if host in ignored_hosts and real_retcode != 0:  # Ignore fail
+    result['retcode'] = 0
+    result['real_retcode'] = real_retcode
+  return result
+
+
+def main():
+  """
+  This script runs Nagios service check commands for host components
+  located on different hosts.
+  The script also passes every command a $MM_HOSTS shell variable with a list
+  of hosts that are in MM (Maintenance Mode)
+
+  or mode: return 0 exit code if at least one service check succeeds.
+  The script stops at the first success on a host that is not in MM.
+  Failures for host components that are in MM are suppressed (return code
+  is set to 0).
+  If command fails for all provided hostnames, script returns alert with the
+  greatest exit code value.
+
+  and mode:
+  Perform checks of all host components (effectively ignoring negative results
+  for MM components). If service check is successful for all hosts, script
+  also returns zero exit code. Otherwise alert with the greatest exit code is
+  returned.
+
+  env_only mode:
+  Pass list of all hosts to command and run it once. The only role of
+  mm_wrapper script in this mode is to provide properly initialized
+  $MM_HOSTS env variable to command being run. All duties of ignoring failures
+  of MM host components are delegated to a command being run.
+
+  filter_mm mode:
+  Similar to env_only mode. The only difference is that hostnames for
+  host components that are in MM are filtered (not passed to command at all)
+
+  legacy_check_wrapper mode:
+  Designed as a drop-in replacement for check_wrapper.sh. It reads the $NAGIOS_HOSTNAME
+  env var and suppresses check failures if the host component on this host is in MM.
+  When the host substitution symbol is encountered, the hostname defined by
+  $NAGIOS_HOSTNAME is substituted.
+  """
+  args = sys.argv[1:]  # Shift args left
+  mode, hostnames, command_line = parse_args(args)
+  output, ret_code = do_work(mode, hostnames, command_line)
+  for line in output:
+    print line
+  sys.exit(ret_code)
+
+
+if __name__ == "__main__":
+  main()

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py

@@ -65,8 +65,8 @@ def nagios_server_config():
   nagios_server_check( 'check_mapred_local_dir_used.sh')
   nagios_server_check( 'check_mapred_local_dir_used.sh')
   nagios_server_check( 'check_nodemanager_health.sh')
   nagios_server_check( 'check_nodemanager_health.sh')
   nagios_server_check( 'check_namenodes_ha.sh')
   nagios_server_check( 'check_namenodes_ha.sh')
-  nagios_server_check( 'check_wrapper.sh')
   nagios_server_check( 'hdp_nagios_init.php')
   nagios_server_check( 'hdp_nagios_init.php')
+  nagios_server_check( 'mm_wrapper.py' )
   nagios_server_check( 'check_hive_thrift_port.py' )
   nagios_server_check( 'check_hive_thrift_port.py' )
 
 
 
 

+ 19 - 19
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2

@@ -42,88 +42,88 @@
 # 'check_cpu' check remote cpu load
 # 'check_cpu' check remote cpu load
 define command {
 define command {
         command_name    check_cpu
         command_name    check_cpu
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_cpu.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_cpu.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
        }
        }
 
 
 define command {
 define command {
         command_name    check_cpu_ha
         command_name    check_cpu_ha
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_cpu_ha.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py or $ARG1$ -- php $USER1$/check_cpu_ha.php -h ^^ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$
        }
        }
 {% endif %}
 {% endif %}
 
 
 # Check data node storage full 
 # Check data node storage full 
 define command {
 define command {
         command_name    check_datanode_storage
         command_name    check_datanode_storage
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
        }
        }
 
 
 define command{
 define command{
         command_name    check_hdfs_blocks
         command_name    check_hdfs_blocks
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_blocks.php -h $ARG1$ -p $ARG2$ -s $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $ARG1$ -- php $USER1$/check_hdfs_blocks.php -h ^^ -p $ARG2$ -s $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
        }
        }
 
 
 define command{
 define command{
         command_name    check_hdfs_capacity
         command_name    check_hdfs_capacity
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_capacity.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $ARG1$ -- php $USER1$/check_hdfs_capacity.php -h ^^ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
        }
 
 
 define command{
 define command{
         command_name    check_aggregate
         command_name    check_aggregate
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_aggregate.php -f /var/nagios/status.dat -s 1 -t service -n $ARG1$ -w $ARG2$ -c $ARG3$
+        command_line    php $USER1$/check_aggregate.php -f /var/nagios/status.dat -s 1 -t service -n $ARG1$ -w $ARG2$ -c $ARG3$
        }
        }
 
 
 define command{
 define command{
         command_name    check_rpcq_latency
         command_name    check_rpcq_latency
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
        }
 
 
 define command{
 define command{
         command_name    check_nagios
         command_name    check_nagios
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
        }
        }
 
 
 define command{
 define command{
         command_name    check_webui
         command_name    check_webui
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name    check_name_dir_status
         command_name    check_name_dir_status
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
        }
        }
 
 
 define command{
 define command{
         command_name    check_oozie_status
         command_name    check_oozie_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
        }
 
 
 define command{
 define command{
         command_name    check_templeton_status
         command_name    check_templeton_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
        }
 
 
 define command{
 define command{
         command_name    check_hive_metastore_status
         command_name    check_hive_metastore_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
        }
 define command{
 define command{
         command_name    check_hue_status
         command_name    check_hue_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_hue_status.sh
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_hue_status.sh
        }
        }
 
 
 define command{
 define command{
         command_name    check_mapred_local_dir_used_space
         command_name    check_mapred_local_dir_used_space
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name    check_namenodes_ha
         command_name    check_namenodes_ha
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name    check_nodemanager_health
         command_name    check_nodemanager_health
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
        }
        }
 
 
 define command{
 define command{
@@ -138,10 +138,10 @@ define command{
 
 
 define command{
 define command{
         command_name check_tcp_wrapper
         command_name check_tcp_wrapper
-        command_line  $USER1$/check_wrapper.sh $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
+        command_line /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $HOSTADDRESS$ -- $USER1$/check_tcp -H ^^ -p $ARG1$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name check_tcp_wrapper_sasl
         command_name check_tcp_wrapper_sasl
-        command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_hive_thrift_port.py -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
+        command_line /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_hive_thrift_port.py -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
        }
        }

+ 0 - 13
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -340,7 +340,6 @@ define service {
         use                     hadoop-service
         use                     hadoop-service
         service_description     JOBTRACKER::JobTracker CPU utilization
         service_description     JOBTRACKER::JobTracker CPU utilization
         servicegroups           MAPREDUCE
         servicegroups           MAPREDUCE
-#        check_command           check_cpu!200%!250%
         check_command           check_cpu!{{ jtnode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         check_command           check_cpu!{{ jtnode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         normal_check_interval   5
         retry_check_interval    2 
         retry_check_interval    2 
@@ -479,17 +478,6 @@ define service {
         max_check_attempts      3
         max_check_attempts      3
 }
 }
 
 
-{# HBASE:: MASTER Checks
-# define service {
-#         hostgroup_name          hbasemasters
-#         use                     hadoop-service
-#         service_description     HBASEMASTER::HBase Master Web UI
-#         servicegroups           HBASE
-#         check_command           check_webui!hbase!{{ hbase_master_port }}
-#         normal_check_interval   1
-#         retry_check_interval    1
-#         max_check_attempts      3
-# #}
 {% if hostgroup_defs['hbasemasters'] %}
 {% if hostgroup_defs['hbasemasters'] %}
 {% if check_cpu_on %}
 {% if check_cpu_on %}
 define service {
 define service {
@@ -497,7 +485,6 @@ define service {
         use                     hadoop-service
         use                     hadoop-service
         service_description     HBASEMASTER::HBase Master CPU utilization
         service_description     HBASEMASTER::HBase Master CPU utilization
         servicegroups           HBASE
         servicegroups           HBASE
-#        check_command           check_cpu!200%!250%
         check_command           check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         check_command           check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         normal_check_interval   5
         retry_check_interval    2
         retry_check_interval    2

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/nagios.cfg.j2

@@ -1283,7 +1283,7 @@ use_large_installation_tweaks=1
 # Values: 1 - Enable environment variable macros (default)
 # Values: 1 - Enable environment variable macros (default)
 #         0 - Disable environment variable macros
 #         0 - Disable environment variable macros
 
 
-# NAGIOS_* macros are required for Ambari Maintenance Mode (check_wrapper.sh)
+# NAGIOS_* macros are required for Ambari Maintenance Mode (mm_wrapper.py)
 enable_environment_macros=1
 enable_environment_macros=1
 
 
 
 

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_checkpoint_time.py

@@ -104,7 +104,7 @@ def get_value_from_jmx(qry, property):
     exit(1)
     exit(1)
 
 
   data_dict = json.loads(data)
   data_dict = json.loads(data)
-  return (data_dict["beans"][0][property])
+  return data_dict["beans"][0][property]
 
 
 
 
 def get_available_nn_host(options, scheme):
 def get_available_nn_host(options, scheme):

+ 0 - 94
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_wrapper.sh

@@ -1,94 +0,0 @@
-#!/usr/bin/env bash
-#
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#
-
-function real_service() {
-  desc=$NAGIOS_SERVICEGROUPNAME
-  eval "$1='$desc'"
-}
-
-function real_component() {
-  arrDesc=(${NAGIOS_SERVICEDESC//::/ })
-
-  compName="${arrDesc[0]}"
-
-  case "$compName" in
-    HBASEMASTER)
-      realCompName="HBASE_MASTER"
-    ;;
-    REGIONSERVER)
-      realCompName="HBASE_REGIONSERVER"
-    ;;
-    JOBHISTORY)
-      realCompName="MAPREDUCE2"
-    ;;
-    HIVE-METASTORE)
-      realCompName="HIVE_METASTORE"
-    ;;
-    HIVE-SERVER)
-      realCompName="HIVE_SERVER"
-    ;;
-    FLUME)
-      realCompName="FLUME_HANDLER"
-    ;;
-    HUE)
-      realCompName="HUE_SERVER"
-    ;;
-    WEBHCAT)
-      realCompName="WEBHCAT_SERVER"
-    ;;
-    *)
-      realCompName=$compName
-    ;;
-  esac
-
-  eval "$1='$realCompName'"
-}
-
-real_service_var=""
-real_service real_service_var
-
-real_comp_var=""
-real_component real_comp_var
-
-
-wrapper_output=`exec "$@"`
-wrapper_result=$?
-
-if [ "$wrapper_result" == "0" ]; then
-  echo "$wrapper_output"
-  exit $wrapper_result
-fi
-
-if [ ! -f /var/nagios/ignore.dat ]; then
-  echo "$wrapper_output"
-  exit $wrapper_result
-else
-  count=$(grep $NAGIOS_HOSTNAME /var/nagios/ignore.dat | grep $real_service_var | grep $real_comp_var | wc -l)
-  if [ "$count" -ne "0" ]; then
-    echo "$wrapper_output\nAMBARIPASSIVE=${wrapper_result}" | sed 's/^[ \t]*//g'
-    exit 0
-  else
-    echo "$wrapper_output"
-    exit $wrapper_result
-  fi
-fi
-

+ 326 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/mm_wrapper.py

@@ -0,0 +1,326 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import sys
+import subprocess
+import os
+
+# Names of the environment variables this script reads
+N_SGN = 'NAGIOS_SERVICEGROUPNAME'  # service group, e.g. 'HBASE'
+N_SD = 'NAGIOS_SERVICEDESC'  # service description, e.g. 'HBASE::Percent RegionServers live'
+N_HOST = 'NAGIOS_HOSTNAME'  # host name, read only in legacy_check_wrapper mode
+
+# Command-line token that separates the hostname list from the command to run
+LIST_SEPARATOR = "--"
+# Command argument that is replaced with a hostname (or a list of hostnames)
+HOSTNAME_PLACEHOLDER = "^^"
+# File with one "<host> <service> <component>" entry per ignored host component
+IGNORE_DAT_FILE = "/var/nagios/ignore.dat"
+
+# Mode constants. Each value is the index of the mode's name in MODES,
+# because parse_args() derives the mode with MODES.index()
+OR = 0
+AND = 1
+ENV_ONLY = 2
+FILTER_MM = 3
+LEGACY_CHECK_WRAPPER = 4
+MODES = ['or', 'and', 'env_only', 'filter_mm', 'legacy_check_wrapper']
+
+
+def ignored_host_list(service, component):
+  """
+  Read IGNORE_DAT_FILE and collect the hosts whose entry matches the given
+  service and component.
+
+  Every line is split on single spaces; only lines that produce exactly
+  three tokens ("<host> <service> <component>") are considered.
+
+  :param service: current service
+  :param component: current component
+  :return: all hosts where specified host component is in ignored state;
+           an empty list when the file can not be opened (IOError)
+  """
+  try:
+    with open(IGNORE_DAT_FILE) as f:
+      lines = f.readlines()
+  except IOError:
+    # File can not be read: behave as if no host component is ignored
+    return []
+  result = []
+  if lines:
+    for l in lines:
+      tokens = l.split(' ')
+      # strip() drops the line ending that readlines() leaves on the last token
+      if len(tokens) == 3 and tokens[1] == service and tokens[2].strip() == component:
+        result.append(tokens[0])
+  return result
+
+
+def get_real_service():
+  """
+  :return: value of the NAGIOS_SERVICEGROUPNAME environment variable, or an
+           empty string when the variable is not set
+  """
+  try:
+    service = os.environ[N_SGN]  # e.g. 'HBASE'
+  except KeyError:
+    service = ''
+  return service
+
+
+def get_real_component():
+  """
+  Derive the component name from the NAGIOS_SERVICEDESC environment variable.
+
+  The name is the first token of the description after every "::" has been
+  replaced with a space and the result has been split on spaces. Names found
+  in the mapping below are translated; all other names are returned as is.
+
+  :return: component name, or an empty string when the variable is not set
+  """
+  try:
+    arr_desc = os.environ[N_SD]  # e.g. 'HBASE::Percent RegionServers live'
+    SEPARATOR = "::"
+    comp_name = arr_desc.replace(SEPARATOR, ' ').split(' ')[0]
+  except KeyError:
+    comp_name = ''
+  # Description prefix -> component name. do_work() passes the result to
+  # ignored_host_list(), where it is compared with entries of the ignore file
+  mapping = {
+    'HBASEMASTER': 'HBASE_MASTER',
+    'REGIONSERVER': 'HBASE_REGIONSERVER',
+    'JOBHISTORY': 'MAPREDUCE2',
+    'HIVE-METASTORE': 'HIVE_METASTORE',
+    'HIVE-SERVER': 'HIVE_SERVER',
+    'FLUME': 'FLUME_HANDLER',
+    'HUE': 'HUE_SERVER',
+    'WEBHCAT': 'WEBHCAT_SERVER',
+  }
+  if comp_name in mapping:
+    comp_name = mapping.get(comp_name)
+  return comp_name
+
+
+def check_output(*popenargs, **kwargs):
+  """
+  Imitate subprocess.check_output() for python 2.6
+
+  All arguments are passed through to subprocess.Popen; stdout and stderr
+  of the process are always captured through pipes.
+
+  :return: captured stdout of the process when its exit code is 0
+  :raise subprocess.CalledProcessError: when the exit code is non-zero; the
+         captured stdout is attached to the exception as "output"
+  """
+  process = subprocess.Popen(stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                             *popenargs, **kwargs)
+  output, unused_err = process.communicate()  # stderr is captured but not used
+  retcode = process.poll()
+  if retcode:
+    # Report the command the same way it was given: "args" keyword first,
+    # otherwise the first positional argument
+    cmd = kwargs.get("args")
+    if cmd is None:
+      cmd = popenargs[0]
+    err = subprocess.CalledProcessError(retcode, cmd)
+    # Monkey-patching for python 2.6
+    err.output = output
+    raise err
+  return output
+
+
+def print_usage():
+  """
+  Prints usage to stdout and exits with exit code 1.
+  This function never returns to the caller.
+  """
+  print "Usage: mm_wrapper.py MODE HOST1 HOST2 .. HOSTN %s command arg1 arg2 .. argN" % LIST_SEPARATOR
+  print "MODE is one of the following: or, and, env_only, filter_mm, legacy_check_wrapper"
+  print "%s is a separator between list of hostnames and command with args" % LIST_SEPARATOR
+  print "%s is used as a hostname placeholder at command args" % HOSTNAME_PLACEHOLDER
+  print "Also script provides $MM_HOSTS shell variable to commands"
+  print "NOTE: Script makes use of Nagios-populated env vars %s and %s" % (N_SGN, N_SD)
+  print "For more info, please see docstrings at %s" % os.path.realpath(__file__)
+  sys.exit(1)
+
+
+def parse_args(args):
+  """
+  Split command line arguments into mode, hostnames and command.
+
+  Expected layout: MODE HOST1 .. HOSTN <LIST_SEPARATOR> command arg1 .. argN
+  print_usage() is called (it exits the script) when args is empty, has no
+  LIST_SEPARATOR, starts with an unknown mode, contains more than one
+  LIST_SEPARATOR, or has no command after the separator.
+
+  :param args: list of arguments without the script name
+  :return: tuple (mode, hostnames, command_line); mode is an index into
+           MODES, hostnames and command_line are lists of arguments
+  """
+  if not args or not LIST_SEPARATOR in args or args[0] not in MODES:
+    print_usage()
+  else:
+    mode = MODES.index(args[0])  # identify operation mode
+    args = args[1:]  # Shift args left
+    hostnames = []
+    command_line = []
+    # Parse command line args
+    passed_separator = False  # True if met LIST_SEPARATOR
+    for arg in args:
+      if not passed_separator:
+        # Everything before the separator is a hostname
+        if arg != LIST_SEPARATOR:
+          hostnames.append(arg)
+        else:
+          passed_separator = True
+      else:
+        # Everything after the separator belongs to the command
+        if arg != LIST_SEPARATOR:
+          command_line.append(arg)
+        else:  # Something definitely goes wrong
+          print "Could not parse arguments: " \
+                "There is more than one %s argument." % LIST_SEPARATOR
+          print_usage()
+
+    if not command_line:
+      print "No command provided."
+      print_usage()
+    return mode, hostnames, command_line
+
+
+def do_work(mode, hostnames, command_line):
+  """
+  Run the check command in the requested mode and build the text to print.
+
+  :param mode: one of the mode constants (OR, AND, ENV_ONLY, FILTER_MM,
+               LEGACY_CHECK_WRAPPER); any other value is handled like
+               LEGACY_CHECK_WRAPPER
+  :param hostnames: list of hostnames given on the command line
+  :param command_line: command with its arguments, as a list
+  :return: tuple (list of output lines, return code of the check)
+  """
+  # Execute commands
+  ignored_hosts = ignored_host_list(get_real_service(), get_real_component())
+  # Result that is returned when no command has been run. retcode -1 is lower
+  # than or equal to any exit code the or/and modes compare it with, so the
+  # first real result replaces it.
+  empty_check_result = {
+    'message': 'No checks have been run (no hostnames provided)',
+    'retcode': -1,
+    'real_retcode': None
+  }
+  custom_env = os.environ.copy()
+  if ignored_hosts:
+    # MM_HOSTS is a space-separated list of ignored hosts
+    custom_env['MM_HOSTS'] = \
+      reduce(lambda a, b: "%s %s" % (a, b), ignored_hosts)
+  if mode == OR:
+    check_result = work_in_or_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result)
+  elif mode == AND:
+    check_result = work_in_and_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result)
+  elif mode == ENV_ONLY:
+    check_result = work_in_env_only_mode(hostnames, command_line, custom_env)
+  elif mode == FILTER_MM:
+    check_result = work_in_filter_mm_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result)
+  else:  # mode == LEGACY_CHECK_WRAPPER:
+    check_result = work_in_legacy_check_wrapper_mode(ignored_hosts, command_line, custom_env)
+  # Build the final output
+  final_output = []
+  output = check_result.get('message')
+  if output is not None:
+    # Leading and trailing whitespace is removed from every output line
+    for string in output.splitlines():
+      final_output.append(string.strip())
+  real_retcode = check_result.get('real_retcode')
+  if real_retcode:
+    # This string is used at check_aggregate.php when aggregating alerts
+    final_output.append("AMBARIPASSIVE=%s" % real_retcode)
+  return final_output, check_result.get('retcode')
+
+
+def work_in_or_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result):
+  """
+  Run the command once per hostname and stop at the first success on a host
+  that is not ignored.
+
+  A failure on an ignored host is suppressed: its retcode is reported as 0
+  and the actual exit code is kept in 'real_retcode'.
+
+  :param hostnames: hostnames to check, in the given order
+  :param ignored_hosts: hosts whose failures have to be suppressed
+  :param command_line: command with args; every HOSTNAME_PLACEHOLDER arg is
+                       replaced with the current hostname
+  :param custom_env: environment for the command being run
+  :param empty_check_result: result returned when hostnames is empty
+  :return: dict with 'message', 'retcode' and 'real_retcode' keys
+  """
+  check_result = empty_check_result
+  for hostname in hostnames:
+    concrete_command_line = map(  # Substitute hostname where needed
+                                  lambda x: hostname if x == HOSTNAME_PLACEHOLDER else x,
+                                  command_line)
+    try:
+      # Values for a successful run; they are overridden below on failure
+      returncode = 0
+      real_retcode = None
+      message = check_output(concrete_command_line, env=custom_env)
+    except subprocess.CalledProcessError, e:
+      if hostname not in ignored_hosts:
+        returncode = e.returncode
+      else:  # Host is in MM
+        real_retcode = e.returncode
+      message = e.output
+    really_positive_result = hostname not in ignored_hosts and returncode == 0
+    # Keep the result with the greatest retcode seen so far (the latest one
+    # wins on equal retcodes); a real success always replaces it
+    if check_result.get('retcode') <= returncode or really_positive_result:
+      check_result = {
+        'message': message,
+        'retcode': returncode,
+        'real_retcode': real_retcode  # Real (not suppressed) program retcode
+      }
+    if really_positive_result:
+      break  # Exit on first real success
+  return check_result
+
+
+def work_in_and_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result):
+  """
+  Run the command once per hostname, for all hostnames, and return the
+  result with the greatest retcode.
+
+  A failure on an ignored host is suppressed: its retcode is reported as 0
+  and the actual exit code is kept in 'real_retcode'.
+
+  :param hostnames: hostnames to check, in the given order
+  :param ignored_hosts: hosts whose failures have to be suppressed
+  :param command_line: command with args; every HOSTNAME_PLACEHOLDER arg is
+                       replaced with the current hostname
+  :param custom_env: environment for the command being run
+  :param empty_check_result: result returned when hostnames is empty
+  :return: dict with 'message', 'retcode' and 'real_retcode' keys
+  """
+  check_result = empty_check_result
+  for hostname in hostnames:
+    concrete_command_line = map(  # Substitute hostname where needed
+                                  lambda x: hostname if x == HOSTNAME_PLACEHOLDER else x,
+                                  command_line)
+    try:
+      # Values for a successful run; they are overridden below on failure
+      returncode = 0
+      real_retcode = None
+      message = check_output(concrete_command_line, env=custom_env)
+    except subprocess.CalledProcessError, e:
+      if hostname not in ignored_hosts:
+        returncode = e.returncode
+      else:
+        # Host is ignored: returncode stays 0, only the real code is remembered
+        real_retcode = e.returncode
+      message = e.output
+    # The latest result wins when retcodes are equal
+    if check_result.get('retcode') <= returncode:
+      check_result = {
+        'message': message,
+        'retcode': returncode,
+        'real_retcode': real_retcode  # Real (not suppressed) program retcode
+      }
+  return check_result
+
+
+def work_in_env_only_mode(hostnames, command_line, custom_env):
+  """
+  Run the command exactly once and report its result without suppressing
+  anything.
+
+  :param hostnames: hostnames that are put in place of every
+                    HOSTNAME_PLACEHOLDER arg (one arg per hostname)
+  :param command_line: command with its arguments, as a list
+  :param custom_env: environment for the command being run
+  :return: dict with 'message', 'retcode' and 'real_retcode' keys;
+           'real_retcode' is always None here
+  """
+  concrete_command_line = []
+  for item in command_line:
+    if item == HOSTNAME_PLACEHOLDER:
+      # One placeholder expands to the whole list of hostnames
+      concrete_command_line.extend(hostnames)
+    else:
+      concrete_command_line.append(item)
+  try:
+    returncode = 0
+    message = check_output(concrete_command_line, env=custom_env)
+  except subprocess.CalledProcessError, e:
+    returncode = e.returncode
+    message = e.output
+  check_result = {
+    'message': message,
+    'retcode': returncode,
+    'real_retcode': None  # Real (not suppressed) program retcode
+  }
+  return check_result
+
+
+def work_in_filter_mm_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result):
+  """
+  Same as work_in_env_only_mode(), but ignored hosts are removed from the
+  hostname list before the command is run.
+
+  :return: empty_check_result when no hostname is left after filtering (the
+           command is not run), otherwise the result of
+           work_in_env_only_mode() for the remaining hostnames
+  """
+  not_mm_hosts = [hostname for hostname in hostnames if hostname not in ignored_hosts]
+  if not not_mm_hosts:  # All hosts have been filtered
+    return empty_check_result
+  else:
+    return work_in_env_only_mode(not_mm_hosts, command_line, custom_env)
+
+
+def work_in_legacy_check_wrapper_mode(ignored_hosts, command_line, custom_env):
+  """
+  Run the command once for the host named by the NAGIOS_HOSTNAME environment
+  variable and suppress a failure when this host is ignored.
+
+  :param ignored_hosts: hosts whose failures have to be suppressed
+  :param command_line: command with args; every HOSTNAME_PLACEHOLDER arg is
+                       replaced with the host name
+  :param custom_env: environment for the command being run
+  :return: dict with 'message', 'retcode' and 'real_retcode' keys
+  :raise KeyError: when NAGIOS_HOSTNAME is not set in the environment
+  """
+  host = os.environ[N_HOST]
+  result = work_in_env_only_mode([host], command_line, custom_env)
+  real_retcode = result['retcode']
+  if host in ignored_hosts and real_retcode != 0:  # Ignore fail
+    # Report success, but keep the actual exit code for the caller
+    result['retcode'] = 0
+    result['real_retcode'] = real_retcode
+  return result
+
+
+def main():
+  """
+  This script runs nagios service check commands for host components
+  located at different hosts.
+  Also script passes to every command a $MM_HOSTS shell variable with a list of
+  hosts that are in MM
+
+  or mode: return 0 exit code if at least one service check succeeds.
+  Command exits on a first success.
+  Failures for host components that are in MM are suppressed (return code
+  is set to 0).
+  If command fails for all provided hostnames, script returns alert with the
+  greatest exit code value.
+
+  and mode:
+  Perform checks of all host components (effectively ignoring negative results
+  for MM components). If service check is successful for all hosts, script
+  also returns zero exit code. Otherwise alert with the greatest exit code is
+  returned.
+
+  env_only mode:
+  Pass list of all hosts to command and run it once. The only role of
+  mm_wrapper script in this mode is to provide properly initialized
+  $MM_HOSTS env variable to command being run. All duties of ignoring failures
+  of MM host components are delegated to a command being run.
+
+  filter_mm mode:
+  Similar to env_only mode. The only difference is that hostnames for
+  host components that are in MM are filtered (not passed to command at all)
+
+  legacy_check_wrapper mode:
+  Designed as a drop-in replacement for check_wrapper.sh . It reads $NAGIOS_HOSTNAME
+  env var and ignores check results if host component on this host is in MM.
+  When host substitution symbol is encountered, hostname defined by $NAGIOS_HOSTNAME
+  is substituted.
+  """
+  args = sys.argv[1:]  # Shift args left
+  mode, hostnames, command_line = parse_args(args)
+  output, ret_code = do_work(mode, hostnames, command_line)
+  for line in output:
+    print line
+  # Exit code of the script is the (possibly suppressed) return code of the check
+  sys.exit(ret_code)
+
+
+if __name__ == "__main__":
+  main()

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py

@@ -67,11 +67,11 @@ def nagios_server_config():
   nagios_server_check( 'check_mapred_local_dir_used.sh')
   nagios_server_check( 'check_mapred_local_dir_used.sh')
   nagios_server_check( 'check_nodemanager_health.sh')
   nagios_server_check( 'check_nodemanager_health.sh')
   nagios_server_check( 'check_namenodes_ha.sh')
   nagios_server_check( 'check_namenodes_ha.sh')
-  nagios_server_check( 'check_wrapper.sh')
   nagios_server_check( 'hdp_nagios_init.php')
   nagios_server_check( 'hdp_nagios_init.php')
   nagios_server_check( 'check_checkpoint_time.py' )
   nagios_server_check( 'check_checkpoint_time.py' )
   nagios_server_check( 'sys_logger.py' )
   nagios_server_check( 'sys_logger.py' )
   nagios_server_check( 'check_ambari_alerts.py' )
   nagios_server_check( 'check_ambari_alerts.py' )
+  nagios_server_check( 'mm_wrapper.py' )
   nagios_server_check( 'check_hive_thrift_port.py' )
   nagios_server_check( 'check_hive_thrift_port.py' )
 
 
 def nagios_server_configfile(
 def nagios_server_configfile(

+ 23 - 23
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2

@@ -42,97 +42,97 @@
 # 'check_cpu' check remote cpu load
 # 'check_cpu' check remote cpu load
 define command {
 define command {
         command_name    check_cpu
         command_name    check_cpu
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_cpu.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_cpu.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
        }
        }
 define command {
 define command {
         command_name    check_cpu_ha
         command_name    check_cpu_ha
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_cpu_ha.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py or $ARG1$ -- php $USER1$/check_cpu_ha.php -h ^^ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$
        }
        }
 {% endif %}
 {% endif %}
 
 
 # Check data node storage full 
 # Check data node storage full 
 define command {
 define command {
         command_name    check_datanode_storage
         command_name    check_datanode_storage
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
        }
        }
 
 
 define command{
 define command{
         command_name    check_hdfs_blocks
         command_name    check_hdfs_blocks
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_blocks.php -h $ARG1$ -p $ARG2$ -s $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $ARG1$ -- php $USER1$/check_hdfs_blocks.php -h ^^ -p $ARG2$ -s $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
        }
        }
 
 
 define command{
 define command{
         command_name    check_hdfs_capacity
         command_name    check_hdfs_capacity
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_capacity.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $ARG1$ -- php $USER1$/check_hdfs_capacity.php -h ^^ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
        }
 
 
 define command{
 define command{
         command_name    check_aggregate
         command_name    check_aggregate
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_aggregate.php -f /var/nagios/status.dat -s 1 -t service -n $ARG1$ -w $ARG2$ -c $ARG3$
+        command_line    php $USER1$/check_aggregate.php -f /var/nagios/status.dat -s 1 -t service -n $ARG1$ -w $ARG2$ -c $ARG3$
        }
        }
 
 
 define command{
 define command{
         command_name    check_rpcq_latency
         command_name    check_rpcq_latency
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
        }
 
 
 define command{
 define command{
         command_name    check_rpcq_latency_ha
         command_name    check_rpcq_latency_ha
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_rpcq_latency_ha.php -h $ARG1$ -p $ARG3$ -n $ARG2$ -w $ARG4$ -c $ARG5$ -e $ARG6$ -k $ARG7$ -r $ARG8$ -t $ARG9$ -s $ARG10$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py or $ARG1$ -- php $USER1$/check_rpcq_latency_ha.php -h ^^ -p $ARG3$ -n $ARG2$ -w $ARG4$ -c $ARG5$ -e $ARG6$ -k $ARG7$ -r $ARG8$ -t $ARG9$ -s $ARG10$
        }
        }
 
 
 define command{
 define command{
         command_name    check_nagios
         command_name    check_nagios
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
        }
        }
 
 
 define command{
 define command{
         command_name    check_webui
         command_name    check_webui
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name    check_webui_ha
         command_name    check_webui_ha
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_webui_ha.sh $ARG1$ $ARG2$ $ARG3$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $ARG2$ -- $USER1$/check_webui_ha.sh $ARG1$ ^^ $ARG3$
        }
        }
 
 
 define command{
 define command{
         command_name    check_name_dir_status
         command_name    check_name_dir_status
-        command_line    $USER1$/check_wrapper.sh php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
        }
        }
 
 
 define command{
 define command{
         command_name    check_oozie_status
         command_name    check_oozie_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
        }
 
 
 define command{
 define command{
         command_name    check_templeton_status
         command_name    check_templeton_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
        }
 
 
 define command{
 define command{
         command_name    check_hive_metastore_status
         command_name    check_hive_metastore_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
        }
 define command{
 define command{
         command_name    check_hue_status
         command_name    check_hue_status
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_hue_status.sh
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_hue_status.sh
        }
        }
 
 
 define command{
 define command{
        command_name    check_mapred_local_dir_used_space
        command_name    check_mapred_local_dir_used_space
-       command_line    $USER1$/check_wrapper.sh $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
+       command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
        }
        }
 
 
 define command{
 define command{
        command_name    check_namenodes_ha
        command_name    check_namenodes_ha
-       command_line    $USER1$/check_wrapper.sh $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
+       command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name    check_nodemanager_health
         command_name    check_nodemanager_health
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
+        command_line    /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
        }
        }
 
 
 define command{
 define command{
@@ -147,20 +147,20 @@ define command{
 
 
 define command{
 define command{
         command_name check_tcp_wrapper
         command_name check_tcp_wrapper
-        command_line  $USER1$/check_wrapper.sh $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
+        command_line  /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $HOSTADDRESS$ -- $USER1$/check_tcp -H ^^ -p $ARG1$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name check_checkpoint_time
         command_name check_checkpoint_time
-        command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_checkpoint_time.py -H "$ARG1$" -p $ARG2$ -w $ARG3$ -c $ARG4$ -t $ARG5$ -x $ARG6$ -s $ARG7$
+        command_line /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py or $ARG1$ -- /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_checkpoint_time.py -H ^^ -p $ARG2$ -w $ARG3$ -c $ARG4$ -t $ARG5$ -x $ARG6$ -s $ARG7$
        }
        }
 
 
 define command{
 define command{
         command_name check_tcp_wrapper_sasl
         command_name check_tcp_wrapper_sasl
-        command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_hive_thrift_port.py -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
+        command_line /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py and $HOSTADDRESS$ -- /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_hive_thrift_port.py -H ^^ -p $ARG1$ $ARG2$
        }
        }
 
 
 define command{
 define command{
         command_name check_ambari
         command_name check_ambari
-        command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_ambari_alerts.py -H $HOSTADDRESS$ -f $ARG1$ -n $ARG2$
+        command_line /var/lib/ambari-agent/ambari-python-wrap $USER1$/mm_wrapper.py legacy_check_wrapper -- /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_ambari_alerts.py -H $HOSTADDRESS$ -f $ARG1$ -n $ARG2$
        }
        }

+ 0 - 15
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -344,7 +344,6 @@ define service {
         use                     hadoop-service
         use                     hadoop-service
         service_description     NAMENODE::NameNode host CPU utilization on {{ namenode_hostname }}
         service_description     NAMENODE::NameNode host CPU utilization on {{ namenode_hostname }}
         servicegroups           HDFS
         servicegroups           HDFS
-#        check_command           check_cpu!200%!250%
         check_command           check_cpu!{{ namenode_port }}!200%!250%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         check_command           check_cpu!{{ namenode_port }}!200%!250%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         normal_check_interval   5
         retry_check_interval    2
         retry_check_interval    2
@@ -441,7 +440,6 @@ define service {
         use                     hadoop-service
         use                     hadoop-service
         service_description     RESOURCEMANAGER::ResourceManager CPU utilization
         service_description     RESOURCEMANAGER::ResourceManager CPU utilization
         servicegroups           YARN
         servicegroups           YARN
-#       check_command           check_cpu!200%!250%
         check_command           check_cpu_ha!{{ rm_hosts_in_str }}!{{ rm_port }}!200%!250%!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         check_command           check_cpu_ha!{{ rm_hosts_in_str }}!{{ rm_port }}!200%!250%!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         normal_check_interval   5
         retry_check_interval    2 
         retry_check_interval    2 
@@ -528,7 +526,6 @@ define service {
         use                     hadoop-service
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer CPU utilization
         service_description     JOBHISTORY::HistoryServer CPU utilization
         servicegroups           MAPREDUCE
         servicegroups           MAPREDUCE
-#        check_command           check_cpu!200%!250%
         check_command           check_cpu!{{ hs_port }}!200%!250%!{{ str(mapreduce_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         check_command           check_cpu!{{ hs_port }}!200%!250%!{{ str(mapreduce_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         normal_check_interval   5
         retry_check_interval    2 
         retry_check_interval    2 
@@ -640,17 +637,6 @@ define service {
         max_check_attempts      3
         max_check_attempts      3
 }
 }
 
 
-{# HBASE:: MASTER Checks
-# define service {
-#         hostgroup_name          hbasemasters
-#         use                     hadoop-service
-#         service_description     HBASEMASTER::HBase Master Web UI
-#         servicegroups           HBASE
-#         check_command           check_webui!hbase!{{ hbase_master_port }}
-#         normal_check_interval   1
-#         retry_check_interval    1
-#         max_check_attempts      3
-# #}
 {% if hostgroup_defs['hbasemasters'] %}
 {% if hostgroup_defs['hbasemasters'] %}
 {% if check_cpu_on %}
 {% if check_cpu_on %}
 define service {
 define service {
@@ -658,7 +644,6 @@ define service {
         use                     hadoop-service
         use                     hadoop-service
         service_description     HBASEMASTER::HBase Master CPU utilization
         service_description     HBASEMASTER::HBase Master CPU utilization
         servicegroups           HBASE
         servicegroups           HBASE
-#        check_command           check_cpu!200%!250%
         check_command           check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!false!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         check_command           check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!false!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         normal_check_interval   5
         retry_check_interval    2
         retry_check_interval    2

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/nagios.cfg.j2

@@ -1280,7 +1280,7 @@ use_large_installation_tweaks=1
 # Values: 1 - Enable environment variable macros (default)
 # Values: 1 - Enable environment variable macros (default)
 #         0 - Disable environment variable macros
 #         0 - Disable environment variable macros
 
 
-# NAGIOS_* macros are required for Ambari Maintenance Mode (check_wrapper.sh)
+# NAGIOS_* macros are required for Ambari Maintenance Mode (mm_wrapper.py)
 enable_environment_macros=1
 enable_environment_macros=1
 
 
 
 

+ 459 - 0
ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_mm_wrapper.py

@@ -0,0 +1,459 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import StringIO
+
+import os, sys
+import pprint
+import subprocess
+from unittest import TestCase
+from mock.mock import Mock, MagicMock, patch
+import mm_wrapper
+
+class TestOrWrapper(TestCase):
+
+  dummy_ignore_file = """
+vm-4.vm HIVE HIVE_METASTORE
+vm-5.vm GANGLIA GANGLIA_MONITOR
+vm-4.vm YARN NODEMANAGER
+vm-3.vm YARN NODEMANAGER
+vm-3.vm HBASE HBASE_REGIONSERVER
+vm-4.vm HBASE HBASE_REGIONSERVER
+vm-4.vm STORM STORM_REST_API
+vm-4.vm HDFS DATANODE
+vm-4.vm STORM SUPERVISOR
+vm-4.vm STORM NIMBUS
+vm-4.vm STORM STORM_UI_SERVER
+vm-3.vm STORM SUPERVISOR
+vm-4.vm HDFS SECONDARY_NAMENODE
+vm-3.vm FLUME FLUME_HANDLER
+vm-4.vm GANGLIA GANGLIA_SERVER
+vm-4.vm HIVE HIVE_SERVER
+vm-4.vm ZOOKEEPER ZOOKEEPER_SERVER
+vm-4.vm WEBHCAT WEBHCAT_SERVER
+vm-3.vm HBASE HBASE_MASTER
+vm-4.vm GANGLIA GANGLIA_MONITOR
+vm-3.vm GANGLIA GANGLIA_MONITOR
+vm-3.vm HDFS NAMENODE
+vm-4.vm HIVE MYSQL_SERVER
+vm-4.vm YARN APP_TIMELINE_SERVER
+vm-4.vm FALCON FALCON_SERVER
+vm-3.vm HDFS DATANODE
+vm-4.vm YARN RESOURCEMANAGER
+vm-4.vm OOZIE OOZIE_SERVER
+vm-4.vm MAPREDUCE2 HISTORYSERVER
+vm-4.vm STORM DRPC_SERVER
+vm-4.vm FLUME FLUME_HANDLER
+vm-3.vm ZOOKEEPER ZOOKEEPER_SERVER
+"""
+
+  default_empty_check_result = {
+    'message': 'No checks have been run (no hostnames provided)',
+    'retcode': -1,
+    'real_retcode': None
+  }
+
+
+  @patch("__builtin__.open")
+  def test_ignored_host_list(self, open_mock):
+    # Check with empty file content
+    open_mock.return_value.__enter__.return_value.readlines.return_value = []
+    lst = mm_wrapper.ignored_host_list('STORM', 'SUPERVISOR')
+    self.assertEqual(pprint.pformat(lst), '[]')
+    # Check with dummy content
+    open_mock.return_value.__enter__.return_value.readlines.return_value = self.dummy_ignore_file.splitlines()
+    lst = mm_wrapper.ignored_host_list('STORM', 'SUPERVISOR')
+    self.assertEqual(pprint.pformat(lst), "['vm-4.vm', 'vm-3.vm']")
+    # Check if service name/comp name are not defined
+    open_mock.return_value.__enter__.return_value.readlines.return_value = self.dummy_ignore_file.splitlines()
+    lst = mm_wrapper.ignored_host_list('', '')
+    self.assertEqual(pprint.pformat(lst), "[]")
+
+
+  @patch("sys.exit")
+  def test_print_usage(self, exit_mock):
+    mm_wrapper.print_usage()
+    self.assertTrue(exit_mock.called)
+    self.assertEqual(exit_mock.call_args_list[0][0][0], 1)
+
+
+  def test_get_real_component(self):
+    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'SUPERVISOR::Supervisors process'}, clear=True):
+      component = mm_wrapper.get_real_component()
+      self.assertEqual(component, 'SUPERVISOR')
+    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'JOBHISTORY::HistoryServer process'}, clear=True):
+      component = mm_wrapper.get_real_component()
+      self.assertEqual(component, 'MAPREDUCE2')
+
+
+  @patch("mm_wrapper.print_usage")
+  def test_parse_args(self, print_usage_mock):
+    args = ['or', 'h1', 'h2', '--', 'prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    mode, hostnames, command_line = mm_wrapper.parse_args(args)
+    self.assertEquals(mode, mm_wrapper.OR)
+    self.assertEquals(hostnames, ['h1', 'h2'])
+    self.assertEquals(command_line, ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt'])
+
+    args = ['and', 'h1', 'h2', '--', 'prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    mode, hostnames, command_line = mm_wrapper.parse_args(args)
+    self.assertEquals(mode, mm_wrapper.AND)
+    self.assertEquals(hostnames, ['h1', 'h2'])
+    self.assertEquals(command_line, ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt'])
+
+    args = ['env_only', 'h1', 'h2', '--', 'prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    mode, hostnames, command_line = mm_wrapper.parse_args(args)
+    self.assertEquals(mode, mm_wrapper.ENV_ONLY)
+    self.assertEquals(hostnames, ['h1', 'h2'])
+    self.assertEquals(command_line, ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt'])
+
+    # Check wrong usage
+    args = []
+    mm_wrapper.parse_args(args)
+    self.assertTrue(print_usage_mock.called)
+
+
+  @patch("mm_wrapper.ignored_host_list")
+  @patch("mm_wrapper.work_in_or_mode")
+  @patch("mm_wrapper.work_in_and_mode")
+  @patch("mm_wrapper.work_in_env_only_mode")
+  @patch("mm_wrapper.work_in_filter_mm_mode")
+  @patch("mm_wrapper.work_in_legacy_check_wrapper_mode")
+  def test_do_work(self, work_in_legacy_check_wrapper_mode, work_in_filter_mm_mode_mock,
+                   work_in_env_only_mode_mock, work_in_and_mode_mock,
+                   work_in_or_mode_mock,
+                   ignored_host_list_mock):
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_host_list_mock.return_value = ['h2', 'h3']
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    work_in_or_mode_mock.return_value = {
+      'message': "or_mode mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_and_mode_mock.return_value = {
+      'message': "and_mode mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_env_only_mode_mock.return_value = {
+      'message': "env_only mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_filter_mm_mode_mock.return_value = {
+      'message': "filter_mm mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_legacy_check_wrapper_mode.return_value = {
+      'message': "legacy_check_wrapper mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    result = mm_wrapper.do_work(mm_wrapper.OR, hostnames, command_line)
+    self.assertEquals(str(result), "(['or_mode mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.AND, hostnames, command_line)
+    self.assertEquals(str(result), "(['and_mode mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.ENV_ONLY, hostnames, command_line)
+    self.assertEquals(str(result), "(['env_only mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.FILTER_MM, hostnames, command_line)
+    self.assertEquals(str(result), "(['filter_mm mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.LEGACY_CHECK_WRAPPER, hostnames, command_line)
+    self.assertEquals(str(result), "(['legacy_check_wrapper mode result'], 0)")
+
+    # Check behaviour when real_retcode is defined
+    work_in_or_mode_mock.return_value = {
+      'message': "or_mode mode result",
+      'retcode': 0,
+      'real_retcode': 1
+    }
+    result = mm_wrapper.do_work(mm_wrapper.OR, hostnames, command_line)
+    self.assertEquals(str(result), "(['or_mode mode result', 'AMBARIPASSIVE=1'], 0)")
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_or_mode(self, check_output_mock):
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    custom_env = {'MM_HOSTS': ignored_hosts}
+
+    # Normal usage
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)  # Exited on first success
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    for check_tupple in zip(check_output_mock.call_args_list, hostnames):
+      self.assertEquals(check_tupple[0][0][0], ['prog', '-h', check_tupple[1], '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed all checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output4'),
+      ]
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 2}")
+
+    check_output_mock.reset_mock()
+
+    # Failed all but MM host component checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+    ]
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 3}")
+
+    check_output_mock.reset_mock()
+
+    # Components check only for one check is successful
+    ignored_hosts = []
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+    ]
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 2)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output2', 'real_retcode': None, 'retcode': 0}")
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_and_mode(self, check_output_mock):
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    custom_env = {'MM_HOSTS': ignored_hosts}
+
+    # Normal usage
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    for check_tupple in zip(check_output_mock.call_args_list, hostnames):
+      self.assertEquals(check_tupple[0][0][0], ['prog', '-h', check_tupple[1], '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed all checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output4'),
+    ]
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 2}")
+
+    check_output_mock.reset_mock()
+
+    # Failed all but MM host component checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+      ]
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 3}")
+
+    check_output_mock.reset_mock()
+
+    # Components check only for one check is successful
+    ignored_hosts = []
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+      ]
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 3}")
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_env_only_mode(self, check_output_mock):
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    custom_env = {'MM_HOSTS' : ignored_hosts}
+
+    # Normal usage
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_env_only_mode(hostnames, command_line, custom_env)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    self.assertEquals(check_output_mock.call_args[0][0],
+                      ['prog', '-h', 'h1', 'h2', 'h3', 'h4', '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed all checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+    ]
+    result = mm_wrapper.work_in_env_only_mode(hostnames, command_line, custom_env)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': None, 'retcode': 1}")
+
+    check_output_mock.reset_mock()
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_filter_mm_mode(self, check_output_mock):
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    custom_env = {'MM_HOSTS' : ignored_hosts}
+
+    # Normal usage
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_filter_mm_mode(hostnames, ignored_hosts, command_line,
+                                               custom_env,
+                                               self.default_empty_check_result)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    self.assertEquals(check_output_mock.call_args[0][0],
+                      ['prog', '-h', 'h1', 'h4', '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed all checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+    ]
+    result = mm_wrapper.work_in_filter_mm_mode(hostnames, ignored_hosts, command_line,
+                                               custom_env,
+                                               self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': None, 'retcode': 1}")
+
+    check_output_mock.reset_mock()
+
+    # All host components are in MM
+    ignored_hosts = hostnames
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      ]
+    result = mm_wrapper.work_in_filter_mm_mode(hostnames, ignored_hosts, command_line,
+                                               custom_env,
+                                               self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 0)
+    self.assertEquals(str(result),
+                      "{'message': 'No checks have been run (no hostnames provided)', "
+                      "'real_retcode': None, 'retcode': -1}")
+
+    check_output_mock.reset_mock()
+
+
+  @patch("mm_wrapper.check_output")
+  @patch.dict(os.environ, {'NAGIOS_HOSTNAME': 'h2'}, clear=True)
+  def test_work_in_legacy_check_wrapper_mode(self, check_output_mock):
+    command_line = ['prog', '-opt', 'yet', 'another', 'opt']
+    ignored_hosts = []
+    custom_env = {'MM_HOSTS': ignored_hosts}
+
+    # Normal usage
+    ignored_hosts = []
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_legacy_check_wrapper_mode(ignored_hosts, command_line,
+                                               custom_env)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    self.assertEquals(check_output_mock.call_args[0][0],
+                      ['prog', '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed check on host that is not in MM state
+    ignored_hosts = ['h3']
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      ]
+    result = mm_wrapper.work_in_legacy_check_wrapper_mode(ignored_hosts, command_line,
+                                               custom_env)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': None, 'retcode': 1}")
+
+    check_output_mock.reset_mock()
+
+    # Failed check on host that is in MM state
+    ignored_hosts = ['h2']
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      ]
+    result = mm_wrapper.work_in_legacy_check_wrapper_mode(ignored_hosts, command_line,
+                                               custom_env)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': 1, 'retcode': 0}")
+
+    check_output_mock.reset_mock()
+

+ 4 - 4
ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py

@@ -245,13 +245,13 @@ class TestNagiosServer(RMFTestCase):
                               mode=0755
                               mode=0755
     )
     )
     self.assertResourceCalled('File',
     self.assertResourceCalled('File',
-                              '/usr/lib64/nagios/plugins/check_wrapper.sh',
-                              content=StaticFile('check_wrapper.sh'),
+                              '/usr/lib64/nagios/plugins/hdp_nagios_init.php',
+                              content=StaticFile('hdp_nagios_init.php'),
                               mode=0755
                               mode=0755
     )
     )
     self.assertResourceCalled('File',
     self.assertResourceCalled('File',
-                              '/usr/lib64/nagios/plugins/hdp_nagios_init.php',
-                              content=StaticFile('hdp_nagios_init.php'),
+                              '/usr/lib64/nagios/plugins/mm_wrapper.py',
+                              content=StaticFile('mm_wrapper.py'),
                               mode=0755
                               mode=0755
     )
     )
     self.assertResourceCalled('File',
     self.assertResourceCalled('File',

+ 459 - 0
ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_mm_wrapper.py

@@ -0,0 +1,459 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import StringIO
+
+import os, sys
+import pprint
+import subprocess
+from unittest import TestCase
+from mock.mock import Mock, MagicMock, patch
+import mm_wrapper
+
+class TestOrWrapper(TestCase):
+
+  dummy_ignore_file = """
+vm-4.vm HIVE HIVE_METASTORE
+vm-5.vm GANGLIA GANGLIA_MONITOR
+vm-4.vm YARN NODEMANAGER
+vm-3.vm YARN NODEMANAGER
+vm-3.vm HBASE HBASE_REGIONSERVER
+vm-4.vm HBASE HBASE_REGIONSERVER
+vm-4.vm STORM STORM_REST_API
+vm-4.vm HDFS DATANODE
+vm-4.vm STORM SUPERVISOR
+vm-4.vm STORM NIMBUS
+vm-4.vm STORM STORM_UI_SERVER
+vm-3.vm STORM SUPERVISOR
+vm-4.vm HDFS SECONDARY_NAMENODE
+vm-3.vm FLUME FLUME_HANDLER
+vm-4.vm GANGLIA GANGLIA_SERVER
+vm-4.vm HIVE HIVE_SERVER
+vm-4.vm ZOOKEEPER ZOOKEEPER_SERVER
+vm-4.vm WEBHCAT WEBHCAT_SERVER
+vm-3.vm HBASE HBASE_MASTER
+vm-4.vm GANGLIA GANGLIA_MONITOR
+vm-3.vm GANGLIA GANGLIA_MONITOR
+vm-3.vm HDFS NAMENODE
+vm-4.vm HIVE MYSQL_SERVER
+vm-4.vm YARN APP_TIMELINE_SERVER
+vm-4.vm FALCON FALCON_SERVER
+vm-3.vm HDFS DATANODE
+vm-4.vm YARN RESOURCEMANAGER
+vm-4.vm OOZIE OOZIE_SERVER
+vm-4.vm MAPREDUCE2 HISTORYSERVER
+vm-4.vm STORM DRPC_SERVER
+vm-4.vm FLUME FLUME_HANDLER
+vm-3.vm ZOOKEEPER ZOOKEEPER_SERVER
+"""
+
+  default_empty_check_result = {
+    'message': 'No checks have been run (no hostnames provided)',
+    'retcode': -1,
+    'real_retcode': None
+  }
+
+
+  @patch("__builtin__.open")
+  def test_ignored_host_list(self, open_mock):
+    # Check with empty file content
+    open_mock.return_value.__enter__.return_value.readlines.return_value = []
+    lst = mm_wrapper.ignored_host_list('STORM', 'SUPERVISOR')
+    self.assertEqual(pprint.pformat(lst), '[]')
+    # Check with dummy content
+    open_mock.return_value.__enter__.return_value.readlines.return_value = self.dummy_ignore_file.splitlines()
+    lst = mm_wrapper.ignored_host_list('STORM', 'SUPERVISOR')
+    self.assertEqual(pprint.pformat(lst), "['vm-4.vm', 'vm-3.vm']")
+    # Check if service name/comp name are not defined
+    open_mock.return_value.__enter__.return_value.readlines.return_value = self.dummy_ignore_file.splitlines()
+    lst = mm_wrapper.ignored_host_list('', '')
+    self.assertEqual(pprint.pformat(lst), "[]")
+
+
+  @patch("sys.exit")
+  def test_print_usage(self, exit_mock):
+    mm_wrapper.print_usage()
+    self.assertTrue(exit_mock.called)
+    self.assertEqual(exit_mock.call_args_list[0][0][0], 1)
+
+
+  def test_get_real_component(self):
+    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'SUPERVISOR::Supervisors process'}, clear=True):
+      component = mm_wrapper.get_real_component()
+      self.assertEqual(component, 'SUPERVISOR')
+    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'JOBHISTORY::HistoryServer process'}, clear=True):
+      component = mm_wrapper.get_real_component()
+      self.assertEqual(component, 'MAPREDUCE2')
+
+
+  @patch("mm_wrapper.print_usage")
+  def test_parse_args(self, print_usage_mock):
+    args = ['or', 'h1', 'h2', '--', 'prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    mode, hostnames, command_line = mm_wrapper.parse_args(args)
+    self.assertEquals(mode, mm_wrapper.OR)
+    self.assertEquals(hostnames, ['h1', 'h2'])
+    self.assertEquals(command_line, ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt'])
+
+    args = ['and', 'h1', 'h2', '--', 'prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    mode, hostnames, command_line = mm_wrapper.parse_args(args)
+    self.assertEquals(mode, mm_wrapper.AND)
+    self.assertEquals(hostnames, ['h1', 'h2'])
+    self.assertEquals(command_line, ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt'])
+
+    args = ['env_only', 'h1', 'h2', '--', 'prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    mode, hostnames, command_line = mm_wrapper.parse_args(args)
+    self.assertEquals(mode, mm_wrapper.ENV_ONLY)
+    self.assertEquals(hostnames, ['h1', 'h2'])
+    self.assertEquals(command_line, ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt'])
+
+    # Check wrong usage
+    args = []
+    mm_wrapper.parse_args(args)
+    self.assertTrue(print_usage_mock.called)
+
+
+  @patch("mm_wrapper.ignored_host_list")
+  @patch("mm_wrapper.work_in_or_mode")
+  @patch("mm_wrapper.work_in_and_mode")
+  @patch("mm_wrapper.work_in_env_only_mode")
+  @patch("mm_wrapper.work_in_filter_mm_mode")
+  @patch("mm_wrapper.work_in_legacy_check_wrapper_mode")
+  def test_do_work(self, work_in_legacy_check_wrapper_mode, work_in_filter_mm_mode_mock,
+                   work_in_env_only_mode_mock, work_in_and_mode_mock,
+                   work_in_or_mode_mock,
+                   ignored_host_list_mock):
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_host_list_mock.return_value = ['h2', 'h3']
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    work_in_or_mode_mock.return_value = {
+      'message': "or_mode mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_and_mode_mock.return_value = {
+      'message': "and_mode mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_env_only_mode_mock.return_value = {
+      'message': "env_only mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_filter_mm_mode_mock.return_value = {
+      'message': "filter_mm mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    work_in_legacy_check_wrapper_mode.return_value = {
+      'message': "legacy_check_wrapper mode result",
+      'retcode': 0,
+      'real_retcode': None
+    }
+    result = mm_wrapper.do_work(mm_wrapper.OR, hostnames, command_line)
+    self.assertEquals(str(result), "(['or_mode mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.AND, hostnames, command_line)
+    self.assertEquals(str(result), "(['and_mode mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.ENV_ONLY, hostnames, command_line)
+    self.assertEquals(str(result), "(['env_only mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.FILTER_MM, hostnames, command_line)
+    self.assertEquals(str(result), "(['filter_mm mode result'], 0)")
+
+    result = mm_wrapper.do_work(mm_wrapper.LEGACY_CHECK_WRAPPER, hostnames, command_line)
+    self.assertEquals(str(result), "(['legacy_check_wrapper mode result'], 0)")
+
+    # Check behaviour when real_retcode is defined
+    work_in_or_mode_mock.return_value = {
+      'message': "or_mode mode result",
+      'retcode': 0,
+      'real_retcode': 1
+    }
+    result = mm_wrapper.do_work(mm_wrapper.OR, hostnames, command_line)
+    self.assertEquals(str(result), "(['or_mode mode result', 'AMBARIPASSIVE=1'], 0)")
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_or_mode(self, check_output_mock):
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']
+    custom_env = {'MM_HOSTS': ignored_hosts}
+
+    # Normal usage
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)  # Exited on first success
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    for check_tupple in zip(check_output_mock.call_args_list, hostnames):
+      self.assertEquals(check_tupple[0][0][0], ['prog', '-h', check_tupple[1], '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed all checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output4'),
+      ]
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 2}")
+
+    check_output_mock.reset_mock()
+
+    # Failed all but MM host component checks
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+    ]
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 3}")
+
+    check_output_mock.reset_mock()
+
+    # Components check only for one check is successful
+    ignored_hosts = []
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+    ]
+    result = mm_wrapper.work_in_or_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 2)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output2', 'real_retcode': None, 'retcode': 0}")
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_and_mode(self, check_output_mock):  # Exercises mm_wrapper.work_in_and_mode with mm_wrapper.check_output replaced by a mock
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']  # passed as the ignored-hosts argument and exposed to the command as MM_HOSTS
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']  # '^^' is expected to be replaced by one hostname per call (asserted in the loop below)
+    custom_env = {'MM_HOSTS': ignored_hosts}
+
+    # Normal usage: the mocked check returns output without raising; all 4 hosts are expected to be checked
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    for check_tupple in zip(check_output_mock.call_args_list, hostnames):  # pairs the i-th recorded call with the i-th hostname
+      self.assertEquals(check_tupple[0][0][0], ['prog', '-h', check_tupple[1], '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed all checks: each of the 4 mocked calls raises CalledProcessError with a non-zero return code
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output4'),
+    ]
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),  # expected result carries the fourth call's output and return code
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 2}")
+
+    check_output_mock.reset_mock()
+
+    # Only the second call (an ignored host, per the ordering asserted above) reports return code 0; the other calls fail
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+      ]
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),  # expected result carries the fourth call's output and return code
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 3}")
+
+    check_output_mock.reset_mock()
+
+    # No ignored hosts: a single call reporting return code 0 does not stop the run; all 4 calls are still expected
+    ignored_hosts = []
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      subprocess.CalledProcessError(0, 'dummy cmd', output='dummy output2'),
+      subprocess.CalledProcessError(2, 'dummy cmd', output='dummy output3'),
+      subprocess.CalledProcessError(3, 'dummy cmd', output='dummy output4'),
+      ]
+    result = mm_wrapper.work_in_and_mode(hostnames, ignored_hosts, command_line,
+                                        custom_env,
+                                        self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 4)
+    self.assertEquals(str(result),  # expected result carries the fourth call's output and return code
+                      "{'message': 'dummy output4', 'real_retcode': None, 'retcode': 3}")
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_env_only_mode(self, check_output_mock):  # Exercises mm_wrapper.work_in_env_only_mode with mm_wrapper.check_output replaced by a mock
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']  # only reaches the command through custom_env['MM_HOSTS']; not passed as an argument in this mode
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']  # '^^' is expected to be replaced by the full hostname list (asserted below)
+    custom_env = {'MM_HOSTS' : ignored_hosts}
+
+    # Normal usage: one call is expected, receiving every hostname (ignored ones included) in place of '^^'
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_env_only_mode(hostnames, command_line, custom_env)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    self.assertEquals(check_output_mock.call_args[0][0],
+                      ['prog', '-h', 'h1', 'h2', 'h3', 'h4', '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed check: the single mocked call raises CalledProcessError; its output and return code 1 are expected in the result
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+    ]
+    result = mm_wrapper.work_in_env_only_mode(hostnames, command_line, custom_env)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': None, 'retcode': 1}")
+
+    check_output_mock.reset_mock()
+
+
+  @patch("mm_wrapper.check_output")
+  def test_work_in_filter_mm_mode(self, check_output_mock):  # Exercises mm_wrapper.work_in_filter_mm_mode with mm_wrapper.check_output replaced by a mock
+    hostnames = ['h1', 'h2', 'h3', 'h4']
+    ignored_hosts = ['h2', 'h3']  # these hosts are expected to be dropped from the command line (asserted below)
+    command_line = ['prog', '-h', '^^', '-opt', 'yet', 'another', 'opt']  # '^^' is expected to be replaced by the remaining hostnames
+    custom_env = {'MM_HOSTS' : ignored_hosts}
+
+    # Normal usage: one call is expected, receiving only the hostnames that are not ignored ('h1', 'h4')
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_filter_mm_mode(hostnames, ignored_hosts, command_line,
+                                               custom_env,
+                                               self.default_empty_check_result)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    self.assertEquals(check_output_mock.call_args[0][0],
+                      ['prog', '-h', 'h1', 'h4', '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed check: the single mocked call raises CalledProcessError; its output and return code 1 are expected in the result
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+    ]
+    result = mm_wrapper.work_in_filter_mm_mode(hostnames, ignored_hosts, command_line,
+                                               custom_env,
+                                               self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': None, 'retcode': 1}")
+
+    check_output_mock.reset_mock()
+
+    # All hosts are ignored: no call is expected, so the side_effect configured below must never be consumed
+    ignored_hosts = hostnames
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      ]
+    result = mm_wrapper.work_in_filter_mm_mode(hostnames, ignored_hosts, command_line,
+                                               custom_env,
+                                               self.default_empty_check_result)
+    self.assertEquals(check_output_mock.call_count, 0)
+    self.assertEquals(str(result),  # presumably the content of self.default_empty_check_result (defined outside this view) - TODO confirm
+                      "{'message': 'No checks have been run (no hostnames provided)', "
+                      "'real_retcode': None, 'retcode': -1}")
+
+    check_output_mock.reset_mock()
+
+
+  @patch("mm_wrapper.check_output")
+  @patch.dict(os.environ, {'NAGIOS_HOSTNAME': 'h2'}, clear=True)  # os.environ holds only NAGIOS_HOSTNAME=h2 while this test runs
+  def test_work_in_legacy_check_wrapper_mode(self, check_output_mock):  # Exercises mm_wrapper.work_in_legacy_check_wrapper_mode with mm_wrapper.check_output replaced by a mock
+    command_line = ['prog', '-opt', 'yet', 'another', 'opt']  # no '^^' placeholder here; the command is expected to be passed through unchanged
+    ignored_hosts = []
+    custom_env = {'MM_HOSTS': ignored_hosts}
+
+    # Normal usage: no ignored hosts and the mocked check returns output without raising
+    ignored_hosts = []  # NOTE(review): rebinds the name to a new empty list; custom_env still references the list created above
+    check_output_mock.return_value = 'Dummy message'
+    result = mm_wrapper.work_in_legacy_check_wrapper_mode(ignored_hosts, command_line,
+                                               custom_env)
+    self.assertEquals(str(result),
+                      "{'message': 'Dummy message', 'real_retcode': None, 'retcode': 0}")
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(check_output_mock.call_args[1]['env']['MM_HOSTS'], ignored_hosts)
+    self.assertEquals(check_output_mock.call_args[0][0],
+                      ['prog', '-opt', 'yet', 'another', 'opt'])
+
+    check_output_mock.reset_mock()
+
+    # Failed check while the ignored list ('h3') does not contain NAGIOS_HOSTNAME ('h2'): return code 1 is expected unchanged
+    ignored_hosts = ['h3']
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      ]
+    result = mm_wrapper.work_in_legacy_check_wrapper_mode(ignored_hosts, command_line,
+                                               custom_env)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': None, 'retcode': 1}")
+
+    check_output_mock.reset_mock()
+
+    # Failed check while the ignored list contains NAGIOS_HOSTNAME ('h2'): retcode 0 is expected, with the failure kept in real_retcode
+    ignored_hosts = ['h2']
+    check_output_mock.side_effect = [
+      subprocess.CalledProcessError(1, 'dummy cmd', output='dummy output1'),
+      ]
+    result = mm_wrapper.work_in_legacy_check_wrapper_mode(ignored_hosts, command_line,
+                                               custom_env)
+    self.assertEquals(check_output_mock.call_count, 1)
+    self.assertEquals(str(result),
+                      "{'message': 'dummy output1', 'real_retcode': 1, 'retcode': 0}")
+
+    check_output_mock.reset_mock()
+

+ 5 - 5
ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py

@@ -259,11 +259,6 @@ class TestNagiosServer(RMFTestCase):
                               content=StaticFile('check_namenodes_ha.sh'),
                               content=StaticFile('check_namenodes_ha.sh'),
                               mode=0755
                               mode=0755
     )
     )
-    self.assertResourceCalled('File',
-                              '/usr/lib64/nagios/plugins/check_wrapper.sh',
-                              content=StaticFile('check_wrapper.sh'),
-                              mode=0755
-    )
     self.assertResourceCalled('File',
     self.assertResourceCalled('File',
                               '/usr/lib64/nagios/plugins/hdp_nagios_init.php',
                               '/usr/lib64/nagios/plugins/hdp_nagios_init.php',
                               content=StaticFile('hdp_nagios_init.php'),
                               content=StaticFile('hdp_nagios_init.php'),
@@ -283,6 +278,11 @@ class TestNagiosServer(RMFTestCase):
                               content=StaticFile('check_ambari_alerts.py'),
                               content=StaticFile('check_ambari_alerts.py'),
                               mode=0755
                               mode=0755
     )
     )
+    self.assertResourceCalled('File',
+                              '/usr/lib64/nagios/plugins/mm_wrapper.py',
+                              content=StaticFile('mm_wrapper.py'),
+                              mode=0755
+    )
     self.assertResourceCalled('File',
     self.assertResourceCalled('File',
                               '/usr/lib64/nagios/plugins/check_hive_thrift_port.py',
                               '/usr/lib64/nagios/plugins/check_hive_thrift_port.py',
                               content=StaticFile('check_hive_thrift_port.py'),
                               content=StaticFile('check_hive_thrift_port.py'),

+ 5 - 4
ambari-server/src/test/python/unitTests.py

@@ -64,7 +64,7 @@ def get_test_files(path, mask = None, recursive=True):
   return current
   return current
 
 
 
 
-def stack_test_executor(base_folder, stack, service, custom_tests, executor_result):
+def stack_test_executor(base_folder, service, stack, custom_tests, executor_result):
   """
   """
   Stack tests executor. Must be executed in separate process to prevent module
   Stack tests executor. Must be executed in separate process to prevent module
   name conflicts in different stacks.
   name conflicts in different stacks.
@@ -75,14 +75,15 @@ def stack_test_executor(base_folder, stack, service, custom_tests, executor_resu
   else:
   else:
     test_mask = TEST_MASK
     test_mask = TEST_MASK
 
 
-  server_src_dir = get_parent_path(base_folder,'src')
+  server_src_dir = get_parent_path(base_folder, 'src')
 
 
   base_stack_folder = os.path.join(server_src_dir,
   base_stack_folder = os.path.join(server_src_dir,
                                    'main/resources/stacks/HDP/{0}'.format(stack))
                                    'main/resources/stacks/HDP/{0}'.format(stack))
 
 
   script_folders = set()
   script_folders = set()
-  for root, subFolders, files in os.walk(base_stack_folder):
-    if os.path.split(root)[-1] == "scripts" and service in root:
+  for root, subFolders, files in os.walk(os.path.join(base_stack_folder,
+                                                      "services", service)):
+    if os.path.split(root)[-1] in ["scripts", "files"] and service in root:
       script_folders.add(root)
       script_folders.add(root)
 
 
   sys.path.extend(script_folders)
   sys.path.extend(script_folders)