瀏覽代碼

AMBARI-7951. Alerts are present on host after enabling maintenance mode (dsen via dlysnichenko)

Lisnichenko Dmitro 10 年之前
父節點
當前提交
3b9c1edf6c

+ 6 - 7
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/mm_wrapper.py

@@ -22,7 +22,7 @@ import subprocess
 import os
 
 N_SGN = 'NAGIOS_SERVICEGROUPNAME'
-N_SD = 'NAGIOS_SERVICEDESC'
+N_SD = 'NAGIOS__SERVICEHOST_COMPONENT'
 N_HOST = 'NAGIOS_HOSTNAME'
 
 LIST_SEPARATOR = "--"
@@ -53,14 +53,15 @@ def ignored_host_list(service, component):
   if lines:
     for l in lines:
       tokens = l.split(' ')
-      if len(tokens) == 3 and tokens[1] == service and tokens[2].strip() == component:
-        result.append(tokens[0])
+      if len(tokens) == 3 and tokens[1].strip().upper() == service.strip().upper() and \
+        tokens[2].strip().upper() == component.strip().upper():
+          result.append(tokens[0])
   return result
 
 
 def get_real_service():
   try:
-    service = os.environ[N_SGN]  # e.g. 'HBASE'
+    service = os.environ[N_SGN].strip().upper()  # e.g. 'HBASE'
   except KeyError:
     service = ''
   return service
@@ -68,9 +69,7 @@ def get_real_service():
 
 def get_real_component():
   try:
-    arr_desc = os.environ[N_SD]  # e.g. 'HBASE::Percent RegionServers live'
-    SEPARATOR = "::"
-    comp_name = arr_desc.replace(SEPARATOR, ' ').split(' ')[0]
+    comp_name = os.environ[N_SD].strip()
   except KeyError:
     comp_name = ''
   mapping = {

+ 31 - 0
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -139,6 +139,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Server process
         servicegroups           GANGLIA
+        _host_component         GANGLIA_SERVER
         check_command           check_tcp_wrapper!{{ ganglia_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
@@ -152,6 +153,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for NameNode
         servicegroups           GANGLIA
+        _host_component         GANGLIA_MONITOR
         check_command           check_tcp_wrapper!{{ ganglia_collector_namenode_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
@@ -167,6 +169,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for JobTracker
         servicegroups           GANGLIA
+        _host_component         GANGLIA_MONITOR
         check_command           check_tcp_wrapper!{{ ganglia_collector_jobtracker_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
@@ -182,6 +185,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for HBase Master
         servicegroups           GANGLIA
+        _host_component         GANGLIA_MONITOR
         check_command           check_tcp_wrapper!{{ ganglia_collector_hbase_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
@@ -198,6 +202,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for HistoryServer
         servicegroups           GANGLIA
+        _host_component         GANGLIA_MONITOR
         check_command           check_tcp_wrapper!{{ ganglia_collector_hs_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
@@ -216,6 +221,7 @@ define service {
         service_description     NAMENODE::Secondary NameNode process
         servicegroups           HDFS
         check_command           check_tcp_wrapper!{{ snamenode_port }}!-w 1 -c 1
+        _host_component         SECONDARY_NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -233,6 +239,7 @@ define service {
         service_description     NAMENODE::NameNode edit logs directory status on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_name_dir_status!{{ namenode_port }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.5
         max_check_attempts      3
@@ -246,6 +253,7 @@ define service {
         servicegroups           HDFS
 #        check_command           check_cpu!200%!250%
         check_command           check_cpu!{{ namenode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         NAMENODE
         normal_check_interval   5
         retry_check_interval    2
         max_check_attempts      5
@@ -258,6 +266,7 @@ define service {
         service_description     NAMENODE::NameNode Web UI on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_webui!namenode!{{ namenode_port }}
+        _host_component         NAMENODE
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -269,6 +278,7 @@ define service {
         service_description     NAMENODE::NameNode process on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_tcp_wrapper!{{ namenode_metadata_port }}!-w 1 -c 1
+        _host_component         NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -280,6 +290,7 @@ define service {
         service_description     HDFS::NameNode RPC latency on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_rpcq_latency!NameNode!{{ namenode_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         NAMENODE
         normal_check_interval   5
         retry_check_interval    1
         max_check_attempts      5
@@ -293,6 +304,7 @@ define service {
         service_description     HDFS::Blocks health
         servicegroups           HDFS
         check_command           check_hdfs_blocks!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!{{ nn_metrics_property }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         DATANODE
         normal_check_interval   2
         retry_check_interval    1 
         max_check_attempts      1
@@ -304,6 +316,7 @@ define service {
         service_description     HDFS::HDFS capacity utilization
         servicegroups           HDFS
         check_command           check_hdfs_capacity!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!80%!90%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         DATANODE
         normal_check_interval   2
         retry_check_interval    1 
         max_check_attempts      1
@@ -319,6 +332,7 @@ define service {
         service_description     JOBTRACKER::JobTracker Web UI
         servicegroups           MAPREDUCE
         check_command           check_webui!jobtracker!{{ jtnode_port }}
+        _host_component         JOBTRACKER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -330,6 +344,7 @@ define service {
         service_description     JOBTRACKER::HistoryServer Web UI
         servicegroups           MAPREDUCE
         check_command           check_webui!jobhistory!{{ jobhistory_port }}
+        _host_component         HISTORYSERVER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -341,6 +356,7 @@ define service {
         service_description     JOBTRACKER::JobTracker CPU utilization
         servicegroups           MAPREDUCE
         check_command           check_cpu!{{ jtnode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         JOBTRACKER
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -354,6 +370,7 @@ define service {
         service_description     JOBTRACKER::JobTracker process
         servicegroups           MAPREDUCE
         check_command           check_tcp_wrapper!{{ jtnode_port }}!-w 1 -c 1
+        _host_component         JOBTRACKER
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      4
@@ -365,6 +382,7 @@ define service {
         service_description     MAPREDUCE::JobTracker RPC latency
         servicegroups           MAPREDUCE
         check_command           check_rpcq_latency!JobTracker!{{ jtnode_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         JOBTRACKER
         normal_check_interval   5
         retry_check_interval    1 
         max_check_attempts      5
@@ -389,6 +407,7 @@ define service {
         service_description     TASKTRACKER::TaskTracker process
         servicegroups           MAPREDUCE
         check_command           check_tcp_wrapper!{{ tasktracker_port }}!-w 1 -c 1
+        _host_component         TASKTRACKER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -418,6 +437,7 @@ define service {
         service_description     DATANODE::DataNode process
         servicegroups           HDFS
         check_command           check_tcp_wrapper!{{datanode_port}}!-w 1 -c 1
+        _host_component         DATANODE
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -429,6 +449,7 @@ define service {
         service_description     DATANODE::DataNode space
         servicegroups           HDFS
         check_command           check_datanode_storage!{{ datanode_port }}!90%!90%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         DATANODE
         normal_check_interval   2
         retry_check_interval    1
         max_check_attempts      2
@@ -444,6 +465,7 @@ define service {
         service_description     FLUME::Flume Agent process
         servicegroups           FLUME
         check_command           check_tcp_wrapper!{{ flume_port }}!-w 1 -c 1
+        _host_component         FLUME
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -459,6 +481,7 @@ define service {
         service_description     ZOOKEEPER::ZooKeeper Server process
         servicegroups           ZOOKEEPER
         check_command           check_tcp_wrapper!{{ clientPort }}!-w 1 -c 1
+        _host_component         ZOOKEEPER_SERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -473,6 +496,7 @@ define service {
         service_description     REGIONSERVER::RegionServer process
         servicegroups           HBASE
         check_command           check_tcp_wrapper!{{ hbase_rs_port }}!-w 1 -c 1
+        _host_component         HBASE_REGIONSERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -486,6 +510,7 @@ define service {
         service_description     HBASEMASTER::HBase Master CPU utilization
         servicegroups           HBASE
         check_command           check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         HBASE_MASTER
         normal_check_interval   5
         retry_check_interval    2
         max_check_attempts      5
@@ -500,6 +525,7 @@ define service {
         service_description     HBASEMASTER::HBase Master process on {{ hbasemaster }}
         servicegroups           HBASE
         check_command           check_tcp_wrapper!{{ hbase_master_rpc_port }}!-w 1 -c 1
+        _host_component         HBASE_MASTER
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      4
@@ -515,6 +541,7 @@ define service {
         service_description     HIVE-METASTORE::Hive Metastore process
         servicegroups           HIVE
         check_command           check_tcp_wrapper!{{ hive_metastore_port }}!-w 1 -c 1
+        _host_component         HIVE_METASTORE
         normal_check_interval   0.5
         retry_check_interval    0.5
         max_check_attempts      3
@@ -527,6 +554,7 @@ define service {
         service_description     HIVE-SERVER::HiveServer2 process
         servicegroups           HIVE
         check_command           check_tcp_wrapper_sasl!{{ hive_server_port }}!{{ '--security-enabled' if security_enabled else '' }}!-w 1 -c 1
+        _host_component         HIVE_SERVER
         normal_check_interval   0.5
         retry_check_interval    0.5
         max_check_attempts      3
@@ -544,6 +572,7 @@ define service {
         {% else %}
         check_command           check_oozie_status!{{ oozie_server_port }}!{{ java64_home }}!false
         {% endif %}
+        _host_component         OOZIE_SERVER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -561,6 +590,7 @@ define service {
         {% else %}
         check_command           check_templeton_status!{{ templeton_port }}!v1!false
         {% endif %}
+        _host_component         WEBHCAT_SERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -574,6 +604,7 @@ define service {
         service_description     HUE::Hue Server status
         servicegroups           HUE
         check_command           check_hue_status
+        _host_component         HUE
         normal_check_interval   100
         retry_check_interval    0.5
         max_check_attempts      3

+ 7 - 8
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/mm_wrapper.py

@@ -22,7 +22,7 @@ import subprocess
 import os
 
 N_SGN = 'NAGIOS_SERVICEGROUPNAME'
-N_SD = 'NAGIOS_SERVICEDESC'
+N_SD = 'NAGIOS__SERVICEHOST_COMPONENT'
 N_HOST = 'NAGIOS_HOSTNAME'
 
 LIST_SEPARATOR = "--"
@@ -53,14 +53,15 @@ def ignored_host_list(service, component):
   if lines:
     for l in lines:
       tokens = l.split(' ')
-      if len(tokens) == 3 and tokens[1] == service and tokens[2].strip() == component:
-        result.append(tokens[0])
+      if len(tokens) == 3 and tokens[1].strip().upper() == service.strip().upper() and \
+        tokens[2].strip().upper() == component.strip().upper():
+          result.append(tokens[0])
   return result
 
 
 def get_real_service():
   try:
-    service = os.environ[N_SGN]  # e.g. 'HBASE'
+    service = os.environ[N_SGN].strip().upper()  # e.g. 'HBASE'
   except KeyError:
     service = ''
   return service
@@ -68,15 +69,13 @@ def get_real_service():
 
 def get_real_component():
   try:
-    arr_desc = os.environ[N_SD]  # e.g. 'HBASE::Percent RegionServers live'
-    SEPARATOR = "::"
-    comp_name = arr_desc.replace(SEPARATOR, ' ').split(' ')[0]
+    comp_name = os.environ[N_SD].strip()
   except KeyError:
     comp_name = ''
   mapping = {
     'HBASEMASTER': 'HBASE_MASTER',
     'REGIONSERVER': 'HBASE_REGIONSERVER',
-    'JOBHISTORY': 'MAPREDUCE2',
+    'JOBHISTORY': 'HISTORYSERVER',
     'HIVE-METASTORE': 'HIVE_METASTORE',
     'HIVE-SERVER': 'HIVE_SERVER',
     'FLUME': 'FLUME_HANDLER',

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2

@@ -31,7 +31,7 @@
 {%if hostgroup_defs['jobtracker'] or
   hostgroup_defs['historyserver2']-%}
 define servicegroup {
-  servicegroup_name  MAPREDUCE
+  servicegroup_name  MAPREDUCE2
   alias  MAPREDUCE Checks
 }
 {% endif %}

+ 55 - 4
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -95,6 +95,7 @@ define service {
         service_description     HDFS::NameNode HA Healthy
         servicegroups           HDFS
         check_command           check_namenodes_ha!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}
+        _host_component         NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      5
@@ -155,6 +156,7 @@ define service {
         service_description     GANGLIA::Ganglia Server process
         servicegroups           GANGLIA
         check_command           check_tcp_wrapper!{{ ganglia_port }}!-w 1 -c 1
+        _host_component         GANGLIA_SERVER
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -167,6 +169,7 @@ define service {
         service_description     GANGLIA::Ganglia Monitor process for NameNode
         servicegroups           GANGLIA
         check_command           check_tcp_wrapper!{{ ganglia_collector_namenode_port }}!-w 1 -c 1
+        _host_component         GANGLIA_MONITOR
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -180,6 +183,7 @@ define service {
         service_description     GANGLIA::Ganglia Monitor process for HBase Master
         servicegroups           GANGLIA
         check_command           check_tcp_wrapper!{{ ganglia_collector_hbase_port }}!-w 1 -c 1
+        _host_component         GANGLIA_MONITOR
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -193,6 +197,7 @@ define service {
         service_description     GANGLIA::Ganglia Monitor process for ResourceManager
         servicegroups           GANGLIA
         check_command           check_tcp_wrapper!{{ ganglia_collector_rm_port }}!-w 1 -c 1
+        _host_component         GANGLIA_MONITOR
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -206,6 +211,7 @@ define service {
         service_description     GANGLIA::Ganglia Monitor process for HistoryServer
         servicegroups           GANGLIA
         check_command           check_tcp_wrapper!{{ ganglia_collector_hs_port }}!-w 1 -c 1
+        _host_component         GANGLIA_MONITOR
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -222,6 +228,7 @@ define service {
         service_description     NAMENODE::Secondary NameNode process
         servicegroups           HDFS
         check_command           check_tcp_wrapper!{{ snamenode_port }}!-w 1 -c 1
+        _host_component         SECONDARY_NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -236,6 +243,7 @@ define service {
         service_description     STORM_UI_SERVER::Storm UI on {{ hostgroup_defs['storm_ui'][0] }}
         servicegroups           STORM
         check_command           check_webui!storm_ui!{{ storm_ui_port }}
+        _host_component         STORM_UI_SERVER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -250,6 +258,7 @@ define service {
         service_description     STORM_UI_SERVER::Storm UI Server process
         servicegroups           STORM
         check_command           check_tcp_wrapper!{{ storm_ui_port }}!-w 1 -c 1
+        _host_component         STORM_UI_SERVER
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -264,6 +273,7 @@ define service {
         service_description     NIMBUS::Nimbus process
         servicegroups           STORM
         check_command           check_tcp_wrapper!{{ nimbus_port }}!-w 1 -c 1
+        _host_component         NIMBUS
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -278,6 +288,7 @@ define service {
         service_description     DRPC_SERVER::DRPC Server process
         servicegroups           STORM
         check_command           check_tcp_wrapper!{{ drpc_port }}!-w 1 -c 1
+        _host_component         DRPC_SERVER
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -292,6 +303,7 @@ define service {
         service_description     STORM_REST_API::Storm REST API Server process
         servicegroups           STORM
         check_command           check_tcp_wrapper!{{ storm_rest_api_port }}!-w 1 -c 1
+        _host_component         STORM_REST_API
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -306,6 +318,7 @@ define service {
         service_description     SUPERVISOR::Percent Supervisors live
         servicegroups           STORM
         check_command           check_aggregate!"SUPERVISOR::Supervisors process"!10%!30%
+        _host_component         SUPERVISOR
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -317,6 +330,7 @@ define service {
         service_description     SUPERVISOR::Supervisors process
         servicegroups           STORM
         check_command           check_tcp_wrapper!{{ supervisor_port }}!-w 1 -c 1
+        _host_component         SUPERVISOR
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -333,6 +347,7 @@ define service {
         service_description     NAMENODE::NameNode edit logs directory status on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_name_dir_status!{{ namenode_port }}!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.5
         max_check_attempts      3
@@ -345,6 +360,7 @@ define service {
         service_description     NAMENODE::NameNode host CPU utilization on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_cpu!{{ namenode_port }}!200%!250%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         NAMENODE
         normal_check_interval   5
         retry_check_interval    2
         max_check_attempts      5
@@ -357,6 +373,7 @@ define service {
         service_description     NAMENODE::NameNode Web UI on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_webui!namenode!{{ namenode_port }}
+        _host_component         NAMENODE
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -368,6 +385,7 @@ define service {
         service_description     NAMENODE::NameNode process on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_tcp_wrapper!{{nn_ha_host_port_map[namenode_hostname]}}!-w 1 -c 1
+        _host_component         NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -379,6 +397,7 @@ define service {
         service_description     HDFS::NameNode RPC latency on {{ namenode_hostname }}
         servicegroups           HDFS
         check_command           check_rpcq_latency!NameNode!{{ namenode_port }}!3000!5000!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         NAMENODE
         normal_check_interval   5
         retry_check_interval    1
         max_check_attempts      5
@@ -392,6 +411,7 @@ define service {
         service_description     NAMENODE::Last checkpoint time
         servicegroups           HDFS
         check_command           check_checkpoint_time!{{ nn_hosts_string }}!{{ namenode_port }}!200!200!{{ dfs_namenode_checkpoint_period }}!{{dfs_namenode_checkpoint_txns}}!{{str(hdfs_ssl_enabled).lower()}}
+        _host_component         NAMENODE
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -403,6 +423,7 @@ define service {
         service_description     HDFS::Blocks health
         servicegroups           HDFS
         check_command           check_hdfs_blocks!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!{{ nn_metrics_property }}!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         DATANODE
         normal_check_interval   2
         retry_check_interval    1
         max_check_attempts      1
@@ -414,6 +435,7 @@ define service {
         service_description     HDFS::HDFS capacity utilization
         servicegroups           HDFS
         check_command           check_hdfs_capacity!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!80%!90%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         DATANODE
         normal_check_interval   2
         retry_check_interval    1
         max_check_attempts      1
@@ -429,6 +451,7 @@ define service {
         service_description     RESOURCEMANAGER::ResourceManager Web UI
         servicegroups           YARN
         check_command           check_webui_ha!resourcemanager!{{ rm_hosts_in_str }}!{{ rm_port }}
+        _host_component         RESOURCEMANAGER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -441,6 +464,7 @@ define service {
         service_description     RESOURCEMANAGER::ResourceManager CPU utilization
         servicegroups           YARN
         check_command           check_cpu_ha!{{ rm_hosts_in_str }}!{{ rm_port }}!200%!250%!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         RESOURCEMANAGER
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -453,6 +477,7 @@ define service {
         service_description     RESOURCEMANAGER::ResourceManager RPC latency
         servicegroups           YARN
         check_command           check_rpcq_latency_ha!{{ rm_hosts_in_str }}!ResourceManager!{{ rm_port }}!3000!5000!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         RESOURCEMANAGER
         normal_check_interval   5
         retry_check_interval    1 
         max_check_attempts      5
@@ -465,6 +490,7 @@ define service {
         service_description     RESOURCEMANAGER::ResourceManager process on {{ rm_host }}
         servicegroups           YARN
         check_command           check_tcp_wrapper!{{ rm_port }}!-w 1 -c 1
+        _host_component         RESOURCEMANAGER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -480,6 +506,7 @@ define service {
         service_description     NODEMANAGER::NodeManager process
         servicegroups           YARN
         check_command           check_tcp_wrapper!{{ nm_port }}!-w 1 -c 1
+        _host_component         NODEMANAGER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -491,6 +518,7 @@ define service {
         service_description     NODEMANAGER::NodeManager health
         servicegroups           YARN
         check_command           check_nodemanager_health!{{ nm_port }}!{{ str(security_enabled).lower() }}!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}
+        _host_component         NODEMANAGER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -501,6 +529,7 @@ define service {
         service_description     NODEMANAGER::Percent NodeManagers live
         servicegroups           YARN
         check_command           check_aggregate!"NODEMANAGER::NodeManager process"!10%!30%
+        _host_component         NODEMANAGER
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -513,8 +542,9 @@ define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer Web UI
-        servicegroups           MAPREDUCE
+        servicegroups           MAPREDUCE2
         check_command           check_webui!historyserver2!{{ hs_port }}
+        _host_component         HISTORYSERVER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -525,8 +555,9 @@ define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer CPU utilization
-        servicegroups           MAPREDUCE
+        servicegroups           MAPREDUCE2
         check_command           check_cpu!{{ hs_port }}!200%!250%!{{ str(mapreduce_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         HISTORYSERVER
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -537,8 +568,9 @@ define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer RPC latency
-        servicegroups           MAPREDUCE
+        servicegroups           MAPREDUCE2
         check_command           check_rpcq_latency!JobHistoryServer!{{ hs_port }}!3000!5000!{{ str(mapreduce_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         HISTORYSERVER
         normal_check_interval   5
         retry_check_interval    1 
         max_check_attempts      5
@@ -548,8 +580,9 @@ define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer process
-        servicegroups           MAPREDUCE
+        servicegroups           MAPREDUCE2
         check_command           check_tcp_wrapper!{{ hs_port }}!-w 1 -c 1
+        _host_component         HISTORYSERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -565,6 +598,7 @@ define service {
         service_description     JOURNALNODE::JournalNode process
         servicegroups           HDFS
         check_command           check_tcp_wrapper!{{ journalnode_port }}!-w 1 -c 1
+        _host_component         JOURNALNODE
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -592,6 +626,7 @@ define service {
         service_description     DATANODE::DataNode process
         servicegroups           HDFS
         check_command           check_tcp_wrapper!{{datanode_port}}!-w 1 -c 1
+        _host_component         DATANODE
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -603,6 +638,7 @@ define service {
         service_description     DATANODE::DataNode space
         servicegroups           HDFS
         check_command           check_datanode_storage!{{ datanode_port }}!90%!90%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         DATANODE
         normal_check_interval   2 
         retry_check_interval    1
         max_check_attempts      2
@@ -618,6 +654,7 @@ define service {
         service_description     ZOOKEEPER::ZooKeeper Server process
         servicegroups           ZOOKEEPER
         check_command           check_tcp_wrapper!{{ clientPort }}!-w 1 -c 1
+        _host_component         ZOOKEEPER_SERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -632,6 +669,7 @@ define service {
         service_description     REGIONSERVER::RegionServer process
         servicegroups           HBASE
         check_command           check_tcp_wrapper!{{ hbase_rs_port }}!-w 1 -c 1
+        _host_component         HBASE_REGIONSERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -645,6 +683,7 @@ define service {
         service_description     HBASEMASTER::HBase Master CPU utilization
         servicegroups           HBASE
         check_command           check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!false!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
+        _host_component         HBASE_MASTER
         normal_check_interval   5
         retry_check_interval    2
         max_check_attempts      5
@@ -659,6 +698,7 @@ define service {
         service_description     HBASEMASTER::HBase Master process on {{ hbasemaster }}
         servicegroups           HBASE
         check_command           check_tcp_wrapper!{{ hbase_master_rpc_port }}!-w 1 -c 1
+        _host_component         HBASE_MASTER
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      4
@@ -674,6 +714,7 @@ define service {
         service_description     HIVE-METASTORE::Hive Metastore process
         servicegroups           HIVE
         check_command           check_tcp_wrapper!{{ hive_metastore_port }}!-w 1 -c 1
+        _host_component         HIVE_METASTORE
         normal_check_interval   0.5
         retry_check_interval    0.5
         max_check_attempts      3
@@ -686,6 +727,7 @@ define service {
         service_description     HIVE-SERVER::HiveServer2 process
         servicegroups           HIVE
         check_command           check_tcp_wrapper_sasl!{{ hive_server_port }}!{{ '--security-enabled' if security_enabled else '' }}!-w 1 -c 1
+        _host_component         HIVE_SERVER
         normal_check_interval   0.5
         retry_check_interval    0.5
         max_check_attempts      3
@@ -703,6 +745,7 @@ define service {
         {% else %}
         check_command           check_oozie_status!{{ oozie_server_port }}!{{ java64_home }}!false
         {% endif %}
+        _host_component         OOZIE_SERVER
         normal_check_interval   1
         retry_check_interval    1
         max_check_attempts      3
@@ -720,6 +763,7 @@ define service {
         {% else %}
         check_command           check_templeton_status!{{ templeton_port }}!v1!false
         {% endif %}
+        _host_component         WEBHCAT_SERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -733,6 +777,7 @@ define service {
         service_description     HUE::Hue Server status
         servicegroups           HUE
         check_command           check_hue_status
+        _host_component         HUE
         normal_check_interval   100
         retry_check_interval    0.5
         max_check_attempts      3
@@ -746,6 +791,7 @@ define service {
         service_description     FALCON::Falcon Server process
         servicegroups           FALCON
         check_command           check_tcp_wrapper!{{ falcon_port }}!-w 1 -c 1
+        _host_component         FALCON_SERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -755,6 +801,7 @@ define service {
         service_description     FALCON::Falcon Server Web UI
         servicegroups           FALCON
         check_command           check_webui!falconserver!{{ falcon_port }}
+        _host_component         FALCON_SERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -767,6 +814,7 @@ define service {
         service_description     APP_TIMELINE_SERVER::App Timeline Server process
         servicegroups           YARN
         check_command           check_tcp_wrapper!{{ ahs_port }}!-w 1 -c 1
+        _host_component         APP_TIMELINE_SERVER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -781,6 +829,7 @@ define service {
         service_description     FLUME::Flume Agent process
         servicegroups           FLUME
         check_command           check_ambari!/var/nagios/ambari.json!flume_agent
+        _host_component         FLUME_HANDLER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -795,6 +844,7 @@ define service {
         service_description     KNOX::Knox Gateway process
         servicegroups           KNOX
         check_command           check_tcp_wrapper!{{ knox_gateway_port }}!-w 1 -c 1
+        _host_component         KNOX_GATEWAY
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -809,6 +859,7 @@ define service {
         service_description     KAFKA::Kafka Broker process
         servicegroups           KAFKA
         check_command           check_tcp_wrapper!{{ kafka_broker_port }}!-w 1 -c 1
+        _host_component         KAFKA_BROKER
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3

+ 2 - 2
ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_mm_wrapper.py

@@ -94,10 +94,10 @@ vm-3.vm ZOOKEEPER ZOOKEEPER_SERVER
 
 
   def test_get_real_component(self):
-    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'SUPERVISOR::Supervisors process'}, clear=True):
+    with patch.dict(os.environ, {'NAGIOS__SERVICEHOST_COMPONENT': 'SUPERVISOR'}, clear=True):
       component = mm_wrapper.get_real_component()
       self.assertEqual(component, 'SUPERVISOR')
-    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'JOBHISTORY::HistoryServer process'}, clear=True):
+    with patch.dict(os.environ, {'NAGIOS__SERVICEHOST_COMPONENT': 'JOBHISTORY'}, clear=True):
       component = mm_wrapper.get_real_component()
       self.assertEqual(component, 'MAPREDUCE2')
 

+ 2 - 2
ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_mm_wrapper.py

@@ -94,10 +94,10 @@ vm-3.vm ZOOKEEPER ZOOKEEPER_SERVER
 
 
   def test_get_real_component(self):
-    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'SUPERVISOR::Supervisors process'}, clear=True):
+    with patch.dict(os.environ, {'NAGIOS__SERVICEHOST_COMPONENT': 'SUPERVISOR'}, clear=True):
       component = mm_wrapper.get_real_component()
       self.assertEqual(component, 'SUPERVISOR')
-    with patch.dict(os.environ, {'NAGIOS_SERVICEDESC': 'JOBHISTORY::HistoryServer process'}, clear=True):
+    with patch.dict(os.environ, {'NAGIOS__SERVICEHOST_COMPONENT': 'MAPREDUCE2'}, clear=True):
       component = mm_wrapper.get_real_component()
       self.assertEqual(component, 'MAPREDUCE2')