Prechádzať zdrojové kódy

AMBARI-1533. Add Nagios check for ambari-agent process for each host in the cluster. (smohanty)

git-svn-id: https://svn.apache.org/repos/asf/incubator/ambari/trunk@1461316 13f79535-47bb-0310-9956-ffa450edef68
Sumit Mohanty 12 rokov pred
rodič
commit
b3a2afee9e

+ 3 - 0
CHANGES.txt

@@ -527,6 +527,9 @@ Trunk (unreleased changes):
 
  BUG FIXES
 
+ AMBARI-1533. Add Nagios check for ambari-agent process for each host in 
+ the cluster. (smohanty)
+
  AMBARI-1713. Need to delete private ssh key from /var/run/ambari-server
  /bootstrap/* on Ambari Server after bootstrap is complete. (swagle)
 

+ 39 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_ambari_agent_status.sh

@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+# Nagios plugin: exits 0 (OK) when the PID in the agent PID file is alive, else 2 (CRITICAL).
+AMBARI_AGENT_PID_PATH="/var/run/ambari-agent/ambari-agent.pid"
+if [ ! -f "$AMBARI_AGENT_PID_PATH" ]
+then
+  echo "CRITICAL: Ambari agent is not running [$AMBARI_AGENT_PID_PATH not found]"
+  exit 2
+fi
+AMBARI_AGENT_PID=`cat "$AMBARI_AGENT_PID_PATH"`
+# ps -p exits non-zero when no process has that PID (or the PID is empty/invalid).
+if ps -p "$AMBARI_AGENT_PID" > /dev/null 2>&1
+then
+  echo "OK: Ambari agent is running [PID:$AMBARI_AGENT_PID]"
+  exit 0
+else
+  # The PID file exists but is stale, so do not report it as "not found".
+  echo "CRITICAL: Ambari agent is not running [no process with PID:$AMBARI_AGENT_PID]"
+  exit 2
+fi

+ 1 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/server/config.pp

@@ -44,6 +44,7 @@ class hdp-nagios::server::config()
   hdp-nagios::server::check { 'check_oozie_status.sh': }
   hdp-nagios::server::check { 'check_templeton_status.sh': }
   hdp-nagios::server::check { 'check_hive_metastore_status.sh': }
+  hdp-nagios::server::check { 'check_ambari_agent_status.sh': }
 
   anchor{'hdp-nagios::server::config::begin':} -> Hdp-nagios::Server::Configfile<||> -> anchor{'hdp-nagios::server::config::end':}
   Anchor['hdp-nagios::server::config::begin'] -> Hdp-nagios::Server::Check<||> -> Anchor['hdp-nagios::server::config::end']

+ 4 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb

@@ -83,3 +83,7 @@ define command{
         command_name    check_hive_metastore_status
         command_line    $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$
        }
+define command{
+        command_name    check_ambari_agent_status
+        command_line    $USER1$/check_ambari_agent_status.sh
+       }

+ 4 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-servicegroups.cfg.erb

@@ -34,3 +34,7 @@ define servicegroup {
   servicegroup_name  ZOOKEEPER
   alias  ZOOKEEPER Checks
 }
+define servicegroup {
+  servicegroup_name  AMBARI
+  alias  AMBARI Checks
+}

+ 12 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb

@@ -76,6 +76,18 @@ define service {
         max_check_attempts      3
 }
 
+# AMBARI AGENT Checks: runs check_ambari_agent_status for the nagios-server hostgroup only. NOTE(review): changelog says "each host in the cluster" - confirm intended scope.
+define service {
+        hostgroup_name          nagios-server
+        use                     hadoop-service
+        service_description     AMBARI::Check ambari-agent process
+        servicegroups           AMBARI
+        check_command           check_ambari_agent_status
+        normal_check_interval   5
+        retry_check_interval    0.5
+        max_check_attempts      2
+}
+
 # NAGIOS SERVER ZOOKEEPER Checks
 <%if scope.function_hdp_nagios_members_exist('zookeeper-servers')-%>
 define service {

+ 3 - 0
contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php

@@ -381,6 +381,9 @@ function hdp_mon_generate_response( $response_data )
       case "ZKSERVERS":
 	    $pieces[0] = "ZOOKEEPER";
         break;
+      case "AMBARI":
+	    $pieces[0] = "AMBARI";
+      break;      
       case "NAGIOS":
       case "HDFS":
       case "MAPREDUCE":