Browse Source

AMBARI-2395. Implement Nagios alert if local dir of the data node's disk is 100% full. (Oleksandr Diachenko via smohanty)

git-svn-id: https://svn.apache.org/repos/asf/incubator/ambari/trunk@1495178 13f79535-47bb-0310-9956-ffa450edef68
Sumit Mohanty 12 năm trước cách đây
mục cha
commit
e9fd10e3bd

+ 34 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_mapred_local_dir_used.sh

@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+MAPRED_LOCAL_DIRS=$1
+CRITICAL=`echo $2 | cut -d % -f 1`
+IFS=","
+for mapred_dir in $MAPRED_LOCAL_DIRS
+do
+  percent=`df -hl $mapred_dir | awk '{percent=$5;} END{print percent}' | cut -d % -f 1`
+  if [ $percent -ge $CRITICAL ]; then
+    echo "CRITICAL: Mapreduce local dir is full."
+    exit 2
+  fi
+done
+echo "OK: Mapreduce local dir space is available."
+exit 0

+ 1 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/server/config.pp

@@ -49,6 +49,7 @@ class hdp-nagios::server::config()
   hdp-nagios::server::check { 'check_hive_metastore_status.sh': }
   hdp-nagios::server::check { 'check_ambari_agent_status.sh': }
   hdp-nagios::server::check { 'check_hue_status.sh': }
+  hdp-nagios::server::check { 'check_mapred_local_dir_used.sh': }
 
   anchor{'hdp-nagios::server::config::begin':} -> Hdp-nagios::Server::Configfile<||> -> anchor{'hdp-nagios::server::config::end':}
   Anchor['hdp-nagios::server::config::begin'] -> Hdp-nagios::Server::Check<||> -> Anchor['hdp-nagios::server::config::end']

+ 5 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-commands.cfg.erb

@@ -91,3 +91,8 @@ define command{
         command_name    check_hue_status
         command_line    $USER1$/check_hue_status.sh
        }
+
+# Runs check_mapred_local_dir_used.sh with two arguments:
+#   $ARG1$ - comma-separated list of directories to check
+#   $ARG2$ - critical usage threshold; a trailing '%' is accepted
+# NOTE(review): no host macro is passed to the script, so it inspects the
+# filesystems of whichever machine executes this command - confirm that this
+# is the intended host.
+define command{
+       command_name    check_mapred_local_dir_used_space
+       command_line    $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
+       }

+ 12 - 0
ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb

@@ -384,6 +384,18 @@ define service {
         max_check_attempts      3
 }
 
+# MAPREDUCE::TASKTRACKER Mapreduce local dir used space
+# Attached to the tasktracker-servers hostgroup. The check command receives
+# the configured mapred-site/mapred.local.dir value as its first argument and
+# a critical threshold of 85% as its second.
+define service {
+        hostgroup_name          tasktracker-servers
+        use                     hadoop-service
+        service_description     TASKTRACKER::Mapreduce local dir used space
+        servicegroups           MAPREDUCE
+        check_command           check_mapred_local_dir_used_space!<%=scope.function_hdp_default("mapred-site/mapred.local.dir")%>!85%
+        normal_check_interval   0.5
+        retry_check_interval    0.25
+        max_check_attempts      3
+}
+
 <%end-%>
 
 <%if scope.function_hdp_nagios_members_exist('resorcemanager')-%>