Browse Source

AMBARI-5600. NameNode/ResourceManager/HBase Master CPU Utilization with disabled SNMP (ncole)

Nate Cole 11 năm trước
mục cha
commit
4481ac2495
32 tập tin đã thay đổi với 328 bổ sung và 229 xóa
  1. 1 7
      ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-INSTALL/scripts/shared_initialization.py
  2. 0 1
      ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/scripts/hook.py
  3. 0 5
      ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/scripts/params.py
  4. 1 8
      ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/scripts/shared_initialization.py
  5. 0 28
      ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/templates/health_check-v2.j2
  6. 0 28
      ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/templates/health_check.j2
  7. 0 4
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/metainfo.xml
  8. 109 0
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_cpu.php
  9. 26 1
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/functions.py
  10. 1 0
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py
  11. 4 1
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/params.py
  12. 3 2
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
  13. 16 11
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2
  14. 1 8
      ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-INSTALL/scripts/shared_initialization.py
  15. 0 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/scripts/hook.py
  16. 0 5
      ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/scripts/params.py
  17. 0 17
      ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/scripts/shared_initialization.py
  18. 0 28
      ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/templates/health_check-v2.j2
  19. 0 27
      ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/templates/health_check.j2
  20. 0 4
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/metainfo.xml
  21. 109 0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_cpu.php
  22. 26 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/functions.py
  23. 1 0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
  24. 4 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
  25. 4 3
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
  26. 12 8
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
  27. 4 0
      ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py
  28. 1 2
      ambari-server/src/test/python/stacks/1.3.2/hooks/before-INSTALL/test_before_install.py
  29. 1 13
      ambari-server/src/test/python/stacks/1.3.2/hooks/before-START/test_before_start.py
  30. 4 0
      ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
  31. 0 1
      ambari-server/src/test/python/stacks/2.0.6/hooks/before-INSTALL/test_before_install.py
  32. 0 14
      ambari-server/src/test/python/stacks/2.0.6/hooks/before-START/test_before_start.py

+ 1 - 7
ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-INSTALL/scripts/shared_initialization.py

@@ -145,10 +145,4 @@ def setup_java():
     )
 
 def install_packages():
-  packages = {"redhat": ["net-snmp-utils", "net-snmp"],
-              "suse": ["net-snmp"],
-              "all": ["unzip"]
-              }
-  
-  Package(packages['all'])
-  Package(packages[System.get_instance().os_family])
+  Package(['unzip'])

+ 0 - 1
ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/scripts/hook.py

@@ -34,7 +34,6 @@ class BeforeConfigureHook(Hook):
     setup_database()
     setup_configs()
     create_javahome_symlink()
-    init_services()
 
 if __name__ == "__main__":
   BeforeConfigureHook().execute()

+ 0 - 5
ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/scripts/params.py

@@ -32,11 +32,6 @@ hdfs_user = config['configurations']['global']['hdfs_user']
 
 user_group = config['configurations']['global']['user_group']
 
-#snmp
-snmp_conf_dir = "/etc/snmp/"
-snmp_source = "0.0.0.0/0"
-snmp_community = "hadoop"
-
 #hosts
 hostname = config["hostname"]
 rm_host = default("/clusterHostInfo/rm_host", [])

+ 1 - 8
ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/scripts/shared_initialization.py

@@ -171,11 +171,4 @@ def create_javahome_symlink():
     Execute("mkdir -p /usr/jdk64/")
     Execute("ln -s /usr/jdk/jdk1.6.0_31 /usr/jdk64/jdk1.6.0_31")
 
-def init_services():
-  import params
-  File(os.path.join(params.snmp_conf_dir, 'snmpd.conf'),
-       content=Template("snmpd.conf.j2"))
-  # enable snmpd
-  Execute( "service snmpd start; chkconfig snmpd on",
-    path = "/usr/local/bin/:/bin/:/sbin/"
-  )  
+

+ 0 - 28
ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/templates/health_check-v2.j2

@@ -58,35 +58,7 @@ function check_disks {
 
 }
 
-function check_link {
-  snmp=/usr/bin/snmpwalk
-  if [ -e $snmp ] ; then
-    $snmp -t 5 -Oe  -Oq  -Os -v 1 -c public localhost if | \
-    awk ' {
-      split($1,a,".") ;
-      if ( a[1] == "ifIndex" ) { ifIndex[a[2]] = $2 }
-      if ( a[1] == "ifDescr" ) { ifDescr[a[2]] = $2 }
-      if ( a[1] == "ifType" ) { ifType[a[2]] = $2 }
-      if ( a[1] == "ifSpeed" ) { ifSpeed[a[2]] = $2 }
-      if ( a[1] == "ifAdminStatus" ) { ifAdminStatus[a[2]] = $2 }
-      if ( a[1] == "ifOperStatus" ) { ifOperStatus[a[2]] = $2 }
-    }
-    END {
-      up=0;
-      for (i in ifIndex ) {
-      if ( ifType[i] == 6 && ifAdminStatus[i] == 1 && ifOperStatus[i] == 1 && ifSpeed[i] == 1000000000 ) {
-      up=i;
-      }
-      }
-      if ( up == 0 ) { print "check link" ; exit 2 }
-      else { print ifDescr[up],"ok" }
-    }'
-    exit $? ;
-  fi
-}
-
 # Run all checks
-# Disabled 'check_link' for now... 
 for check in disks ; do
   msg=`check_${check}` ;
   if [ $? -eq 0 ] ; then

+ 0 - 28
ambari-server/src/main/resources/stacks/HDP/1.3.2/hooks/before-START/templates/health_check.j2

@@ -85,35 +85,7 @@ function check_jetty {
   fi
 }
 
-function check_link {
-  snmp=/usr/bin/snmpwalk
-  if [ -e $snmp ] ; then
-    $snmp -t 5 -Oe  -Oq  -Os -v 1 -c public localhost if | \
-    awk ' {
-      split($1,a,".") ;
-      if ( a[1] == "ifIndex" ) { ifIndex[a[2]] = $2 }
-      if ( a[1] == "ifDescr" ) { ifDescr[a[2]] = $2 }
-      if ( a[1] == "ifType" ) { ifType[a[2]] = $2 }
-      if ( a[1] == "ifSpeed" ) { ifSpeed[a[2]] = $2 }
-      if ( a[1] == "ifAdminStatus" ) { ifAdminStatus[a[2]] = $2 }
-      if ( a[1] == "ifOperStatus" ) { ifOperStatus[a[2]] = $2 }
-    }
-    END {
-      up=0;
-      for (i in ifIndex ) {
-      if ( ifType[i] == 6 && ifAdminStatus[i] == 1 && ifOperStatus[i] == 1 && ifSpeed[i] == 1000000000 ) {
-      up=i;
-      }
-      }
-      if ( up == 0 ) { print "check link" ; exit 2 }
-      else { print ifDescr[up],"ok" }
-    }'
-    exit $? ;
-  fi
-}
-
 # Run all checks
-# Disabled 'check_link' for now... 
 for check in disks taskcontroller jetty; do
   msg=`check_${check}` ;
   if [ $? -eq 0 ] ; then

+ 0 - 4
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/metainfo.xml

@@ -72,10 +72,6 @@
               <type>rpm</type>
               <name>perl</name>
             </package>
-            <package>
-              <type>rpm</type>
-              <name>perl-Net-SNMP</name>
-            </package>
             <package>
               <type>rpm</type>
               <name>nagios-plugins-1.4.9</name>

+ 109 - 0
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_cpu.php

@@ -0,0 +1,109 @@
+<?php
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Nagios plugin: reads SystemCpuLoad from a daemon's /jmx OperatingSystem bean and compares it to -w / -c. */
+  include "hdp_nagios_init.php";
+  /* Every option takes a value, including -e (the command passes "-e <true|false>"). */
+  $options = getopt ("h:p:w:c:k:r:t:u:e:");
+  if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options)
+      || !array_key_exists('c', $options)) {
+    usage();
+    exit(3);
+  }
+
+  $hosts=$options['h'];
+  $port=$options['p'];
+  $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn);
+  $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit);
+  $keytab_path=$options['k'];
+  $principal_name=$options['r'];
+  $kinit_path_local=$options['t'];
+  $security_enabled=$options['u'];
+  $ssl_enabled=$options['e'];
+
+  /* Kinit if security enabled */
+  $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name);
+  $retcode = $status[0];
+  $output = $status[1];
+  /* $retcode is the kinit status code; $output is only the message text shown to the operator. */
+  if ($retcode != 0) {
+    echo "CRITICAL: Error doing kinit for nagios. $output";
+    exit (2);
+  }
+
+  $protocol = ($ssl_enabled == "true" ? "https" : "http");
+
+  /* NOTE: with several comma-separated hosts, only the last host's load reaches the threshold checks below. */
+  foreach (preg_split('/,/', $hosts) as $host) {
+    /* Get the json document */
+
+    $ch = curl_init();
+    $username = rtrim(`id -un`, "\n");
+    curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem",
+                                  CURLOPT_RETURNTRANSFER => true,
+                                  CURLOPT_HTTPAUTH => CURLAUTH_ANY,
+                                  CURLOPT_USERPWD => "$username:",
+                                  CURLOPT_SSL_VERIFYPEER => FALSE ));
+    $json_string = curl_exec($ch);
+    $info = curl_getinfo($ch);
+    if (intval($info['http_code']) == 401){
+      logout();
+      $json_string = curl_exec($ch);
+    }
+    $info = curl_getinfo($ch);
+    curl_close($ch);
+    $json_array = json_decode($json_string, true);
+
+    $object = $json_array['beans'][0];
+
+    if (empty($object)) {
+      echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n";
+      exit(2);
+    }
+
+    $cpu_load = $object['SystemCpuLoad'];
+
+    if (!isset($object['SystemCpuLoad']) || $cpu_load < 0.0) {
+      echo "WARNING: Data unavailable, SystemCpuLoad is not set\n";
+      exit(1);
+    }
+
+    $cpu_count = $object['AvailableProcessors'];
+
+    $cpu_percent = $cpu_load*100;
+  }
+
+  $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%';
+
+  if ($cpu_percent > $crit) {
+    echo $out_msg . ' > ' . $crit . "% : CRITICAL\n";
+    exit(2);
+  }
+  if ($cpu_percent > $warn) {
+    echo $out_msg . ' > ' . $warn . "% : WARNING\n";
+    exit(1);
+  }
+
+  echo $out_msg . ' < ' . $warn . "% : OK\n";
+  exit(0);
+
+  /* print usage */
+  function usage () {
+    echo "Usage: " . $_SERVER['argv'][0] . " -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n";
+  }
+?>

+ 26 - 1
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/functions.py

@@ -25,4 +25,29 @@ def get_port_from_url(address):
   if not is_empty(address):
     return address.split(':')[-1]
   else:
-    return address
+    return address
+
+# Gets if the java version is greater than 6
+def is_jdk_greater_6(java64_home):
+  import os
+  import re
+  java_bin = os.path.join(java64_home, 'bin', 'java')
+  ver_check = shell.call([java_bin, '-version'])
+
+  ver = ''
+  if 0 != ver_check[0]:
+    # java is not local, try the home name as a fallback
+    ver = java64_home
+  else:
+    ver = ver_check[1]
+
+  regex = re.compile('"1\.([0-9]*)\.0_([0-9]*)"', re.IGNORECASE)
+  r = regex.search(ver)
+  if r:
+    strs = r.groups()
+    if 2 == len(strs):
+      minor = int(strs[0])
+      if minor > 6:
+        return True
+
+  return False

+ 1 - 0
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py

@@ -49,6 +49,7 @@ def nagios_server_config():
     )
 
   nagios_server_check( 'check_cpu.pl')
+  nagios_server_check( 'check_cpu.php')
   nagios_server_check( 'check_datanode_storage.php')
   nagios_server_check( 'check_aggregate.php')
   nagios_server_check( 'check_hdfs_blocks.php')

+ 4 - 1
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/params.py

@@ -21,6 +21,7 @@ Ambari Agent
 """
 
 from functions import get_port_from_url
+from functions import is_jdk_greater_6
 from resource_management import *
 import status_params
 
@@ -65,7 +66,8 @@ flume_port = "4159"
 hive_metastore_port = config['configurations']['global']['hive_metastore_port'] #"9083"
 hive_server_port = "10000"
 templeton_port = config['configurations']['webhcat-site']['templeton.port'] #"50111"
-hbase_rs_port = "60030"
+hbase_master_port = config['configurations']['hbase-site']['hbase.master.info.port'] #"60010"
+hbase_rs_port = config['configurations']['hbase-site']['hbase.regionserver.info.port'] #"60030"
 
 # this 4 is different for HDP2
 jtnode_port = get_port_from_url(config['configurations']['mapred-site']['mapred.job.tracker.http.address'])
@@ -79,6 +81,7 @@ clientPort = config['configurations']['global']['clientPort'] #ZK
 
 
 java64_home = config['hostLevelParams']['java_home']
+check_cpu_on = is_jdk_greater_6(java64_home)
 _authentication = config['configurations']['core-site']['hadoop.security.authentication']
 security_enabled = ( not is_empty(_authentication) and _authentication == 'kerberos')
 

+ 3 - 2
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2

@@ -38,11 +38,12 @@
 #
 #
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 # 'check_cpu' check remote cpu load
 define command {
         command_name    check_cpu
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
+#        command_line    $USER1$/check_wrapper.sh $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_cpu.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
        }
 {% endif %}
 

+ 16 - 11
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -250,13 +250,14 @@ define service {
         max_check_attempts      3
 }
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         host_name               {{ namenode_hostname }}
         use                     hadoop-service
         service_description     NAMENODE::NameNode host CPU utilization on {{ namenode_hostname }}
         servicegroups           HDFS
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ namenode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2
         max_check_attempts      5
@@ -345,13 +346,14 @@ define service {
         retry_check_interval    1
         max_check_attempts      3
 }
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         hostgroup_name          jobtracker
         use                     hadoop-service
         service_description     JOBTRACKER::JobTracker CPU utilization
         servicegroups           MAPREDUCE
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ jtnode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -433,13 +435,14 @@ define service {
         max_check_attempts      3
 }
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         hostgroup_name          resourcemanager
         use                     hadoop-service
         service_description     RESOURCEMANAGER::ResourceManager CPU utilization
         servicegroups           YARN
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ rm_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -517,13 +520,14 @@ define service {
         max_check_attempts      3
 }
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer CPU utilization
         servicegroups           MAPREDUCE
-        check_command           check_cpu!200%!250%
+#       check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ hs_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -637,14 +641,15 @@ define service {
 #         retry_check_interval    1
 #         max_check_attempts      3
 # #}
-{%  for hbasemaster in hbase_master_hosts  %}
-{% if env.system.os_family != "suse" %}
+{%  for hbasemaster in hbase_master_hosts %}
+{% if check_cpu_on %}
 define service {
         host_name               {{ hbasemaster }}
         use                     hadoop-service
         service_description     HBASEMASTER::HBase Master CPU utilization on {{ hbasemaster }}
         servicegroups           HBASE
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5

+ 1 - 8
ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-INSTALL/scripts/shared_initialization.py

@@ -163,11 +163,4 @@ def setup_java():
     )
 
 def install_packages():
-  packages = {"redhat": ["net-snmp-utils", "net-snmp"],
-              "suse": ["net-snmp"],
-              "debian": ["snmp", "snmpd"],
-              "all": ["unzip", "curl"]
-              }
-  
-  Package(packages['all'])
-  Package(packages[System.get_instance().os_family])
+  Package(['unzip', 'curl'])

+ 0 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/scripts/hook.py

@@ -32,7 +32,6 @@ class BeforeConfigureHook(Hook):
     setup_hadoop()
     setup_configs()
     create_javahome_symlink()
-    init_services()
 
 if __name__ == "__main__":
   BeforeConfigureHook().execute()

+ 0 - 5
ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/scripts/params.py

@@ -34,11 +34,6 @@ yarn_user = config['configurations']['global']['yarn_user']
 
 user_group = config['configurations']['global']['user_group']
 
-#snmp
-snmp_conf_dir = "/etc/snmp/"
-snmp_source = "0.0.0.0/0"
-snmp_community = "hadoop"
-
 #hosts
 hostname = config["hostname"]
 rm_host = default("/clusterHostInfo/rm_host", [])

+ 0 - 17
ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/scripts/shared_initialization.py

@@ -178,20 +178,3 @@ def create_javahome_symlink():
     Execute("mkdir -p /usr/jdk64/")
     Execute("ln -s /usr/jdk/jdk1.6.0_31 /usr/jdk64/jdk1.6.0_31")
 
-def init_services():
-  import params
-  File(os.path.join(params.snmp_conf_dir, 'snmpd.conf'),
-       content=Template("snmpd.conf.j2"))
-  # enable snmpd
-  Execute( "service snmpd start",
-  )
-  
-  if System.get_instance().os_family == "debian":
-    Execute( "update-rc.d snmpd defaults",
-    )    
-  else:
-    Execute( "chkconfig snmpd on",
-    )
-      
-  
-  

+ 0 - 28
ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/templates/health_check-v2.j2

@@ -58,35 +58,7 @@ function check_disks {
 
 }
 
-function check_link {
-  snmp=/usr/bin/snmpwalk
-  if [ -e $snmp ] ; then
-    $snmp -t 5 -Oe  -Oq  -Os -v 1 -c public localhost if | \
-    awk ' {
-      split($1,a,".") ;
-      if ( a[1] == "ifIndex" ) { ifIndex[a[2]] = $2 }
-      if ( a[1] == "ifDescr" ) { ifDescr[a[2]] = $2 }
-      if ( a[1] == "ifType" ) { ifType[a[2]] = $2 }
-      if ( a[1] == "ifSpeed" ) { ifSpeed[a[2]] = $2 }
-      if ( a[1] == "ifAdminStatus" ) { ifAdminStatus[a[2]] = $2 }
-      if ( a[1] == "ifOperStatus" ) { ifOperStatus[a[2]] = $2 }
-    }
-    END {
-      up=0;
-      for (i in ifIndex ) {
-      if ( ifType[i] == 6 && ifAdminStatus[i] == 1 && ifOperStatus[i] == 1 && ifSpeed[i] == 1000000000 ) {
-      up=i;
-      }
-      }
-      if ( up == 0 ) { print "check link" ; exit 2 }
-      else { print ifDescr[up],"ok" }
-    }'
-    exit $? ;
-  fi
-}
-
 # Run all checks
-# Disabled 'check_link' for now... 
 for check in disks ; do
   msg=`check_${check}` ;
   if [ $? -eq 0 ] ; then

+ 0 - 27
ambari-server/src/main/resources/stacks/HDP/2.0.6/hooks/before-START/templates/health_check.j2

@@ -85,35 +85,8 @@ function check_jetty {
   fi
 }
 
-function check_link {
-  snmp=/usr/bin/snmpwalk
-  if [ -e $snmp ] ; then
-    $snmp -t 5 -Oe  -Oq  -Os -v 1 -c public localhost if | \
-    awk ' {
-      split($1,a,".") ;
-      if ( a[1] == "ifIndex" ) { ifIndex[a[2]] = $2 }
-      if ( a[1] == "ifDescr" ) { ifDescr[a[2]] = $2 }
-      if ( a[1] == "ifType" ) { ifType[a[2]] = $2 }
-      if ( a[1] == "ifSpeed" ) { ifSpeed[a[2]] = $2 }
-      if ( a[1] == "ifAdminStatus" ) { ifAdminStatus[a[2]] = $2 }
-      if ( a[1] == "ifOperStatus" ) { ifOperStatus[a[2]] = $2 }
-    }
-    END {
-      up=0;
-      for (i in ifIndex ) {
-      if ( ifType[i] == 6 && ifAdminStatus[i] == 1 && ifOperStatus[i] == 1 && ifSpeed[i] == 1000000000 ) {
-      up=i;
-      }
-      }
-      if ( up == 0 ) { print "check link" ; exit 2 }
-      else { print ifDescr[up],"ok" }
-    }'
-    exit $? ;
-  fi
-}
 
 # Run all checks
-# Disabled 'check_link' for now... 
 for check in disks taskcontroller jetty; do
   msg=`check_${check}` ;
   if [ $? -eq 0 ] ; then

+ 0 - 4
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/metainfo.xml

@@ -86,10 +86,6 @@
               <type>rpm</type>
               <name>perl</name>
             </package>
-            <package>
-              <type>rpm</type>
-              <name>perl-Net-SNMP</name>
-            </package>
             <package>
               <type>rpm</type>
               <name>nagios-plugins-1.4.9</name>

+ 109 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_cpu.php

@@ -0,0 +1,109 @@
+<?php
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Nagios plugin: reads SystemCpuLoad from a daemon's /jmx OperatingSystem bean and compares it to -w / -c. */
+  include "hdp_nagios_init.php";
+  /* Every option takes a value, including -e (the command passes "-e <true|false>"). */
+  $options = getopt ("h:p:w:c:k:r:t:u:e:");
+  if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options)
+      || !array_key_exists('c', $options)) {
+    usage();
+    exit(3);
+  }
+
+  $hosts=$options['h'];
+  $port=$options['p'];
+  $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn);
+  $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit);
+  $keytab_path=$options['k'];
+  $principal_name=$options['r'];
+  $kinit_path_local=$options['t'];
+  $security_enabled=$options['u'];
+  $ssl_enabled=$options['e'];
+
+  /* Kinit if security enabled */
+  $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name);
+  $retcode = $status[0];
+  $output = $status[1];
+  /* $retcode is the kinit status code; $output is only the message text shown to the operator. */
+  if ($retcode != 0) {
+    echo "CRITICAL: Error doing kinit for nagios. $output";
+    exit (2);
+  }
+
+  $protocol = ($ssl_enabled == "true" ? "https" : "http");
+
+  /* NOTE: with several comma-separated hosts, only the last host's load reaches the threshold checks below. */
+  foreach (preg_split('/,/', $hosts) as $host) {
+    /* Get the json document */
+
+    $ch = curl_init();
+    $username = rtrim(`id -un`, "\n");
+    curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem",
+                                  CURLOPT_RETURNTRANSFER => true,
+                                  CURLOPT_HTTPAUTH => CURLAUTH_ANY,
+                                  CURLOPT_USERPWD => "$username:",
+                                  CURLOPT_SSL_VERIFYPEER => FALSE ));
+    $json_string = curl_exec($ch);
+    $info = curl_getinfo($ch);
+    if (intval($info['http_code']) == 401){
+      logout();
+      $json_string = curl_exec($ch);
+    }
+    $info = curl_getinfo($ch);
+    curl_close($ch);
+    $json_array = json_decode($json_string, true);
+
+    $object = $json_array['beans'][0];
+
+    if (empty($object)) {
+      echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n";
+      exit(2);
+    }
+
+    $cpu_load = $object['SystemCpuLoad'];
+
+    if (!isset($object['SystemCpuLoad']) || $cpu_load < 0.0) {
+      echo "WARNING: Data unavailable, SystemCpuLoad is not set\n";
+      exit(1);
+    }
+
+    $cpu_count = $object['AvailableProcessors'];
+
+    $cpu_percent = $cpu_load*100;
+  }
+
+  $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%';
+
+  if ($cpu_percent > $crit) {
+    echo $out_msg . ' > ' . $crit . "% : CRITICAL\n";
+    exit(2);
+  }
+  if ($cpu_percent > $warn) {
+    echo $out_msg . ' > ' . $warn . "% : WARNING\n";
+    exit(1);
+  }
+
+  echo $out_msg . ' < ' . $warn . "% : OK\n";
+  exit(0);
+
+  /* print usage */
+  function usage () {
+    echo "Usage: " . $_SERVER['argv'][0] . " -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n";
+  }
+?>

+ 26 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/functions.py

@@ -25,4 +25,29 @@ def get_port_from_url(address):
   if not is_empty(address):
     return address.split(':')[-1]
   else:
-    return address
+    return address
+
+# Gets if the java version is greater than 6
+def is_jdk_greater_6(java64_home):
+  import os
+  import re
+  java_bin = os.path.join(java64_home, 'bin', 'java')
+  ver_check = shell.call([java_bin, '-version'])
+
+  ver = ''
+  if 0 != ver_check[0]:
+    # java is not local, try the home name as a fallback
+    ver = java64_home
+  else:
+    ver = ver_check[1]
+
+  regex = re.compile('"1\.([0-9]*)\.0_([0-9]*)"', re.IGNORECASE)
+  r = regex.search(ver)
+  if r:
+    strs = r.groups()
+    if 2 == len(strs):
+      minor = int(strs[0])
+      if minor > 6:
+        return True
+
+  return False

+ 1 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py

@@ -49,6 +49,7 @@ def nagios_server_config():
     )
 
   nagios_server_check( 'check_cpu.pl')
+  nagios_server_check( 'check_cpu.php')
   nagios_server_check( 'check_datanode_storage.php')
   nagios_server_check( 'check_aggregate.php')
   nagios_server_check( 'check_hdfs_blocks.php')

+ 4 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py

@@ -21,6 +21,7 @@ Ambari Agent
 """
 
 from functions import get_port_from_url
+from functions import is_jdk_greater_6
 from resource_management import *
 import status_params
 
@@ -65,7 +66,8 @@ flume_port = "4159"
 hive_metastore_port = config['configurations']['global']['hive_metastore_port'] #"9083"
 hive_server_port = "10000"
 templeton_port = config['configurations']['webhcat-site']['templeton.port'] #"50111"
-hbase_rs_port = "60030"
+hbase_master_port = config['configurations']['hbase-site']['hbase.master.info.port'] #"60010"
+hbase_rs_port = config['configurations']['hbase-site']['hbase.regionserver.info.port'] #"60030"
 storm_ui_port = config['configurations']['storm-site']['ui.port']
 drpc_port = config['configurations']['storm-site']['drpc.port']
 nimbus_port = config['configurations']['storm-site']['nimbus.thrift.port']
@@ -80,6 +82,7 @@ clientPort = config['configurations']['global']['clientPort'] #ZK
 
 
 java64_home = config['hostLevelParams']['java_home']
+check_cpu_on = is_jdk_greater_6(java64_home)
 _authentication = config['configurations']['core-site']['hadoop.security.authentication']
 security_enabled = ( not is_empty(_authentication) and _authentication == 'kerberos')
 

+ 4 - 3
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2

@@ -38,11 +38,12 @@
 #
 #
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 # 'check_cpu' check remote cpu load
 define command {
         command_name    check_cpu
-        command_line    $USER1$/check_wrapper.sh $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
+#        command_line    $USER1$/check_wrapper.sh $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_cpu.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$
        }
 {% endif %}
 
@@ -74,7 +75,7 @@ define command{
 
 define command{
         command_name    check_nagios
-        command_line    $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
        }
 
 define command{

+ 12 - 8
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -360,13 +360,14 @@ define service {
         max_check_attempts      3
 }
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         host_name               {{ namenode_hostname }}
         use                     hadoop-service
         service_description     NAMENODE::NameNode host CPU utilization on {{ namenode_hostname }}
         servicegroups           HDFS
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ namenode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2
         max_check_attempts      5
@@ -448,13 +449,14 @@ define service {
         max_check_attempts      3
 }
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         hostgroup_name          resourcemanager
         use                     hadoop-service
         service_description     RESOURCEMANAGER::ResourceManager CPU utilization
         servicegroups           YARN
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ rm_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -532,13 +534,14 @@ define service {
         max_check_attempts      3
 }
 
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer CPU utilization
         servicegroups           MAPREDUCE
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ hs_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5
@@ -676,13 +679,14 @@ define service {
 #         max_check_attempts      3
 # #}
 {%  for hbasemaster in hbase_master_hosts  %}
-{% if env.system.os_family != "suse" %}
+{% if check_cpu_on %}
 define service {
         host_name               {{ hbasemaster }}
         use                     hadoop-service
         service_description     HBASEMASTER::HBase Master CPU utilization on {{ hbasemaster }}
         servicegroups           HBASE
-        check_command           check_cpu!200%!250%
+#        check_command           check_cpu!200%!250%
+        check_command           check_cpu!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }}
         normal_check_interval   5
         retry_check_interval    2 
         max_check_attempts      5

+ 4 - 0
ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py

@@ -161,6 +161,10 @@ class TestNagiosServer(RMFTestCase):
                               content=StaticFile('check_cpu.pl'),
                               mode=0755
     )
+    self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_cpu.php',
+                              content=StaticFile('check_cpu.php'),
+                              mode=0755
+    )
     self.assertResourceCalled('File',
                               '/usr/lib64/nagios/plugins/check_datanode_storage.php',
                               content=StaticFile('check_datanode_storage.php'),

+ 1 - 2
ambari-server/src/test/python/stacks/1.3.2/hooks/before-INSTALL/test_before_install.py

@@ -104,5 +104,4 @@ class TestHookBeforeInstall(RMFTestCase):
                               gid = 'hadoop',
                               )
     self.assertResourceCalled('Package', 'unzip',)
-    self.assertResourceCalled('Package', 'net-snmp',)
-    self.assertNoMoreResources()
+    self.assertNoMoreResources()

+ 1 - 13
ambari-server/src/test/python/stacks/1.3.2/hooks/before-START/test_before_start.py

@@ -87,12 +87,6 @@ class TestHookBeforeStart(RMFTestCase):
       owner = 'hdfs',
       group = 'hadoop',
     )
-    self.assertResourceCalled('File', '/etc/snmp/snmpd.conf',
-                              content = Template('snmpd.conf.j2'),
-                              )
-    self.assertResourceCalled('Execute', 'service snmpd start; chkconfig snmpd on',
-                              path = ['/usr/local/bin/:/bin/:/sbin/'],
-                              )
     self.assertNoMoreResources()
 
   def test_hook_secured(self, mockHook):
@@ -153,10 +147,4 @@ class TestHookBeforeStart(RMFTestCase):
                               owner = 'hdfs',
                               group = 'hadoop',
                               )
-    self.assertResourceCalled('File', '/etc/snmp/snmpd.conf',
-                              content = Template('snmpd.conf.j2'),
-                              )
-    self.assertResourceCalled('Execute', 'service snmpd start; chkconfig snmpd on',
-                              path = ['/usr/local/bin/:/bin/:/sbin/'],
-                              )
-    self.assertNoMoreResources()
+    self.assertNoMoreResources()

+ 4 - 0
ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py

@@ -160,6 +160,10 @@ class TestNagiosServer(RMFTestCase):
                               content=StaticFile('check_cpu.pl'),
                               mode=0755
     )
+    self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_cpu.php',
+                              content=StaticFile('check_cpu.php'),
+                              mode=0755
+    )
     self.assertResourceCalled('File',
                               '/usr/lib64/nagios/plugins/check_datanode_storage.php',
                               content=StaticFile('check_datanode_storage.php'),

+ 0 - 1
ambari-server/src/test/python/stacks/2.0.6/hooks/before-INSTALL/test_before_install.py

@@ -30,7 +30,6 @@ class TestHookBeforeInstall(RMFTestCase):
     )
     self.assertResourceCalled('Package', 'unzip', )
     self.assertResourceCalled('Package', 'curl', )
-    self.assertResourceCalled('Package', 'net-snmp', )
     
     self.assertResourceCalled('Execute', 'mkdir -p /tmp/HDP-artifacts/ ; curl -kf --retry 10 http://c6401.ambari.apache.org:8080/resources//jdk-7u45-linux-x64.tar.gz -o /tmp/HDP-artifacts//jdk-7u45-linux-x64.tar.gz',
                               not_if = 'test -e /usr/jdk64/jdk1.7.0_45/bin/java',

+ 0 - 14
ambari-server/src/test/python/stacks/2.0.6/hooks/before-START/test_before_start.py

@@ -96,13 +96,6 @@ class TestHookBeforeStart(RMFTestCase):
       owner = 'hdfs',
       group = 'hadoop',
     )
-    self.assertResourceCalled('File', '/etc/snmp/snmpd.conf',
-                              content = Template('snmpd.conf.j2'),
-                              )
-    self.assertResourceCalled('Execute', 'service snmpd start',
-                              )
-    self.assertResourceCalled('Execute', 'chkconfig snmpd on',
-                              )
     self.assertNoMoreResources()
 
   def test_hook_secured(self):
@@ -178,11 +171,4 @@ class TestHookBeforeStart(RMFTestCase):
                               owner = 'hdfs',
                               group = 'hadoop',
                               )
-    self.assertResourceCalled('File', '/etc/snmp/snmpd.conf',
-                              content = Template('snmpd.conf.j2'),
-                              )
-    self.assertResourceCalled('Execute', 'service snmpd start',
-                            )
-    self.assertResourceCalled('Execute', 'chkconfig snmpd on',
-                            )
     self.assertNoMoreResources()