123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644 |
- #!/usr/bin/python
- # Licensed to the Apache Software Foundation (ASF) under one or more
- # contributor license agreements. See the NOTICE file distributed with
- # this work for additional information regarding copyright ownership.
- # The ASF licenses this file to You under the Apache License, Version 2.0
- # (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import sys
- sys.path.append('../src')
- import sys_logger
# Running tallies, updated by test_log_tvi_msg() and reported by summary().
tests_passed = 0
tests_failed = 0

# Message the case currently being run expects; test() assigns it before
# each call into sys_logger.  Defined here so the checker never hits a
# NameError if the hook fires before any expectation has been recorded.
expected_log_msg = None


def test_log_tvi_msg(msg):
    """Check a message handed to the log hook against the current expectation.

    Compares ``msg`` with the module-level ``expected_log_msg``.  On a match
    it prints 'Test Passed' and increments ``tests_passed``; otherwise it
    prints the expected and actual messages and increments ``tests_failed``.

    :param msg: the message text passed to the hook.
    """
    global tests_passed, tests_failed
    if msg == expected_log_msg:
        # Single-argument print(...) emits the same text on Python 2 and 3,
        # unlike the print statement, which is a syntax error on Python 3.
        print('Test Passed')
        tests_passed += 1
    else:
        print('*** TEST FAILED ***')
        print('Expected MSG: {0}'.format(expected_log_msg))
        print('Actual MSG : {0}'.format(msg))
        tests_failed += 1


# Replace sys_logger.log_tvi_msg with the checker above.  Presumably
# generate_tvi_log_msg() hands its result to this hook -- the cases below
# depend on that; confirm against sys_logger if it changes.
sys_logger.log_tvi_msg = test_log_tvi_msg
def test(tvi_rule, expected_msg, arg1, arg2, arg3, arg4, arg5):
    """Run a single case through sys_logger.generate_tvi_log_msg.

    Writes ``tvi_rule`` followed by ': ' to stdout (no newline), stores
    ``expected_msg`` in the module-level ``expected_log_msg``, then calls
    ``sys_logger.generate_tvi_log_msg`` with the five remaining arguments in
    order.

    :param tvi_rule: label printed in front of the case result.
    :param expected_msg: message the case expects to be generated.
    :param arg1: forwarded unchanged; call sites in this file pass 'HARD'.
    :param arg2: forwarded unchanged; call sites in this file pass '1'.
    :param arg3: forwarded unchanged; call sites pass a state such as
        'CRITICAL', 'WARNING', 'UNKNOWN' or 'OK'.
    :param arg4: forwarded unchanged; call sites pass a service description.
    :param arg5: forwarded unchanged; call sites pass the service output text.
    """
    global expected_log_msg
    sys.stdout.write(tvi_rule + ': ')
    expected_log_msg = expected_msg
    logger_args = (arg1, arg2, arg3, arg4, arg5)
    sys_logger.generate_tvi_log_msg(*logger_args)
def summary():
    """Print the totals for the run, plus a success line if nothing failed.

    Reads the module-level ``tests_passed`` and ``tests_failed`` counters and
    prints the number of cases run, the pass/fail split, and -- only when
    ``tests_failed`` is zero -- 'SUCCESS! All tests pass.'.  Returns None.
    """
    total_tests = tests_passed + tests_failed
    # Single-argument print(...) emits the same text on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print('\nTests Run: {0}'.format(total_tests))
    print('Passed: {0}, Failed: {1}'.format(tests_passed, tests_failed))
    if not tests_failed:
        print('SUCCESS! All tests pass.')
# Cases for the Hadoop_* TVI rules, executed in the order listed.
# Each entry is (rule label, expected message, state, service description,
# service output).  Every case is issued with 'HARD' and '1' as the first two
# logger arguments.
_HADOOP_RULE_CASES = [
    # Hadoop_Host_Down
    ('Hadoop_Host_Down',
     'Critical: Hadoop: host_down# Event Host=MY_HOST(CRITICAL), PING FAILED - Packet loss = 100%, RTA = 0.00 ms',
     'CRITICAL', 'Host::Ping',
     'Event Host=MY_HOST(CRITICAL), PING FAILED - Packet loss = 100%, RTA = 0.00 ms'),
    ('Hadoop_Host_Down:OK',
     'OK: Hadoop: host_down_ok# Event Host=MY_HOST(OK), PING SUCCESS - Packet loss = 0%, RTA = 1.00 ms',
     'OK', 'Host::Ping',
     'Event Host=MY_HOST(OK), PING SUCCESS - Packet loss = 0%, RTA = 1.00 ms'),

    # Hadoop_Master_Daemon_CPU_Utilization
    ('Hadoop_Master_Daemon_CPU_Utilization',
     'Critical: Hadoop: master_cpu_utilization# Event Host=MY_HOST Service Description=HBASEMASTER::HBaseMaster CPU utilization(CRITICAL), 4 CPU, average load 2.5% 200%',
     'CRITICAL', 'HBASEMASTER::HBaseMaster CPU utilization',
     'Event Host=MY_HOST Service Description=HBASEMASTER::HBaseMaster CPU utilization(CRITICAL), 4 CPU, average load 2.5% 200%'),
    ('Hadoop_Master_Daemon_CPU_Utilization:Degraded',
     'Degraded: Hadoop: master_cpu_utilization# Event Host=MY_HOST Service Description=HBASEMASTER::HBaseMaster CPU utilization(CRITICAL), 4 CPU, average load 2.5% 200%',
     'WARNING', 'HBASEMASTER::HBaseMaster CPU utilization',
     'Event Host=MY_HOST Service Description=HBASEMASTER::HBaseMaster CPU utilization(CRITICAL), 4 CPU, average load 2.5% 200%'),
    ('Hadoop_Master_Daemon_CPU_Utilization:OK',
     'OK: Hadoop: master_cpu_utilization_ok# Event Host=MY_HOST Service Description=HBASEMASTER::HBaseMaster CPU utilization(OK), 4 CPU, average load 2.5% 200%',
     'OK', 'HBASEMASTER::HBaseMaster CPU utilization',
     'Event Host=MY_HOST Service Description=HBASEMASTER::HBaseMaster CPU utilization(OK), 4 CPU, average load 2.5% 200%'),

    # Hadoop_HDFS_Percent_Capacity
    ('Hadoop_HDFS_Percent_Capacity',
     'Critical: Hadoop: hdfs_percent_capacity# Event Host=MY_HOST Service Description=HDFS::HDFS Capacity utilization(CRITICAL),DFSUsedGB:0.1, DFSTotalGB:1568.7',
     'CRITICAL', 'HDFS::HDFS Capacity utilization',
     'Event Host=MY_HOST Service Description=HDFS::HDFS Capacity utilization(CRITICAL),DFSUsedGB:0.1, DFSTotalGB:1568.7'),
    ('Hadoop_HDFS_Percent_Capacity:OK',
     'OK: Hadoop: hdfs_percent_capacity_ok# Event Host=MY_HOST Service Description=HDFS::HDFS Capacity utilization(OK),DFSUsedGB:0.1, DFSTotalGB:1568.7',
     'OK', 'HDFS::HDFS Capacity utilization',
     'Event Host=MY_HOST Service Description=HDFS::HDFS Capacity utilization(OK),DFSUsedGB:0.1, DFSTotalGB:1568.7'),

    # Hadoop_HDFS_Corrupt_Missing_Blocks
    ('Hadoop_HDFS_Corrupt_Missing_Blocks',
     'Critical: Hadoop: hdfs_block# Event Host=MY_HOST Service Description=HDFS::Corrupt/Missing blocks(CRITICAL), corrupt_blocks:0, missing_blocks:0, total_blocks:147',
     'CRITICAL', 'HDFS::Corrupt/Missing blocks',
     'Event Host=MY_HOST Service Description=HDFS::Corrupt/Missing blocks(CRITICAL), corrupt_blocks:0, missing_blocks:0, total_blocks:147'),
    ('Hadoop_HDFS_Corrupt_Missing_Blocks:OK',
     'OK: Hadoop: hdfs_block_ok# Event Host=MY_HOST Service Description=HDFS::Corrupt/Missing blocks(OK), corrupt_blocks:0, missing_blocks:0, total_blocks:147',
     'OK', 'HDFS::Corrupt/Missing blocks',
     'Event Host=MY_HOST Service Description=HDFS::Corrupt/Missing blocks(OK), corrupt_blocks:0, missing_blocks:0, total_blocks:147'),

    # Hadoop_NameNode_Edit_Log_Dir_Write
    ('Hadoop_NameNode_Edit_Log_Dir_Write',
     'Critical: Hadoop: namenode_edit_log_write# SERVICE MSG',
     'CRITICAL', 'NAMENODE::Namenode Edit logs directory status', 'SERVICE MSG'),
    ('Hadoop_NameNode_Edit_Log_Dir_Write:OK',
     'OK: Hadoop: namenode_edit_log_write_ok# SERVICE MSG',
     'OK', 'NAMENODE::Namenode Edit logs directory status', 'SERVICE MSG'),

    # Hadoop_DataNode_Down
    ('Hadoop_DataNode_Down',
     'Critical: Hadoop: datanode_down# SERVICE MSG',
     'CRITICAL', 'HDFS::Percent DataNodes down', 'SERVICE MSG'),
    ('Hadoop_DataNode_Down:OK',
     'OK: Hadoop: datanode_down_ok# SERVICE MSG',
     'OK', 'HDFS::Percent DataNodes down', 'SERVICE MSG'),

    # Hadoop_DataNode_Process_Down
    ('Hadoop_DataNode_Process_Down',
     'Critical: Hadoop: datanode_process_down# SERVICE MSG',
     'CRITICAL', 'DATANODE::Process down', 'SERVICE MSG'),
    ('Hadoop_DataNode_Process_Down:OK',
     'OK: Hadoop: datanode_process_down_ok# SERVICE MSG',
     'OK', 'DATANODE::Process down', 'SERVICE MSG'),

    # Hadoop_Percent_DataNodes_Storage_Full
    ('Hadoop_Percent_DataNodes_Storage_Full',
     'Critical: Hadoop: datanodes_percent_storage_full# SERVICE MSG',
     'CRITICAL', 'HDFS::Percent DataNodes storage full', 'SERVICE MSG'),
    ('Hadoop_Percent_DataNodes_Storage_Full:OK',
     'OK: Hadoop: datanodes_percent_storage_full_ok# SERVICE MSG',
     'OK', 'HDFS::Percent DataNodes storage full', 'SERVICE MSG'),

    # Hadoop_NameNode_Process_Down
    ('Hadoop_NameNode_Process_Down:CRITICAL',
     'Fatal: Hadoop: namenode_process_down# SERVICE MSG',
     'CRITICAL', 'NAMENODE::Namenode Process down', 'SERVICE MSG'),
    ('Hadoop_NameNode_Process_Down:WARNING',
     'Fatal: Hadoop: namenode_process_down# SERVICE MSG',
     'WARNING', 'NAMENODE::Namenode Process down', 'SERVICE MSG'),
    ('Hadoop_NameNode_Process_Down:UNKNOWN',
     'Fatal: Hadoop: namenode_process_down# SERVICE MSG',
     'UNKNOWN', 'NAMENODE::Namenode Process down', 'SERVICE MSG'),
    ('Hadoop_NameNode_Process_Down:OK',
     'OK: Hadoop: namenode_process_down_ok# SERVICE MSG',
     'OK', 'NAMENODE::Namenode Process down', 'SERVICE MSG'),

    # Hadoop_Secondary_NameNode_Process_Down
    ('Hadoop_Secondary_NameNode_Process_Down',
     'Critical: Hadoop: secondary_namenode_process_down# SERVICE MSG',
     'CRITICAL', 'NAMENODE::Secondary Namenode Process down', 'SERVICE MSG'),
    ('Hadoop_Secondary_NameNode_Process_Down:OK',
     'OK: Hadoop: secondary_namenode_process_down_ok# SERVICE MSG',
     'OK', 'NAMENODE::Secondary Namenode Process down', 'SERVICE MSG'),

    # Hadoop_NameNode_RPC_Latency
    ('Hadoop_NameNode_RPC_Latency',
     'Critical: Hadoop: namenode_rpc_latency# SERVICE MSG',
     'CRITICAL', 'HDFS::Namenode RPC Latency', 'SERVICE MSG'),
    ('Hadoop_NameNode_RPC_Latency:Degraded',
     'Degraded: Hadoop: namenode_rpc_latency# SERVICE MSG',
     'WARNING', 'HDFS::Namenode RPC Latency', 'SERVICE MSG'),
    ('Hadoop_NameNode_RPC_Latency:OK',
     'OK: Hadoop: namenode_rpc_latency_ok# SERVICE MSG',
     'OK', 'HDFS::Namenode RPC Latency', 'SERVICE MSG'),

    # Hadoop_DataNodes_Storage_Full
    ('Hadoop_DataNodes_Storage_Full',
     'Critical: Hadoop: datanodes_storage_full# SERVICE MSG',
     'CRITICAL', 'DATANODE::Storage full', 'SERVICE MSG'),
    ('Hadoop_DataNodes_Storage_Full:OK',
     'OK: Hadoop: datanodes_storage_full_ok# SERVICE MSG',
     'OK', 'DATANODE::Storage full', 'SERVICE MSG'),

    # Hadoop_JobTracker_Process_Down
    ('Hadoop_JobTracker_Process_Down',
     'Critical: Hadoop: jobtracker_process_down# SERVICE MSG',
     'CRITICAL', 'JOBTRACKER::Jobtracker Process down', 'SERVICE MSG'),
    ('Hadoop_JobTracker_Process_Down:OK',
     'OK: Hadoop: jobtracker_process_down_ok# SERVICE MSG',
     'OK', 'JOBTRACKER::Jobtracker Process down', 'SERVICE MSG'),

    # Hadoop_JobTracker_RPC_Latency
    ('Hadoop_JobTracker_RPC_Latency',
     'Critical: Hadoop: jobtracker_rpc_latency# SERVICE MSG',
     'CRITICAL', 'MAPREDUCE::JobTracker RPC Latency', 'SERVICE MSG'),
    ('Hadoop_JobTracker_RPC_Latency:Degraded',
     'Degraded: Hadoop: jobtracker_rpc_latency# SERVICE MSG',
     'WARNING', 'MAPREDUCE::JobTracker RPC Latency', 'SERVICE MSG'),
    ('Hadoop_JobTracker_RPC_Latency:OK',
     'OK: Hadoop: jobtracker_rpc_latency_ok# SERVICE MSG',
     'OK', 'MAPREDUCE::JobTracker RPC Latency', 'SERVICE MSG'),

    # Hadoop_JobTracker_CPU_Utilization
    ('Hadoop_JobTracker_CPU_Utilization',
     'Critical: Hadoop: jobtracker_cpu_utilization# SERVICE MSG',
     'CRITICAL', 'JOBTRACKER::Jobtracker CPU utilization', 'SERVICE MSG'),
    ('Hadoop_JobTracker_CPU_Utilization:Degraded',
     'Degraded: Hadoop: jobtracker_cpu_utilization# SERVICE MSG',
     'WARNING', 'JOBTRACKER::Jobtracker CPU utilization', 'SERVICE MSG'),
    ('Hadoop_JobTracker_CPU_Utilization:OK',
     'OK: Hadoop: jobtracker_cpu_utilization_ok# SERVICE MSG',
     'OK', 'JOBTRACKER::Jobtracker CPU utilization', 'SERVICE MSG'),

    # Hadoop_TaskTracker_Down
    ('Hadoop_TaskTracker_Down',
     'Critical: Hadoop: tasktrackers_down# SERVICE MSG',
     'CRITICAL', 'MAPREDUCE::Percent TaskTrackers down', 'SERVICE MSG'),
    ('Hadoop_TaskTracker_Down:OK',
     'OK: Hadoop: tasktrackers_down_ok# SERVICE MSG',
     'OK', 'MAPREDUCE::Percent TaskTrackers down', 'SERVICE MSG'),

    # Hadoop_TaskTracker_Process_Down
    ('Hadoop_TaskTracker_Process_Down',
     'Critical: Hadoop: tasktracker_process_down# SERVICE MSG',
     'CRITICAL', 'TASKTRACKER::Process down', 'SERVICE MSG'),
    ('Hadoop_TaskTracker_Process_Down:OK',
     'OK: Hadoop: tasktracker_process_down_ok# SERVICE MSG',
     'OK', 'TASKTRACKER::Process down', 'SERVICE MSG'),

    # Hadoop_HBaseMaster_Process_Down
    ('Hadoop_HBaseMaster_Process_Down',
     'Critical: Hadoop: hbasemaster_process_down# SERVICE MSG',
     'CRITICAL', 'HBASEMASTER::HBaseMaster Process down', 'SERVICE MSG'),
    ('Hadoop_HBaseMaster_Process_Down:OK',
     'OK: Hadoop: hbasemaster_process_down_ok# SERVICE MSG',
     'OK', 'HBASEMASTER::HBaseMaster Process down', 'SERVICE MSG'),

    # Hadoop_RegionServer_Process_Down
    ('Hadoop_RegionServer_Process_Down',
     'Critical: Hadoop: regionserver_process_down# SERVICE MSG',
     'CRITICAL', 'REGIONSERVER::Process down', 'SERVICE MSG'),
    ('Hadoop_RegionServer_Process_Down:OK',
     'OK: Hadoop: regionserver_process_down_ok# SERVICE MSG',
     'OK', 'REGIONSERVER::Process down', 'SERVICE MSG'),

    # Hadoop_RegionServer_Down
    ('Hadoop_RegionServer_Down',
     'Critical: Hadoop: regionservers_down# SERVICE MSG',
     'CRITICAL', 'HBASE::Percent region servers down', 'SERVICE MSG'),
    ('Hadoop_RegionServer_Down:OK',
     'OK: Hadoop: regionservers_down_ok# SERVICE MSG',
     'OK', 'HBASE::Percent region servers down', 'SERVICE MSG'),
    ('HBASE_RegionServer_live',
     'Critical: Hadoop: regionservers_down# SERVICE MSG',
     'CRITICAL', 'HBASE::Percent RegionServers live', 'SERVICE MSG'),
    ('HBASE_RegionServer_live:OK',
     'OK: Hadoop: regionservers_down_ok# SERVICE MSG',
     'OK', 'HBASE::Percent RegionServers live', 'SERVICE MSG'),

    # Hadoop_Hive_Metastore_Process_Down
    ('Hadoop_Hive_Metastore_Process_Down',
     'Critical: Hadoop: hive_metastore_process_down# SERVICE MSG',
     'CRITICAL', 'HIVE-METASTORE::HIVE-METASTORE status check', 'SERVICE MSG'),
    ('Hadoop_Hive_Metastore_Process_Down:OK',
     'OK: Hadoop: hive_metastore_process_down_ok# SERVICE MSG',
     'OK', 'HIVE-METASTORE::HIVE-METASTORE status check', 'SERVICE MSG'),

    # Hadoop_Zookeeper_Down
    ('Hadoop_Zookeeper_Down',
     'Critical: Hadoop: zookeepers_down# SERVICE MSG',
     'CRITICAL', 'ZOOKEEPER::Percent zookeeper servers down', 'SERVICE MSG'),
    ('Hadoop_Zookeeper_Down:OK',
     'OK: Hadoop: zookeepers_down_ok# SERVICE MSG',
     'OK', 'ZOOKEEPER::Percent zookeeper servers down', 'SERVICE MSG'),

    # Hadoop_Zookeeper_Process_Down
    ('Hadoop_Zookeeper_Process_Down',
     'Critical: Hadoop: zookeeper_process_down# SERVICE MSG',
     'CRITICAL', 'ZKSERVERS::ZKSERVERS Process down', 'SERVICE MSG'),
    ('Hadoop_Zookeeper_Process_Down:OK',
     'OK: Hadoop: zookeeper_process_down_ok# SERVICE MSG',
     'OK', 'ZKSERVERS::ZKSERVERS Process down', 'SERVICE MSG'),

    # Hadoop_Oozie_Down
    ('Hadoop_Oozie_Down',
     'Critical: Hadoop: oozie_down# SERVICE MSG',
     'CRITICAL', 'OOZIE::Oozie status check', 'SERVICE MSG'),
    ('Hadoop_Oozie_Down:OK',
     'OK: Hadoop: oozie_down_ok# SERVICE MSG',
     'OK', 'OOZIE::Oozie status check', 'SERVICE MSG'),

    # Hadoop_Templeton_Down
    ('Hadoop_Templeton_Down',
     'Critical: Hadoop: templeton_down# SERVICE MSG',
     'CRITICAL', 'TEMPLETON::Templeton status check', 'SERVICE MSG'),
    ('Hadoop_Templeton_Down:OK',
     'OK: Hadoop: templeton_down_ok# SERVICE MSG',
     'OK', 'TEMPLETON::Templeton status check', 'SERVICE MSG'),

    # Hadoop_Puppet_Down
    ('Hadoop_Puppet_Down',
     'Critical: Hadoop: puppet_down# SERVICE MSG',
     'CRITICAL', 'PUPPET::Puppet agent down', 'SERVICE MSG'),
    ('Hadoop_Puppet_Down:OK',
     'OK: Hadoop: puppet_down_ok# SERVICE MSG',
     'OK', 'PUPPET::Puppet agent down', 'SERVICE MSG'),

    # Hadoop_Nagios_Status_Log_Stale
    ('Hadoop_Nagios_Status_Log_Stale',
     'Critical: Hadoop: nagios_status_log_stale# SERVICE MSG',
     'CRITICAL', 'NAGIOS::Nagios status log staleness', 'SERVICE MSG'),
    ('Hadoop_Nagios_Status_Log_Stale:OK',
     'OK: Hadoop: nagios_status_log_stale_ok# SERVICE MSG',
     'OK', 'NAGIOS::Nagios status log staleness', 'SERVICE MSG'),

    # Hadoop_Ganglia_Process_Down
    ('Hadoop_Ganglia_Process_Down',
     'Critical: Hadoop: ganglia_process_down# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia [gmetad] Process down', 'SERVICE MSG'),
    ('Hadoop_Ganglia_Process_Down:OK',
     'OK: Hadoop: ganglia_process_down_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia [gmetad] Process down', 'SERVICE MSG'),

    # Hadoop_Ganglia_Collector_Process_Down
    ('Hadoop_Ganglia_Collector_Process_Down',
     'Critical: Hadoop: ganglia_collector_process_down# SERVICE MSG',
     'CRITICAL',
     'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster',
     'SERVICE MSG'),
    ('Hadoop_Ganglia_Collector_Process_Down:OK',
     'OK: Hadoop: ganglia_collector_process_down_ok# SERVICE MSG',
     'OK',
     'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster',
     'SERVICE MSG'),

    # Hadoop_Ganglia_Collector_Process_Down
    ('Hadoop_Ganglia_Collector_Process_Down',
     'Critical: Hadoop: ganglia_collector_process_down# SERVICE MSG',
     'CRITICAL',
     'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker',
     'SERVICE MSG'),
    ('Hadoop_Ganglia_Collector_Process_Down:OK',
     'OK: Hadoop: ganglia_collector_process_down_ok# SERVICE MSG',
     'OK',
     'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker',
     'SERVICE MSG'),

    # Hadoop_Ganglia_Collector_Process_Down
    ('Hadoop_Ganglia_Collector_Process_Down',
     'Critical: Hadoop: ganglia_collector_process_down# SERVICE MSG',
     'CRITICAL',
     'GANGLIA::Ganglia collector [gmond] Process down alert for namenode',
     'SERVICE MSG'),
    ('Hadoop_Ganglia_Collector_Process_Down:OK',
     'OK: Hadoop: ganglia_collector_process_down_ok# SERVICE MSG',
     'OK',
     'GANGLIA::Ganglia collector [gmond] Process down alert for namenode',
     'SERVICE MSG'),

    # Hadoop_Ganglia_Collector_Process_Down
    ('Hadoop_Ganglia_Collector_Process_Down',
     'Critical: Hadoop: ganglia_collector_process_down# SERVICE MSG',
     'CRITICAL',
     'GANGLIA::Ganglia collector [gmond] Process down alert for slaves',
     'SERVICE MSG'),
    ('Hadoop_Ganglia_Collector_Process_Down:OK',
     'OK: Hadoop: ganglia_collector_process_down_ok# SERVICE MSG',
     'OK',
     'GANGLIA::Ganglia collector [gmond] Process down alert for slaves',
     'SERVICE MSG'),

    # Hadoop_UNKNOWN_MSG
    ('Hadoop_UNKNOWN_MSG',
     'Critical: Hadoop: HADOOP_UNKNOWN_MSG# SERVICE MSG',
     'CRITICAL', 'ANY UNKNOWN SERVICE', 'SERVICE MSG'),

    # HBase UI Down
    ('Hadoop_HBase_UI_Down',
     'Critical: Hadoop: hbase_ui_down# SERVICE MSG',
     'CRITICAL', 'HBASEMASTER::HBase Web UI down', 'SERVICE MSG'),
    ('Hadoop_HBase_UI_Down:OK',
     'OK: Hadoop: hbase_ui_down_ok# SERVICE MSG',
     'OK', 'HBASEMASTER::HBase Web UI down', 'SERVICE MSG'),

    # Namenode UI Down
    ('Hadoop_NameNode_UI_Down',
     'Critical: Hadoop: namenode_ui_down# SERVICE MSG',
     'CRITICAL', 'NAMENODE::Namenode Web UI down', 'SERVICE MSG'),
    ('Hadoop_NameNode_UI_Down:OK',
     'OK: Hadoop: namenode_ui_down_ok# SERVICE MSG',
     'OK', 'NAMENODE::Namenode Web UI down', 'SERVICE MSG'),

    # JobHistory UI Down
    ('Hadoop_JobHistory_UI_Down',
     'Critical: Hadoop: jobhistory_ui_down# SERVICE MSG',
     'CRITICAL', 'JOBTRACKER::JobHistory Web UI down', 'SERVICE MSG'),
    ('Hadoop_JobHistory_UI_Down:OK',
     'OK: Hadoop: jobhistory_ui_down_ok# SERVICE MSG',
     'OK', 'JOBTRACKER::JobHistory Web UI down', 'SERVICE MSG'),

    # JobTracker UI Down
    ('Hadoop_JobTracker_UI_Down',
     'Critical: Hadoop: jobtracker_ui_down# SERVICE MSG',
     'CRITICAL', 'JOBTRACKER::JobTracker Web UI down', 'SERVICE MSG'),
    ('Hadoop_JobTracker_UI_Down:OK',
     'OK: Hadoop: jobtracker_ui_down_ok# SERVICE MSG',
     'OK', 'JOBTRACKER::JobTracker Web UI down', 'SERVICE MSG'),
]

for _rule, _expected, _state, _service, _output in _HADOOP_RULE_CASES:
    test(_rule, _expected, 'HARD', '1', _state, _service, _output)
# Tests for ambari nagios service check, executed in the order listed.
# Each entry is (rule label, expected message, state, service description).
# Every case is issued with 'HARD' and '1' as the first two logger arguments
# and 'SERVICE MSG' as the service output.
_SERVICE_CHECK_CASES = [
    ('DataNode_process',
     'Critical: Hadoop: datanode_process# SERVICE MSG',
     'CRITICAL', 'DATANODE::DataNode process'),
    ('DataNode_process:OK',
     'OK: Hadoop: datanode_process_ok# SERVICE MSG',
     'OK', 'DATANODE::DataNode process'),

    ('NameNode_process',
     'Fatal: Hadoop: namenode_process# SERVICE MSG',
     'CRITICAL', 'NAMENODE::NameNode process'),
    ('NameNode_process:WARNING',
     'Fatal: Hadoop: namenode_process# SERVICE MSG',
     'WARNING', 'NAMENODE::NameNode process'),
    ('NameNode_process:OK',
     'OK: Hadoop: namenode_process_ok# SERVICE MSG',
     'OK', 'NAMENODE::NameNode process'),

    ('Secondary_NameNode_process',
     'Critical: Hadoop: secondary_namenode_process# SERVICE MSG',
     'CRITICAL', 'NAMENODE::Secondary NameNode process'),
    ('Secondary_NameNode_process:OK',
     'OK: Hadoop: secondary_namenode_process_ok# SERVICE MSG',
     'OK', 'NAMENODE::Secondary NameNode process'),

    ('JournalNode_process',
     'Critical: Hadoop: journalnode_process# SERVICE MSG',
     'CRITICAL', 'JOURNALNODE::JournalNode process'),
    ('JournalNode_process:OK',
     'OK: Hadoop: journalnode_process_ok# SERVICE MSG',
     'OK', 'JOURNALNODE::JournalNode process'),

    ('ZooKeeper_Server_process',
     'Critical: Hadoop: zookeeper_server_process# SERVICE MSG',
     'CRITICAL', 'ZOOKEEPER::ZooKeeper Server process'),
    ('ZooKeeper_Server_process:OK',
     'OK: Hadoop: zookeeper_server_process_ok# SERVICE MSG',
     'OK', 'ZOOKEEPER::ZooKeeper Server process'),

    ('JobTracker_process',
     'Critical: Hadoop: jobtracker_process# SERVICE MSG',
     'CRITICAL', 'JOBTRACKER::JobTracker process'),
    ('JobTracker_process:OK',
     'OK: Hadoop: jobtracker_process_ok# SERVICE MSG',
     'OK', 'JOBTRACKER::JobTracker process'),

    ('TaskTracker_process',
     'Critical: Hadoop: tasktracker_process# SERVICE MSG',
     'CRITICAL', 'TASKTRACKER::TaskTracker process'),
    ('TaskTracker_process:OK',
     'OK: Hadoop: tasktracker_process_ok# SERVICE MSG',
     'OK', 'TASKTRACKER::TaskTracker process'),

    ('Ganglia_Server_process',
     'Critical: Hadoop: ganglia_server_process# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia Server process'),
    ('Ganglia_Server_process:OK',
     'OK: Hadoop: ganglia_server_process_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia Server process'),

    ('Ganglia_Monitor_process_for_Slaves',
     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia Monitor process for Slaves'),
    ('Ganglia_Monitor_process_for_Slaves:OK',
     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia Monitor process for Slaves'),

    ('Ganglia_Monitor_process_for_NameNode',
     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia Monitor process for NameNode'),
    ('Ganglia_Monitor_process_for_NameNode:OK',
     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia Monitor process for NameNode'),

    ('Ganglia_Monitor_process_for_JobTracker',
     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia Monitor process for JobTracker'),
    ('Ganglia_Monitor_process_for_JobTracker:OK',
     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia Monitor process for JobTracker'),

    ('Ganglia_Monitor_process_for_HBase_Master',
     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia Monitor process for HBase Master'),
    ('Ganglia_Monitor_process_for_HBase_Master:OK',
     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia Monitor process for HBase Master'),

    ('Ganglia_Monitor_process_for_ResourceManager',
     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia Monitor process for ResourceManager'),
    ('Ganglia_Monitor_process_for_ResourceManager:OK',
     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia Monitor process for ResourceManager'),

    ('Ganglia_Monitor_process_for_HistoryServer',
     'Critical: Hadoop: ganglia_monitor_process# SERVICE MSG',
     'CRITICAL', 'GANGLIA::Ganglia Monitor process for HistoryServer'),
    ('Ganglia_Monitor_process_for_HistoryServer:OK',
     'OK: Hadoop: ganglia_monitor_process_ok# SERVICE MSG',
     'OK', 'GANGLIA::Ganglia Monitor process for HistoryServer'),

    ('HBase_Master_process',
     'Critical: Hadoop: hbase_master_process# SERVICE MSG',
     'CRITICAL', 'HBASEMASTER::HBase Master process'),
    ('HBase_Master_process:OK',
     'OK: Hadoop: hbase_master_process_ok# SERVICE MSG',
     'OK', 'HBASEMASTER::HBase Master process'),

    ('RegionServer_process',
     'Critical: Hadoop: regionserver_process# SERVICE MSG',
     'CRITICAL', 'REGIONSERVER::RegionServer process'),
    ('RegionServer_process:OK',
     'OK: Hadoop: regionserver_process_ok# SERVICE MSG',
     'OK', 'REGIONSERVER::RegionServer process'),

    ('Nagios_status_log_freshness',
     'Critical: Hadoop: nagios_process# SERVICE MSG',
     'CRITICAL', 'NAGIOS::Nagios status log freshness'),
    ('Nagios_status_log_freshness:OK',
     'OK: Hadoop: nagios_process_ok# SERVICE MSG',
     'OK', 'NAGIOS::Nagios status log freshness'),

    ('Flume_Agent_process',
     'Critical: Hadoop: flume_agent_process# SERVICE MSG',
     'CRITICAL', 'FLUME::Flume Agent process'),
    ('Flume_Agent_process:OK',
     'OK: Hadoop: flume_agent_process_ok# SERVICE MSG',
     'OK', 'FLUME::Flume Agent process'),

    ('Oozie_Server_status',
     'Critical: Hadoop: oozie_server_process# SERVICE MSG',
     'CRITICAL', 'OOZIE::Oozie Server status'),
    ('Oozie_Server_status:OK',
     'OK: Hadoop: oozie_server_process_ok# SERVICE MSG',
     'OK', 'OOZIE::Oozie Server status'),

    ('Hive_Metastore_status',
     'Critical: Hadoop: hive_metastore_process# SERVICE MSG',
     'CRITICAL', 'HIVE-METASTORE::Hive Metastore status'),
    ('Hive_Metastore_status:OK',
     'OK: Hadoop: hive_metastore_process_ok# SERVICE MSG',
     'OK', 'HIVE-METASTORE::Hive Metastore status'),

    ('WebHCat_Server_status',
     'Critical: Hadoop: webhcat_down# SERVICE MSG',
     'CRITICAL', 'WEBHCAT::WebHCat Server status'),
    ('WebHCat_Server_status:OK',
     'OK: Hadoop: webhcat_down_ok# SERVICE MSG',
     'OK', 'WEBHCAT::WebHCat Server status'),

    ('ResourceManager_process',
     'Critical: Hadoop: resourcemanager_process_down# SERVICE MSG',
     'CRITICAL', 'RESOURCEMANAGER::ResourceManager process'),
    ('ResourceManager_process:OK',
     'OK: Hadoop: resourcemanager_process_down_ok# SERVICE MSG',
     'OK', 'RESOURCEMANAGER::ResourceManager process'),

    ('AppTimeline_process',
     'Critical: Hadoop: timelineserver_process# SERVICE MSG',
     'CRITICAL', 'APP_TIMELINE_SERVER::App Timeline Server process'),
    ('AppTimeline_process:OK',
     'OK: Hadoop: timelineserver_process_ok# SERVICE MSG',
     'OK', 'APP_TIMELINE_SERVER::App Timeline Server process'),

    ('NodeManager_process',
     'Critical: Hadoop: nodemanager_process_down# SERVICE MSG',
     'CRITICAL', 'NODEMANAGER::NodeManager process'),
    ('NodeManager_process:OK',
     'OK: Hadoop: nodemanager_process_down_ok# SERVICE MSG',
     'OK', 'NODEMANAGER::NodeManager process'),

    ('NodeManager_health',
     'Critical: Hadoop: nodemanager_health# SERVICE MSG',
     'CRITICAL', 'NODEMANAGER::NodeManager health'),
    ('NodeManager_health:OK',
     'OK: Hadoop: nodemanager_health_ok# SERVICE MSG',
     'OK', 'NODEMANAGER::NodeManager health'),

    ('NodeManager_live',
     'Critical: Hadoop: nodemanagers_down# SERVICE MSG',
     'CRITICAL', 'NODEMANAGER::Percent NodeManagers live'),
    ('NodeManager_live:OK',
     'OK: Hadoop: nodemanagers_down_ok# SERVICE MSG',
     'OK', 'NODEMANAGER::Percent NodeManagers live'),

    ('HistoryServer_process',
     'Critical: Hadoop: historyserver_process# SERVICE MSG',
     'CRITICAL', 'JOBHISTORY::HistoryServer process'),
    ('HistoryServer_process:OK',
     'OK: Hadoop: historyserver_process_ok# SERVICE MSG',
     'OK', 'JOBHISTORY::HistoryServer process'),

    ('HistoryServer_RPC_latency',
     'Critical: Hadoop: historyserver_rpc_latency# SERVICE MSG',
     'CRITICAL', 'JOBHISTORY::HistoryServer RPC latency'),
    ('HistoryServer_RPC_latency:OK',
     'OK: Hadoop: historyserver_rpc_latency_ok# SERVICE MSG',
     'OK', 'JOBHISTORY::HistoryServer RPC latency'),

    ('HistoryServer_CPU_utilization',
     'Critical: Hadoop: historyserver_cpu_utilization# SERVICE MSG',
     'CRITICAL', 'JOBHISTORY::HistoryServer CPU utilization'),
    ('HistoryServer_CPU_utilization:OK',
     'OK: Hadoop: historyserver_cpu_utilization_ok# SERVICE MSG',
     'OK', 'JOBHISTORY::HistoryServer CPU utilization'),

    ('HistoryServer_Web_UI',
     'Critical: Hadoop: historyserver_ui# SERVICE MSG',
     'CRITICAL', 'JOBHISTORY::HistoryServer Web UI'),
    ('HistoryServer_Web_UI:OK',
     'OK: Hadoop: historyserver_ui_ok# SERVICE MSG',
     'OK', 'JOBHISTORY::HistoryServer Web UI'),

    ('ResourceManager_rpc_latency',
     'Critical: Hadoop: resourcemanager_rpc_latency# SERVICE MSG',
     'CRITICAL', 'RESOURCEMANAGER::ResourceManager RPC latency'),
    ('ResourceManager_rpc_latency:OK',
     'OK: Hadoop: resourcemanager_rpc_latency_ok# SERVICE MSG',
     'OK', 'RESOURCEMANAGER::ResourceManager RPC latency'),

    ('ResourceManager_cpu_utilization',
     'Critical: Hadoop: resourcemanager_cpu_utilization# SERVICE MSG',
     'CRITICAL', 'RESOURCEMANAGER::ResourceManager CPU utilization'),
    ('ResourceManager_cpu_utilization:OK',
     'OK: Hadoop: resourcemanager_cpu_utilization_ok# SERVICE MSG',
     'OK', 'RESOURCEMANAGER::ResourceManager CPU utilization'),
]

for _rule, _expected, _state, _service in _SERVICE_CHECK_CASES:
    test(_rule, _expected, 'HARD', '1', _state, _service, 'SERVICE MSG')
# Report the totals for the whole run.
summary()

# Exit non-zero when any case failed so a calling shell or CI job can detect
# the failure; a fully passing run still ends with status 0 as before.
if tests_failed:
    sys.exit(1)
|