Prechádzať zdrojové kódy

AMBARI-9523. We should be doing disk space check before deploying cluster and/or RU (dlysnichenko)

Lisnichenko Dmitro 10 rokov pred
rodič
commit
1d28f98910

+ 2 - 33
ambari-agent/src/main/python/ambari_agent/HostInfo.py

@@ -141,7 +141,8 @@ class HostInfoLinux(HostInfo):
 
   # Default set of directories that are checked for existence of files and folders
   DEFAULT_DIRS = [
-    "/etc", "/var/run", "/var/log", "/usr/lib", "/var/lib", "/var/tmp", "/tmp", "/var", "/hadoop"
+    "/etc", "/var/run", "/var/log", "/usr/lib", "/var/lib", "/var/tmp", "/tmp", "/var",
+    "/hadoop", "/usr/hdp"
   ]
 
   # Packages that are used to find repos (then repos are used to find other packages)
@@ -189,38 +190,6 @@ class HostInfoLinux(HostInfo):
       pass
     return diskInfo
 
-  def createAlerts(self, alerts):
-    existingUsers = []
-    self.checkUsers(self.DEFAULT_USERS, existingUsers)
-    dirs = []
-    self.checkFolders(self.DEFAULT_DIRS, self.DEFAULT_PROJECT_NAMES, existingUsers, dirs)
-    alert = {
-      'name': 'host_alert',
-      'instance': None,
-      'service': 'AMBARI',
-      'component': 'host',
-      'host': hostname.hostname(self.config),
-      'state': 'OK',
-      'label': 'Disk space',
-      'text': 'Used disk space less than 80%'}
-    message = ""
-    mountinfoSet = []
-    for dir in dirs:
-      if dir["type"] == 'directory':
-        mountinfo = self.osdiskAvailableSpace(dir['name'])
-        if int(mountinfo["percent"].strip('%')) >= 80:
-          if not mountinfo in mountinfoSet:
-            mountinfoSet.append(mountinfo)
-          message += str(dir['name']) + ";\n"
-
-    if message != "":
-      message = "These discs have low space:\n" + str(
-        mountinfoSet) + "\n They include following critical directories:\n" + message
-      alert['state'] = 'WARNING'
-      alert['text'] = message
-    alerts.append(alert)
-    return alerts
-
   def checkUsers(self, users, results):
     f = open('/etc/passwd', 'r')
     for userLine in f:

+ 58 - 14
ambari-server/src/main/resources/host_scripts/alert_disk_space.py

@@ -25,7 +25,8 @@ from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
 from ambari_commons import OSConst
 
 DiskInfo = collections.namedtuple('DiskInfo', 'total used free')
-
+MIN_FREE_SPACE = 5000000000L   # 5GB
+USR_HDP = '/usr/hdp'
 
 def get_tokens():
   """
@@ -34,9 +35,10 @@ def get_tokens():
   """
   return None
 
-
+@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 def execute(parameters=None, host_name=None):
   """
+  Performs advanced disk checks under Linux
   Returns a tuple containing the result code and a pre-formatted result label
 
   Keyword arguments:
@@ -44,27 +46,69 @@ def execute(parameters=None, host_name=None):
   host_name (string): the name of this host where the alert is running
   """
 
-  disk_usage = None
+  # Check usage of root partition
   try:
     disk_usage = _get_disk_usage()
+    result_code, label = _get_warnings_for_partition(disk_usage)
   except NotImplementedError, platform_error:
-    return (('CRITICAL', [str(platform_error)]))
+    return 'CRITICAL', [str(platform_error)]
+
+  if result_code == 'OK':
+    # Root partition seems to be OK, let's check /usr/hdp
+    try:
+      disk_usage = _get_disk_usage(USR_HDP)
+      result_code_usr_hdp, label_usr_hdp = _get_warnings_for_partition(disk_usage)
+      if result_code_usr_hdp != 'OK':
+        label = "{0}. Insufficient space at {1}: {2}".format(label, USR_HDP, label_usr_hdp)
+        result_code = 'WARNING'
+    except NotImplementedError, platform_error:
+      return 'CRITICAL', [str(platform_error)]
+    pass
+
+  return result_code, [label]
+
 
+def _get_warnings_for_partition(disk_usage):  # returns (result_code, label) where label is always a plain string
   if disk_usage is None or disk_usage.total == 0:
-    return (('CRITICAL', ['Unable to determine the disk usage']))
+    return 'CRITICAL', 'Unable to determine the disk usage'  # plain string; callers wrap it in a list
 
   result_code = 'OK'
   percent = disk_usage.used / float(disk_usage.total) * 100
-  if percent > 50:
+  if percent > 80:  # thresholds apply to the percentage of used capacity
+    result_code = 'CRITICAL'
+  elif percent > 50:
     result_code = 'WARNING'
-  elif percent > 80:
-    result_code = 'CRTICAL'
 
   label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format(
-      percent, _get_formatted_size(disk_usage.used),
-      _get_formatted_size(disk_usage.total) )
+    percent, _get_formatted_size(disk_usage.used),
+    _get_formatted_size(disk_usage.total))
+
+  if result_code == 'OK':
+    # Percentage is fine; still warn when the absolute free space is below MIN_FREE_SPACE
+    if disk_usage.free < MIN_FREE_SPACE:
+      result_code = 'WARNING'
+      label += '. Free space < {0}'.format(_get_formatted_size(MIN_FREE_SPACE))
+
+  return result_code, label
+
+
+@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
+def execute(parameters=None, host_name=None):
+  """
+  Performs simplified disk checks under Windows (default path of _get_disk_usage only)
+  Returns a tuple of the result code and a one-element list holding the result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+  try:
+    disk_usage = _get_disk_usage()
+    result_code, label = _get_warnings_for_partition(disk_usage)
+  except NotImplementedError, platform_error:
+    return 'CRITICAL', [str(platform_error)]
+  return result_code, [label]  # same (code, [label]) shape as the default execute
 
-  return ((result_code, [label]))
 
 @OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 def _get_disk_usage(path='/'):
@@ -125,13 +169,13 @@ def _get_formatted_size(bytes):
   if bytes < 1000:
     return '%i' % bytes + ' B'
   elif 1000 <= bytes < 1000000:
-    return '%.1f' % (bytes/1000.0) + ' KB'
+    return '%.1f' % (bytes / 1000.0) + ' KB'
   elif 1000000 <= bytes < 1000000000:
     return '%.1f' % (bytes / 1000000.0) + ' MB'
   elif 1000000000 <= bytes < 1000000000000:
-    return '%.1f' % (bytes/1000000000.0) + ' GB'
+    return '%.1f' % (bytes / 1000000000.0) + ' GB'
   else:
-    return '%.1f' % (bytes/1000000000000.0) + ' TB'
+    return '%.1f' % (bytes / 1000000000000.0) + ' TB'
 
 if __name__ == '__main__':
     print _get_disk_usage(os.getcwd())

+ 78 - 0
ambari-server/src/test/python/host_scripts/TestAlertDiskSpace.py

@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import json
+import os
+import socket, pprint
+import alert_disk_space
+from mock.mock import patch, MagicMock
+from ambari_commons.os_check import OSCheck
+from stacks.utils.RMFTestCase import *
+
+class TestAlertDiskSpace(RMFTestCase):
+
+  @patch('alert_disk_space._get_disk_usage')
+  def test_linux_flow(self, disk_usage_mock):
+    # Root (/) OK and /usr/hdp OK (same mocked value for both calls) -> overall OK
+    disk_usage_mock.return_value = \
+      alert_disk_space.DiskInfo(total=21673930752L, used=5695861760L, free=15978068992L)
+    res = alert_disk_space.execute()
+    self.assertEqual(res, ('OK', ['Capacity Used: [26.28%, 5.7 GB], Capacity Total: [21.7 GB]']))
+
+    # Root (/) above 50% used -> WARNING; /usr/hdp is only checked when root is OK
+    disk_usage_mock.return_value = \
+      alert_disk_space.DiskInfo(total=21673930752L, used=14521533603L, free=7152397149L)
+    res = alert_disk_space.execute()
+    self.assertEqual(res, ('WARNING', ['Capacity Used: [67.00%, 14.5 GB], Capacity Total: [21.7 GB]']))
+
+    # Root (/) above 80% used -> CRITICAL; /usr/hdp is only checked when root is OK
+    disk_usage_mock.return_value = \
+      alert_disk_space.DiskInfo(total=21673930752L, used=20590234214L, free=1083696538)
+    res = alert_disk_space.execute()
+    self.assertEqual(res, ('CRITICAL', ['Capacity Used: [95.00%, 20.6 GB], Capacity Total: [21.7 GB]']))
+
+    # Root (/) percentage fine but free space below MIN_FREE_SPACE (5 GB) -> WARNING
+    disk_usage_mock.return_value = \
+      alert_disk_space.DiskInfo(total=5418482688L, used=1625544806L, free=3792937882L)
+    res = alert_disk_space.execute()
+    self.assertEqual(res, ('WARNING', ['Capacity Used: [30.00%, 1.6 GB], Capacity Total: [5.4 GB]. Free space < 5.0 GB']))
+
+    # Root (/) OK, /usr/hdp WARNING -> overall WARNING (side_effect: 1st value is root, 2nd is /usr/hdp)
+    disk_usage_mock.side_effect = \
+      [alert_disk_space.DiskInfo(total=21673930752L, used=5695861760L, free=15978068992L),
+       alert_disk_space.DiskInfo(total=21673930752L, used=14521533603L, free=7152397149L)]
+    res = alert_disk_space.execute()
+    self.assertEqual(res, ('WARNING', ["Capacity Used: [26.28%, 5.7 GB], Capacity Total: [21.7 GB]. "
+                                       "Insufficient space at /usr/hdp: Capacity Used: [95.00%, 20.6 GB], Capacity Total: [21.7 GB]"]))
+
+    # Root (/) OK, /usr/hdp below MIN_FREE_SPACE (5 GB) -> overall WARNING
+    disk_usage_mock.side_effect = \
+      [alert_disk_space.DiskInfo(total=21673930752L, used=5695861760L, free=15978068992L),
+       alert_disk_space.DiskInfo(total=5418482688L, used=1625544806L, free=3792937882L)]
+    res = alert_disk_space.execute()
+    self.assertEqual(res, ('WARNING', ["Capacity Used: [26.28%, 5.7 GB], Capacity Total: [21.7 GB]. "
+                                       "Insufficient space at /usr/hdp: Capacity Used: [30.00%, 1.6 GB], Capacity Total: [5.4 GB]. Free space < 5.0 GB"]))

+ 3 - 1
ambari-server/src/test/python/unitTests.py

@@ -138,7 +138,8 @@ def main():
   sys.path.append(os.path.join(ambari_server_folder, "src/main/python"))
   sys.path.append(os.path.join(ambari_server_folder, "src/main/resources/scripts"))
   sys.path.append(os.path.join(ambari_server_folder, "src/main/resources/custom_actions/scripts"))
-  
+  sys.path.append(os.path.join(ambari_server_folder, "src/main/resources/host_scripts"))
+
   stacks_folder = os.path.join(pwd, 'stacks')
   #generate test variants(path, service, stack)
   test_variants = []
@@ -196,6 +197,7 @@ def main():
 
   test_dirs = [
     (os.path.join(pwd, 'custom_actions'), "\nRunning tests for custom actions\n"),
+    (os.path.join(pwd, 'host_scripts'), "\nRunning tests for host scripts\n"),
     (pwd, "\nRunning tests for ambari-server\n"),
   ]