Forráskód Böngészése

AMBARI-9298 Many alerts triggered on windows (echekanskiy via fbarca)

On Windows, the address 0.0.0.0 is valid for binding but not for connecting (on Linux it is resolved to 127.0.0.1), and this was causing alerts on Windows whenever a service was bound to 0.0.0.0.
Florian Barca 10 éve
szülő
commit
760e155833

+ 5 - 1
ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py

@@ -23,7 +23,8 @@ import socket
 import time
 from alerts.base_alert import BaseAlert
 from resource_management.libraries.functions.get_port_from_url import get_port_from_url
-
+from ambari_commons import OSCheck
+from ambari_commons.inet_utils import resolve_address
 logger = logging.getLogger()
 
 # default timeouts
@@ -114,6 +115,9 @@ class PortAlert(BaseAlert):
       s.settimeout(self.critical_timeout)
 
       t = time.time()
+      if OSCheck.is_windows_family():
+        # On Windows 0.0.0.0 is not a valid address to connect to, whereas on Linux it is resolved to 127.0.0.1
+        host = resolve_address(host)
       s.connect((host, port))
       milliseconds = time.time() - t
       seconds = milliseconds/1000.0

+ 5 - 1
ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py

@@ -24,6 +24,8 @@ import urllib2
 from alerts.base_alert import BaseAlert
 from collections import namedtuple
 from resource_management.libraries.functions.get_port_from_url import get_port_from_url
+from ambari_commons import OSCheck
+from ambari_commons.inet_utils import resolve_address
 
 logger = logging.getLogger()
 
@@ -93,7 +95,9 @@ class WebAlert(BaseAlert):
     scheme = 'http'
     if alert_uri.is_ssl_enabled is True:
       scheme = 'https'
-
+    if OSCheck.is_windows_family():
+      # On Windows 0.0.0.0 is not a valid address to connect to, whereas on Linux it is resolved to 127.0.0.1
+      host = resolve_address(host)
     return "{0}://{1}:{2}".format(scheme, host, str(port))
 
 

+ 13 - 0
ambari-common/src/main/python/ambari_commons/inet_utils.py

@@ -24,6 +24,7 @@ import urllib2
 
 from exceptions import *
 from logging_utils import *
+from os_check import OSCheck
 
 def download_file(link, destination, chunk_size=16 * 1024):
   print_info_msg("Downloading {0} to {1}".format(link, destination))
@@ -146,3 +147,15 @@ def force_download_file(link, destination, chunk_size = 16 * 1024, progress_func
     #Windows behavior: rename fails if the destination file exists
     os.unlink(destination)
   os.rename(temp_dest, destination)
+
+def resolve_address(address):
+  """
+  Resolves an address to a usable one in special cases, for example 0.0.0.0 to 127.0.0.1 on Windows.
+
+  :param address: address to resolve
+  :return: resulting address
+  """
+  if OSCheck.is_windows_family():
+    if address == '0.0.0.0':
+      return '127.0.0.1'
+  return address

+ 7 - 2
ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py

@@ -21,7 +21,8 @@ limitations under the License.
 import json
 import socket
 import urllib2
-
+from ambari_commons import OSCheck
+from ambari_commons.inet_utils import resolve_address
 RESULT_CODE_OK = 'OK'
 RESULT_CODE_CRITICAL = 'CRITICAL'
 RESULT_CODE_UNKNOWN = 'UNKNOWN'
@@ -92,7 +93,11 @@ def execute(parameters=None, host_name=None):
       host_name = socket.getfqdn()
 
     uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)
-
+  if OSCheck.is_windows_family():
+    uri_host, uri_port = uri.split(':')
+    # On Windows 0.0.0.0 is not a valid address to connect to, whereas on Linux it is resolved to 127.0.0.1
+    uri_host = resolve_address(uri_host)
+    uri = '{0}:{1}'.format(uri_host, uri_port)
   try:
     query = "{0}://{1}/ws/v1/node/info".format(scheme,uri)
     

+ 47 - 12
ambari-server/src/main/resources/host_scripts/alert_disk_space.py

@@ -21,6 +21,11 @@ limitations under the License.
 import collections
 import os
 import platform
+from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
+from ambari_commons import OSConst
+
+DiskInfo = collections.namedtuple('DiskInfo', 'total used free')
+
 
 def get_tokens():
   """
@@ -28,7 +33,7 @@ def get_tokens():
   to build the dictionary passed into execute
   """
   return None
-  
+
 
 def execute(parameters=None, host_name=None):
   """
@@ -47,30 +52,30 @@ def execute(parameters=None, host_name=None):
 
   if disk_usage is None or disk_usage.total == 0:
     return (('CRITICAL', ['Unable to determine the disk usage']))
-  
+
   result_code = 'OK'
   percent = disk_usage.used / float(disk_usage.total) * 100
   if percent > 50:
     result_code = 'WARNING'
   elif percent > 80:
     result_code = 'CRTICAL'
-    
-  label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format( 
-      percent, _get_formatted_size(disk_usage.used), 
+
+  label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format(
+      percent, _get_formatted_size(disk_usage.used),
       _get_formatted_size(disk_usage.total) )
-  
-  return ((result_code, [label]))
 
+  return ((result_code, [label]))
 
+@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 def _get_disk_usage(path='/'):
   """
   returns a named tuple that contains the total, used, and free disk space
-  in bytes
+  in bytes. Linux implementation.
   """
   used = 0
   total = 0
   free = 0
-  
+
   if 'statvfs' in dir(os):
     disk_stats = os.statvfs(path)
     free = disk_stats.f_bavail * disk_stats.f_frsize
@@ -78,15 +83,45 @@ def _get_disk_usage(path='/'):
     used = (disk_stats.f_blocks - disk_stats.f_bfree) * disk_stats.f_frsize
   else:
     raise NotImplementedError("{0} is not a supported platform for this alert".format(platform.platform()))
-  
-  DiskInfo = collections.namedtuple('DiskInfo', 'total used free')
+
+  return DiskInfo(total=total, used=used, free=free)
+
+
+@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
+def _get_disk_usage(path=None):
+  """
+  returns a named tuple that contains the total, used, and free disk space
+  in bytes. Windows implementation
+  """
+  import string
+  import ctypes
+
+  used = 0
+  total = 0
+  free = 0
+  drives = []
+  bitmask = ctypes.windll.kernel32.GetLogicalDrives()
+  for letter in string.uppercase:
+    if bitmask & 1:
+      drives.append(letter)
+    bitmask >>= 1
+  for drive in drives:
+    free_bytes = ctypes.c_ulonglong(0)
+    total_bytes = ctypes.c_ulonglong(0)
+    ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(drive + ":\\"),
+                                               None, ctypes.pointer(total_bytes),
+                                               ctypes.pointer(free_bytes))
+    total += total_bytes.value
+    free += free_bytes.value
+    used += total_bytes.value - free_bytes.value
+
   return DiskInfo(total=total, used=used, free=free)
 
 
 def _get_formatted_size(bytes):
   """
   formats the supplied bytes 
-  """  
+  """
   if bytes < 1000:
     return '%i' % bytes + ' B'
   elif 1000 <= bytes < 1000000: