Переглянути джерело

AMBARI-9298 Many alerts triggered on windows (echekanskiy via fbarca)

On Windows, the address 0.0.0.0 is valid for binding but not for connecting (on Linux it resolves to 127.0.0.1), which causes alerts to trigger on Windows when services are bound to 0.0.0.0.
Florian Barca 10 роки тому
батько
коміт
760e155833

+ 5 - 1
ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py

@@ -23,7 +23,8 @@ import socket
 import time
 import time
 from alerts.base_alert import BaseAlert
 from alerts.base_alert import BaseAlert
 from resource_management.libraries.functions.get_port_from_url import get_port_from_url
 from resource_management.libraries.functions.get_port_from_url import get_port_from_url
-
+from ambari_commons import OSCheck
+from ambari_commons.inet_utils import resolve_address
 logger = logging.getLogger()
 logger = logging.getLogger()
 
 
 # default timeouts
 # default timeouts
@@ -114,6 +115,9 @@ class PortAlert(BaseAlert):
       s.settimeout(self.critical_timeout)
       s.settimeout(self.critical_timeout)
 
 
       t = time.time()
       t = time.time()
+      if OSCheck.is_windows_family():
+        # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
+        host = resolve_address(host)
       s.connect((host, port))
       s.connect((host, port))
       milliseconds = time.time() - t
       milliseconds = time.time() - t
       seconds = milliseconds/1000.0
       seconds = milliseconds/1000.0

+ 5 - 1
ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py

@@ -24,6 +24,8 @@ import urllib2
 from alerts.base_alert import BaseAlert
 from alerts.base_alert import BaseAlert
 from collections import namedtuple
 from collections import namedtuple
 from resource_management.libraries.functions.get_port_from_url import get_port_from_url
 from resource_management.libraries.functions.get_port_from_url import get_port_from_url
+from ambari_commons import OSCheck
+from ambari_commons.inet_utils import resolve_address
 
 
 logger = logging.getLogger()
 logger = logging.getLogger()
 
 
@@ -93,7 +95,9 @@ class WebAlert(BaseAlert):
     scheme = 'http'
     scheme = 'http'
     if alert_uri.is_ssl_enabled is True:
     if alert_uri.is_ssl_enabled is True:
       scheme = 'https'
       scheme = 'https'
-
+    if OSCheck.is_windows_family():
+      # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
+      host = resolve_address(host)
     return "{0}://{1}:{2}".format(scheme, host, str(port))
     return "{0}://{1}:{2}".format(scheme, host, str(port))
 
 
 
 

+ 13 - 0
ambari-common/src/main/python/ambari_commons/inet_utils.py

@@ -24,6 +24,7 @@ import urllib2
 
 
 from exceptions import *
 from exceptions import *
 from logging_utils import *
 from logging_utils import *
+from os_check import OSCheck
 
 
 def download_file(link, destination, chunk_size=16 * 1024):
 def download_file(link, destination, chunk_size=16 * 1024):
   print_info_msg("Downloading {0} to {1}".format(link, destination))
   print_info_msg("Downloading {0} to {1}".format(link, destination))
@@ -146,3 +147,15 @@ def force_download_file(link, destination, chunk_size = 16 * 1024, progress_func
     #Windows behavior: rename fails if the destination file exists
     #Windows behavior: rename fails if the destination file exists
     os.unlink(destination)
     os.unlink(destination)
   os.rename(temp_dest, destination)
   os.rename(temp_dest, destination)
+
+def resolve_address(address):
+  """
+  Resolves address to proper one in special cases, for example 0.0.0.0 to 127.0.0.1 on windows os.
+
+  :param address: address to resolve
+  :return: resulting address
+  """
+  if OSCheck.is_windows_family():
+    if address == '0.0.0.0':
+      return '127.0.0.1'
+  return address

+ 7 - 2
ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py

@@ -21,7 +21,8 @@ limitations under the License.
 import json
 import json
 import socket
 import socket
 import urllib2
 import urllib2
-
+from ambari_commons import OSCheck
+from ambari_commons.inet_utils import resolve_address
 RESULT_CODE_OK = 'OK'
 RESULT_CODE_OK = 'OK'
 RESULT_CODE_CRITICAL = 'CRITICAL'
 RESULT_CODE_CRITICAL = 'CRITICAL'
 RESULT_CODE_UNKNOWN = 'UNKNOWN'
 RESULT_CODE_UNKNOWN = 'UNKNOWN'
@@ -92,7 +93,11 @@ def execute(parameters=None, host_name=None):
       host_name = socket.getfqdn()
       host_name = socket.getfqdn()
 
 
     uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)
     uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)
-
+  if OSCheck.is_windows_family():
+    uri_host, uri_port = uri.split(':')
+    # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1
+    uri_host = resolve_address(uri_host)
+    uri = '{0}:{1}'.format(uri_host, uri_port)
   try:
   try:
     query = "{0}://{1}/ws/v1/node/info".format(scheme,uri)
     query = "{0}://{1}/ws/v1/node/info".format(scheme,uri)
     
     

+ 47 - 12
ambari-server/src/main/resources/host_scripts/alert_disk_space.py

@@ -21,6 +21,11 @@ limitations under the License.
 import collections
 import collections
 import os
 import os
 import platform
 import platform
+from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
+from ambari_commons import OSConst
+
+DiskInfo = collections.namedtuple('DiskInfo', 'total used free')
+
 
 
 def get_tokens():
 def get_tokens():
   """
   """
@@ -28,7 +33,7 @@ def get_tokens():
   to build the dictionary passed into execute
   to build the dictionary passed into execute
   """
   """
   return None
   return None
-  
+
 
 
 def execute(parameters=None, host_name=None):
 def execute(parameters=None, host_name=None):
   """
   """
@@ -47,30 +52,30 @@ def execute(parameters=None, host_name=None):
 
 
   if disk_usage is None or disk_usage.total == 0:
   if disk_usage is None or disk_usage.total == 0:
     return (('CRITICAL', ['Unable to determine the disk usage']))
     return (('CRITICAL', ['Unable to determine the disk usage']))
-  
+
   result_code = 'OK'
   result_code = 'OK'
   percent = disk_usage.used / float(disk_usage.total) * 100
   percent = disk_usage.used / float(disk_usage.total) * 100
   if percent > 50:
   if percent > 50:
     result_code = 'WARNING'
     result_code = 'WARNING'
   elif percent > 80:
   elif percent > 80:
     result_code = 'CRTICAL'
     result_code = 'CRTICAL'
-    
-  label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format( 
-      percent, _get_formatted_size(disk_usage.used), 
+
+  label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format(
+      percent, _get_formatted_size(disk_usage.used),
       _get_formatted_size(disk_usage.total) )
       _get_formatted_size(disk_usage.total) )
-  
-  return ((result_code, [label]))
 
 
+  return ((result_code, [label]))
 
 
+@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 def _get_disk_usage(path='/'):
 def _get_disk_usage(path='/'):
   """
   """
   returns a named tuple that contains the total, used, and free disk space
   returns a named tuple that contains the total, used, and free disk space
-  in bytes
+  in bytes. Linux implementation.
   """
   """
   used = 0
   used = 0
   total = 0
   total = 0
   free = 0
   free = 0
-  
+
   if 'statvfs' in dir(os):
   if 'statvfs' in dir(os):
     disk_stats = os.statvfs(path)
     disk_stats = os.statvfs(path)
     free = disk_stats.f_bavail * disk_stats.f_frsize
     free = disk_stats.f_bavail * disk_stats.f_frsize
@@ -78,15 +83,45 @@ def _get_disk_usage(path='/'):
     used = (disk_stats.f_blocks - disk_stats.f_bfree) * disk_stats.f_frsize
     used = (disk_stats.f_blocks - disk_stats.f_bfree) * disk_stats.f_frsize
   else:
   else:
     raise NotImplementedError("{0} is not a supported platform for this alert".format(platform.platform()))
     raise NotImplementedError("{0} is not a supported platform for this alert".format(platform.platform()))
-  
-  DiskInfo = collections.namedtuple('DiskInfo', 'total used free')
+
+  return DiskInfo(total=total, used=used, free=free)
+
+
+@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
+def _get_disk_usage(path=None):
+  """
+  returns a named tuple that contains the total, used, and free disk space
+  in bytes. Windows implementation
+  """
+  import string
+  import ctypes
+
+  used = 0
+  total = 0
+  free = 0
+  drives = []
+  bitmask = ctypes.windll.kernel32.GetLogicalDrives()
+  for letter in string.uppercase:
+    if bitmask & 1:
+      drives.append(letter)
+    bitmask >>= 1
+  for drive in drives:
+    free_bytes = ctypes.c_ulonglong(0)
+    total_bytes = ctypes.c_ulonglong(0)
+    ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(drive + ":\\"),
+                                               None, ctypes.pointer(total_bytes),
+                                               ctypes.pointer(free_bytes))
+    total += total_bytes.value
+    free += free_bytes.value
+    used += total_bytes.value - free_bytes.value
+
   return DiskInfo(total=total, used=used, free=free)
   return DiskInfo(total=total, used=used, free=free)
 
 
 
 
 def _get_formatted_size(bytes):
 def _get_formatted_size(bytes):
   """
   """
   formats the supplied bytes 
   formats the supplied bytes 
-  """  
+  """
   if bytes < 1000:
   if bytes < 1000:
     return '%i' % bytes + ' B'
     return '%i' % bytes + ' B'
   elif 1000 <= bytes < 1000000:
   elif 1000 <= bytes < 1000000: