Просмотр исходного кода

AMBARI-18589. HCat client install during Ambari install wizard (aonishuk)

Andrew Onishuk 9 лет назад
Родитель
Сommit
cbcd85ed07

+ 3 - 7
ambari-agent/src/test/python/resource_management/TestPackageResource.py

@@ -40,9 +40,7 @@ class TestPackageResource(TestCase):
       Package("some_package",
         logoutput = False
       )
-    call_mock.assert_has_calls([call("dpkg --get-selections | grep -v deinstall | awk '{print $1}' | grep ^some-package$"),
- call(['/usr/bin/apt-get', '-q', '-o', 'Dpkg::Options::=--force-confdef', '--allow-unauthenticated', '--assume-yes', 'install', 'some-package'], logoutput=False, sudo=True, env={'DEBIAN_FRONTEND': 'noninteractive'}),
- call(['/usr/bin/apt-get', 'update', '-qq'], logoutput=False, sudo=True)])
+    call_mock.assert_has_calls([call("dpkg --get-selections | grep -v deinstall | awk '{print $1}' | grep ^some-package$")])
     
     shell_mock.assert_has_calls([call(['/usr/bin/apt-get', '-q', '-o', 'Dpkg::Options::=--force-confdef', 
                                        '--allow-unauthenticated', '--assume-yes', 'install', 'some-package'], logoutput=False, sudo=True,  env={'DEBIAN_FRONTEND': 'noninteractive'})])
@@ -57,11 +55,9 @@ class TestPackageResource(TestCase):
       Package("some_package",
         logoutput = False
       )
-    call_mock.assert_has_calls([call("dpkg --get-selections | grep -v deinstall | awk '{print $1}' | grep ^some-package$"),
- call(['/usr/bin/apt-get', '-q', '-o', 'Dpkg::Options::=--force-confdef', '--allow-unauthenticated', '--assume-yes', 'install', 'some-package'], logoutput=False, sudo=True, env={'DEBIAN_FRONTEND': 'noninteractive'})])
+    call_mock.assert_has_calls([call("dpkg --get-selections | grep -v deinstall | awk '{print $1}' | grep ^some-package$")])
 
-    
-    self.assertEqual(shell_mock.call_count, 0, "shell.checked_call shouldn't be called")
+    shell_mock.assert_has_call([call(['/usr/bin/apt-get', '-q', '-o', 'Dpkg::Options::=--force-confdef', '--allow-unauthenticated', '--assume-yes', 'install', 'some-package'], logoutput=False, sudo=True, env={'DEBIAN_FRONTEND': 'noninteractive'})])
 
   @patch.object(shell, "call")
   @patch.object(shell, "checked_call")

+ 13 - 1
ambari-common/src/main/python/resource_management/core/exceptions.py

@@ -20,7 +20,7 @@ Ambari Agent
 
 """
 
-__all__ = ["Fail", "ExecuteTimeoutException", "InvalidArgument", "ClientComponentHasNoStatus", "ComponentIsNotRunning"]
+__all__ = ["Fail", "ExecutionFailed", "ExecuteTimeoutException", "InvalidArgument", "ClientComponentHasNoStatus", "ComponentIsNotRunning"]
 
 class Fail(Exception):
   pass
@@ -46,3 +46,15 @@ class ComponentIsNotRunning(Fail):
   Later exception is silently processed at script.py
   """
   pass
+
+class ExecutionFailed(Fail):
+  """
+  Is thrown when shell command returns non-zero return code
+  """
+  def __init__(self, exception_message, code, out, err=None):
+    self.exception_message = exception_message
+    self.code = code
+    self.out = out
+    self.err = err
+
+    super(ExecutionFailed, self).__init__(exception_message)

+ 42 - 25
ambari-common/src/main/python/resource_management/core/providers/package/__init__.py

@@ -25,7 +25,7 @@ import time
 import re
 import logging
 
-from resource_management.core.base import Fail
+from resource_management.core.exceptions import ExecutionFailed
 from resource_management.core.providers import Provider
 from resource_management.core.logger import Logger
 from resource_management.core.utils import suppress_stdout
@@ -67,6 +67,9 @@ class PackageProvider(Provider):
     else:
       return self.resource.package_name
 
+  def get_repo_update_cmd(self):
+    raise NotImplementedError()
+
   def is_locked_output(self, out):
     return False
 
@@ -84,44 +87,58 @@ class PackageProvider(Provider):
 
   def _call_with_retries(self, cmd, is_checked=True, **kwargs):
     func = shell.checked_call if is_checked else shell.call
+    # at least do one retry, to run after repository is cleaned
+    try_count = 2 if self.resource.retry_count < 2 else self.resource.retry_count
+
+    for i in range(try_count):
+      is_first_time = (i == 0)
+      is_last_time = (i == try_count - 1)
 
-    for i in range(self.resource.retry_count):
-      is_last_time = (i == self.resource.retry_count - 1)
       try:
         code, out = func(cmd, **kwargs)
-      except Fail as ex:
-        # non-lock error
-        if not self._is_handled_error(str(ex), is_last_time) or is_last_time:
+      except ExecutionFailed as ex:
+        should_stop_retries = self._handle_retries(cmd, ex.code, ex.out, is_first_time, is_last_time)
+        if should_stop_retries:
           raise
-
-        self._notify_about_handled_error(str(ex), is_last_time)
       else:
-        # didn't fail or failed with non-lock error.
-        if not code or not self._is_handled_error(out, is_last_time):
+        should_stop_retries = self._handle_retries(cmd, code, out, is_first_time, is_last_time)
+        if should_stop_retries:
           break
 
-        self._notify_about_handled_error(str(out), is_last_time)
-
       time.sleep(self.resource.retry_sleep)
 
     return code, out
 
-  def _is_handled_error(self, output, is_last_time):
-    if self.resource.retry_on_locked and self.is_locked_output(output):
-      return True
-    elif self.resource.retry_on_repo_unavailability and self.is_repo_error_output(output):
-      return True
+  def _handle_retries(self, cmd, code, out, is_first_time, is_last_time):
+    # handle first failure in a special way (update repo metadata after it, so next try has a better chance to succeed)
+    if is_first_time and code and not self.is_locked_output(out):
+      self._update_repo_metadata_after_bad_try(cmd, code, out)
+      return False
 
-    return False
+    handled_error_log_message = None
+    if self.resource.retry_on_locked and self.is_locked_output(out):
+      handled_error_log_message = PACKAGE_MANAGER_LOCK_ACQUIRED_MSG.format(self.resource.retry_sleep, out)
+    elif self.resource.retry_on_repo_unavailability and self.is_repo_error_output(out):
+      handled_error_log_message = PACKAGE_MANAGER_REPO_ERROR_MSG.format(self.resource.retry_sleep, out)
+
+    is_handled_error = (handled_error_log_message is not None)
+    if is_handled_error and not is_last_time:
+      Logger.info(handled_error_log_message)
+
+    return (is_last_time or not code or not is_handled_error)
+
+  def _update_repo_metadata_after_bad_try(self, cmd, code, out):
+    name = self.get_package_name_with_version()
+    repo_update_cmd = self.get_repo_update_cmd()
+
+    Logger.info("Execution of '%s' returned %d. %s" % (shell.string_cmd_from_args_list(cmd), code, out))
+    Logger.info("Failed to install package %s. Executing '%s'" % (name, shell.string_cmd_from_args_list(repo_update_cmd)))
+    code, out = shell.call(repo_update_cmd, sudo=True, logoutput=self.get_logoutput())
 
-  def _notify_about_handled_error(self, output, is_last_time):
-    if is_last_time:
-      return
+    if code:
+      Logger.info("Execution of '%s' returned %d. %s" % (repo_update_cmd, code, out))
 
-    if self.resource.retry_on_locked and self.is_locked_output(output):
-      Logger.info(PACKAGE_MANAGER_LOCK_ACQUIRED_MSG.format(self.resource.retry_sleep, str(output)))
-    elif self.resource.retry_on_repo_unavailability and self.is_repo_error_output(output):
-      Logger.info(PACKAGE_MANAGER_REPO_ERROR_MSG.format(self.resource.retry_sleep, str(output)))
+    Logger.info("Retrying to install package %s after %d seconds" % (name, self.resource.retry_sleep))
 
   def yum_check_package_available(self, name):
     """

+ 4 - 17
ambari-common/src/main/python/resource_management/core/providers/package/apt.py

@@ -78,23 +78,7 @@ class AptProvider(PackageProvider):
 
       cmd = cmd + [name]
       Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
-      code, out = self.call_with_retries(cmd, sudo=True, env=INSTALL_CMD_ENV, logoutput=self.get_logoutput())
-      
-      if self.is_locked_output(out):
-        err_msg = Logger.filter_text("Execution of '%s' returned %d. %s" % (cmd, code, out))
-        raise Fail(err_msg)
-      
-      # apt-get update wasn't done too long maybe?
-      if code:
-        Logger.info("Execution of '%s' returned %d. %s" % (cmd, code, out))
-        Logger.info("Failed to install package %s. Executing `%s`" % (name, string_cmd_from_args_list(REPO_UPDATE_CMD)))
-        code, out = self.call_with_retries(REPO_UPDATE_CMD, sudo=True, logoutput=self.get_logoutput())
-        
-        if code:
-          Logger.info("Execution of '%s' returned %d. %s" % (REPO_UPDATE_CMD, code, out))
-          
-        Logger.info("Retrying to install package %s" % (name))
-        self.checked_call_with_retries(cmd, sudo=True, env=INSTALL_CMD_ENV, logoutput=self.get_logoutput())
+      self.checked_call_with_retries(cmd, sudo=True, env=INSTALL_CMD_ENV, logoutput=self.get_logoutput())
 
       if is_tmp_dir_created:
         for temporal_sources_file in copied_sources_files:
@@ -111,6 +95,9 @@ class AptProvider(PackageProvider):
   def is_repo_error_output(self, out):
     return "Failure when receiving data from the peer" in out
 
+  def get_repo_update_cmd(self):
+    return REPO_UPDATE_CMD
+
   @replace_underscores
   def upgrade_package(self, name, use_repos=[], skip_repos=[], is_upgrade=True):
     return self.install_package(name, use_repos, skip_repos, is_upgrade)

+ 6 - 1
ambari-common/src/main/python/resource_management/core/providers/package/yumrpm.py

@@ -36,6 +36,8 @@ REMOVE_CMD = {
   False: ['/usr/bin/yum', '-d', '0', '-e', '0', '-y', 'erase'],
 }
 
+REPO_UPDATE_CMD = ['/usr/bin/yum', 'clean','metadata']
+
 class YumProvider(PackageProvider):
   def install_package(self, name, use_repos=[], skip_repos=[], is_upgrade=False):
     if is_upgrade or use_repos or not self._check_existence(name):
@@ -63,7 +65,10 @@ class YumProvider(PackageProvider):
 
   def is_repo_error_output(self, out):
     return "Failure when receiving data from the peer" in out or \
-           "No more mirrors to try" in out
+           "Nothing to do" in out
+
+  def get_repo_update_cmd(self):
+    return REPO_UPDATE_CMD
 
   def _check_existence(self, name):
     """

+ 5 - 0
ambari-common/src/main/python/resource_management/core/providers/package/zypper.py

@@ -35,6 +35,8 @@ REMOVE_CMD = {
   False: ['/usr/bin/zypper', '--quiet', 'remove', '--no-confirm'],
 }
 
+REPO_UPDATE_CMD = ['/usr/bin/zypper', 'clean']
+
 LIST_ACTIVE_REPOS_CMD = ['/usr/bin/zypper', 'repos']
 
 class ZypperProvider(PackageProvider):
@@ -90,6 +92,9 @@ class ZypperProvider(PackageProvider):
   def is_repo_error_output(self, out):
     return "Failure when receiving data from the peer" in out
 
+  def get_repo_update_cmd(self):
+    return REPO_UPDATE_CMD
+
   def _check_existence(self, name):
     """
     For regexp names:

+ 2 - 3
ambari-common/src/main/python/resource_management/core/shell.py

@@ -32,8 +32,7 @@ import string
 import subprocess
 import threading
 import traceback
-from exceptions import Fail
-from exceptions import ExecuteTimeoutException
+from exceptions import Fail, ExecutionFailed, ExecuteTimeoutException
 from resource_management.core.logger import Logger
 from resource_management.core import utils
 from ambari_commons.constants import AMBARI_SUDO_BINARY
@@ -301,7 +300,7 @@ def _call(command, logoutput=None, throw_on_failure=True, stdout=subprocess.PIPE
   
   if throw_on_failure and code:
     err_msg = Logger.filter_text("Execution of '{0}' returned {1}. {2}".format(command_alias, code, all_output))
-    raise Fail(err_msg)
+    raise ExecutionFailed(err_msg, code, out, err)
   
   # if separate stderr is enabled (by default it's redirected to out)
   if stderr == subprocess.PIPE:

+ 2 - 2
ambari-common/src/main/python/resource_management/libraries/functions/get_user_call_output.py

@@ -24,7 +24,7 @@ import os
 import tempfile
 from resource_management.core import shell
 from resource_management.core.logger import Logger
-from resource_management.core.exceptions import Fail
+from resource_management.core.exceptions import ExecutionFailed
 
 def get_user_call_output(command, user, quiet=False, is_checked_call=True, **call_kwargs):
   """
@@ -58,7 +58,7 @@ def get_user_call_output(command, user, quiet=False, is_checked_call=True, **cal
       err_msg = Logger.filter_text(("Execution of '%s' returned %d. %s") % (command_string, code, all_output))
       
       if is_checked_call:
-        raise Fail(err_msg)
+        raise ExecutionFailed(err_msg, code, files_output[0], files_output[1])
       else:
         Logger.warning(err_msg)