Browse the source code

AMBARI-11567. Improve logging for auto recovery and record desired state for RESTART (smohanty)

Sumit Mohanty, 10 years ago
parent
commit
0603d0048a

+ 10 - 1
ambari-agent/src/main/python/ambari_agent/ActionQueue.py

@@ -310,19 +310,28 @@ class ActionQueue(threading.Thread):
     else:
     else:
       roleResult['structuredOut'] = ''
       roleResult['structuredOut'] = ''
 
 
-    # let ambari know that configuration tags were applied
+    # let recovery manager know the current state
     if status == self.COMPLETED_STATUS:
     if status == self.COMPLETED_STATUS:
       if self.controller.recovery_manager.enabled() and command.has_key('roleCommand'):
       if self.controller.recovery_manager.enabled() and command.has_key('roleCommand'):
         if command['roleCommand'] == self.ROLE_COMMAND_START:
         if command['roleCommand'] == self.ROLE_COMMAND_START:
           self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
           self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
+          self.controller.recovery_manager.update_config_staleness(command['role'], False)
+          logger.info("After EXECUTION_COMMAND (START), current state of " + command['role'] + " to " +
+                       self.controller.recovery_manager.get_current_status(command['role']) )
         elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
         elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
           self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
           self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
+          logger.info("After EXECUTION_COMMAND (STOP/INSTALL), current state of " + command['role'] + " to " +
+                       self.controller.recovery_manager.get_current_status(command['role']) )
         elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
         elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
           if command['hostLevelParams'].has_key('custom_command') and \
           if command['hostLevelParams'].has_key('custom_command') and \
                   command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
                   command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
             self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
             self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
+            self.controller.recovery_manager.update_config_staleness(command['role'], False)
+            logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
+                         self.controller.recovery_manager.get_current_status(command['role']) )
       pass
       pass
 
 
+      # let ambari know that configuration tags were applied
       configHandler = ActualConfigHandler(self.config, self.configTags)
       configHandler = ActualConfigHandler(self.config, self.configTags)
       #update
       #update
       if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0  :
       if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0  :

+ 22 - 2
ambari-agent/src/main/python/ambari_agent/RecoveryManager.py

@@ -46,6 +46,7 @@ class RecoveryManager:
   HAS_STALE_CONFIG = "hasStaleConfigs"
   HAS_STALE_CONFIG = "hasStaleConfigs"
   EXECUTION_COMMAND_DETAILS = "executionCommandDetails"
   EXECUTION_COMMAND_DETAILS = "executionCommandDetails"
   ROLE_COMMAND = "roleCommand"
   ROLE_COMMAND = "roleCommand"
+  HOST_LEVEL_PARAMS = "hostLevelParams"
   PAYLOAD_LEVEL_DEFAULT = "DEFAULT"
   PAYLOAD_LEVEL_DEFAULT = "DEFAULT"
   PAYLOAD_LEVEL_MINIMAL = "MINIMAL"
   PAYLOAD_LEVEL_MINIMAL = "MINIMAL"
   PAYLOAD_LEVEL_EXECUTION_COMMAND = "EXECUTION_COMMAND"
   PAYLOAD_LEVEL_EXECUTION_COMMAND = "EXECUTION_COMMAND"
@@ -117,6 +118,15 @@ class RecoveryManager:
   def enabled(self):
   def enabled(self):
     return self.recovery_enabled
     return self.recovery_enabled
 
 
+  def get_current_status(self, component):
+    if component in self.statuses:
+      return self.statuses[component]["current"]
+    pass
+
+  def get_desired_status(self, component):
+    if component in self.statuses:
+      return self.statuses[component]["desired"]
+    pass
 
 
   def update_config_staleness(self, component, is_config_stale):
   def update_config_staleness(self, component, is_config_stale):
     """
     """
@@ -528,8 +538,18 @@ class RecoveryManager:
           if self.ROLE in command:
           if self.ROLE in command:
             if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP):
             if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP):
               self.update_desired_status(command[self.ROLE], LiveStatus.DEAD_STATUS)
               self.update_desired_status(command[self.ROLE], LiveStatus.DEAD_STATUS)
-            if command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START:
+              logger.info("Received EXECUTION_COMMAND (STOP/INSTALL), desired state of " + command[self.ROLE] + " to " +
+                           self.get_desired_status(command[self.ROLE]) )
+            elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START:
               self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
               self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
+              logger.info("Received EXECUTION_COMMAND (START), desired state of " + command[self.ROLE] + " to " +
+                           self.get_desired_status(command[self.ROLE]) )
+            elif command[self.HOST_LEVEL_PARAMS].has_key('custom_command') and \
+                    command[self.HOST_LEVEL_PARAMS]['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART:
+              self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
+              logger.info("Received EXECUTION_COMMAND (RESTART), desired state of " + command[self.ROLE] + " to " +
+                           self.get_desired_status(command[self.ROLE]) )
+
     pass
     pass
 
 
 
 
@@ -615,7 +635,7 @@ class RecoveryManager:
         command[self.ROLE_COMMAND] = "CUSTOM_COMMAND"
         command[self.ROLE_COMMAND] = "CUSTOM_COMMAND"
         command[self.COMMAND_TYPE] = ActionQueue.AUTO_EXECUTION_COMMAND
         command[self.COMMAND_TYPE] = ActionQueue.AUTO_EXECUTION_COMMAND
         command[self.TASK_ID] = self.get_unique_task_id()
         command[self.TASK_ID] = self.get_unique_task_id()
-        command['hostLevelParams']['custom_command'] = 'RESTART'
+        command[self.HOST_LEVEL_PARAMS]['custom_command'] = 'RESTART'
         return command
         return command
       else:
       else:
         logger.info("RESTART command cannot be computed as details are not received from Server.")
         logger.info("RESTART command cannot be computed as details are not received from Server.")

+ 28 - 2
ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py

@@ -62,7 +62,8 @@ class TestRecoveryManager(TestCase):
       "commandParams": {
       "commandParams": {
         "service_package_folder": "common-services/YARN/2.1.0.2.0/package"
         "service_package_folder": "common-services/YARN/2.1.0.2.0/package"
       }
       }
-    }
+    },
+    "hostLevelParams": {}
   }
   }
 
 
   exec_command2 = {
   exec_command2 = {
@@ -77,7 +78,8 @@ class TestRecoveryManager(TestCase):
       "commandParams": {
       "commandParams": {
         "service_package_folder": "common-services/YARN/2.1.0.2.0/package"
         "service_package_folder": "common-services/YARN/2.1.0.2.0/package"
       }
       }
-    }
+    },
+    "hostLevelParams": {}
   }
   }
 
 
   exec_command3 = {
   exec_command3 = {
@@ -92,6 +94,25 @@ class TestRecoveryManager(TestCase):
       "commandParams": {
       "commandParams": {
         "service_package_folder": "common-services/YARN/2.1.0.2.0/package"
         "service_package_folder": "common-services/YARN/2.1.0.2.0/package"
       }
       }
+    },
+    "hostLevelParams": {}
+  }
+
+  exec_command4 = {
+    "commandType": "EXECUTION_COMMAND",
+    "roleCommand": "CUSTOM_COMMAND",
+    "role": "NODEMANAGER",
+    "configurations": {
+      "capacity-scheduler": {
+        "yarn.scheduler.capacity.default.minimum-user-limit-percent": "100"},
+      "capacity-calculator": {
+        "yarn.scheduler.capacity.default.minimum-user-limit-percent": "100"},
+      "commandParams": {
+        "service_package_folder": "common-services/YARN/2.1.0.2.0/package"
+      }
+    },
+    "hostLevelParams": {
+      "custom_command": "RESTART"
     }
     }
   }
   }
 
 
@@ -127,6 +148,9 @@ class TestRecoveryManager(TestCase):
 
 
     rm.process_execution_commands([self.exec_command1, self.command])
     rm.process_execution_commands([self.exec_command1, self.command])
     mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED")])
     mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED")])
+
+    rm.process_execution_commands([self.exec_command4])
+    mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED")])
     pass
     pass
 
 
   def test_defaults(self):
   def test_defaults(self):
@@ -327,6 +351,8 @@ class TestRecoveryManager(TestCase):
 
 
     rm.update_current_status("NODEMANAGER", "INSTALLED")
     rm.update_current_status("NODEMANAGER", "INSTALLED")
     rm.update_desired_status("NODEMANAGER", "STARTED")
     rm.update_desired_status("NODEMANAGER", "STARTED")
+    self.assertEqual("INSTALLED", rm.get_current_status("NODEMANAGER"))
+    self.assertEqual("STARTED", rm.get_desired_status("NODEMANAGER"))
 
 
     commands = rm.get_recovery_commands()
     commands = rm.get_recovery_commands()
     self.assertEqual(1, len(commands))
     self.assertEqual(1, len(commands))