Przeglądaj źródła

AMBARI-2549. Ambari UI shows TaskTrackers down even if they are up. (Oleksandr Diachenko via smohanty)

git-svn-id: https://svn.apache.org/repos/asf/incubator/ambari/trunk@1499486 13f79535-47bb-0310-9956-ffa450edef68
Sumit Mohanty 12 lat temu
rodzic
commit
93479c9d25

+ 13 - 1
ambari-agent/src/main/python/ambari_agent/AmbariConfig.py

@@ -167,7 +167,19 @@ servicesToPidNames = {
   'WEBHCAT_SERVER': 'webhcat.pid',
 }
 
-linuxUserPattern = '[A-Za-z0-9_-]*[$]?'
+# Every service whose pid file name depends on a user must provide a user mapping
+servicesToLinuxUser = {
+  'NAMENODE': 'hdfs_user',
+  'SECONDARY_NAMENODE': 'hdfs_user',
+  'DATANODE': 'hdfs_user',
+  'JOBTRACKER': 'mapred_user',
+  'TASKTRACKER': 'mapred_user',
+  'RESOURCEMANAGER': 'yarn_user',
+  'NODEMANAGER': 'yarn_user',
+  'HISTORYSERVER': 'mapred_user',
+  'HBASE_MASTER': 'hbase_user',
+  'HBASE_REGIONSERVER': 'hbase_user',
+}
 
 pidPathesVars = [
   {'var' : 'hcfs_pid_dir_prefix',

+ 3 - 1
ambari-agent/src/main/python/ambari_agent/LiveStatus.py

@@ -115,7 +115,9 @@ class LiveStatus:
   # Live status was stripped from heartbeat after revision e1718dd
   def build(self):
     global SERVICES, COMPONENTS, LIVE_STATUS, DEAD_STATUS
-    statusCheck = StatusCheck(AmbariConfig.servicesToPidNames, AmbariConfig.pidPathesVars, self.globalConfig, AmbariConfig.linuxUserPattern)
+    statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
+      AmbariConfig.pidPathesVars, self.globalConfig,
+      AmbariConfig.servicesToLinuxUser)
     livestatus = None
     for component in self.COMPONENTS:
       if component["serviceName"] == self.service and component["componentName"] == self.component:

+ 19 - 5
ambari-agent/src/main/python/ambari_agent/StatusCheck.py

@@ -30,7 +30,8 @@ logger = logging.getLogger()
 
 class StatusCheck:
     
-    
+  USER_PATTERN='{USER}'
+
   def listFiles(self, dir):
     basedir = dir
     logger.debug("Files in " + os.path.abspath(dir) + ": ")
@@ -62,23 +63,36 @@ class StatusCheck:
     except Exception as e:
         logger.error("Error while filling directories values " + str(e))
         
-  def __init__(self, serviceToPidDict, pidPathesVars, globalConfig, linuxUserPattern):
-
+  def __init__(self, serviceToPidDict, pidPathesVars, globalConfig,
+    servicesToLinuxUser):
+    
     self.serToPidDict = serviceToPidDict
     self.pidPathesVars = pidPathesVars
     self.pidPathes = []
     self.sh = shellRunner()
     self.pidFilesDict = {}
     self.globalConfig = globalConfig
-    self.linuxUserPattern = linuxUserPattern
+    self.servicesToLinuxUser = servicesToLinuxUser
     
     self.fillDirValues()
     
     for pidPath in self.pidPathes:
       self.listFiles(pidPath)
 
+    logger.info('serToPidDict:')
+    logger.info(self.serToPidDict.items())
+
     for service, pid in self.serToPidDict.items():
-      self.serToPidDict[service] = string.replace(pid, '{USER}', self.linuxUserPattern)
+      if self.servicesToLinuxUser.has_key(service):
+        linuxUserKey = self.servicesToLinuxUser[service]
+        if self.globalConfig.has_key(linuxUserKey):
+          self.serToPidDict[service] = string.replace(pid, self.USER_PATTERN,
+            self.globalConfig[linuxUserKey])
+      else:
+        if self.USER_PATTERN in pid:
+          logger.error('There is no linux user mapping for component: ' + service)
+
+    logger.debug('Service to pid dictionary: ' + str(self.serToPidDict))
 
   def getIsLive(self, pidPath):
 

+ 56 - 6
ambari-agent/src/test/python/TestStatusCheck.py

@@ -33,10 +33,10 @@ USERNAME_CHARS=string.ascii_uppercase +string.ascii_lowercase + string.digits +
 PID_DIR='/pids_dir'
 
 COMPONENT_LIVE = 'LIVE_COMPONENT'
-COMPONENT_LIVE_PID = 'live_{USER}_comp.pid'
+COMPONENT_LIVE_PID = 'live_' + StatusCheck.USER_PATTERN + '_comp.pid'
 
 COMPONENT_DEAD = 'DEAD_COMPONENT'
-COMPONENT_DEAD_PID = 'dead_{USER}_comp.pid'
+COMPONENT_DEAD_PID = 'dead_' + StatusCheck.USER_PATTERN + '_comp.pid'
 
 class TestStatusCheck(TestCase):
 
@@ -59,12 +59,12 @@ class TestStatusCheck(TestCase):
 
     live_user = self.generateUserName()
     self.logger.info('Live user: ' + live_user)
-    self.live_pid_file_name = string.replace(COMPONENT_LIVE_PID, '{USER}', live_user)
+    self.live_pid_file_name = string.replace(COMPONENT_LIVE_PID, StatusCheck.USER_PATTERN, live_user)
     self.live_pid_full_path = PID_DIR + os.sep + self.live_pid_file_name
 
     dead_user = self.generateUserName()
     self.logger.info('Dead user: ' + live_user)
-    self.dead_pid_file_name = string.replace(COMPONENT_DEAD_PID, '{USER}', dead_user)
+    self.dead_pid_file_name = string.replace(COMPONENT_DEAD_PID, StatusCheck.USER_PATTERN, dead_user)
     self.dead_pid_full_path = PID_DIR + os.sep + self.dead_pid_file_name
 
     self.pidFilesDict = {self.live_pid_file_name : self.live_pid_full_path,
@@ -72,13 +72,20 @@ class TestStatusCheck(TestCase):
 
     self.is_live_values = {self.live_pid_full_path : True,
                       self.dead_pid_full_path : False}
+    
+    self.servicesToLinuxUser = {COMPONENT_LIVE : 'live_user',
+                                COMPONENT_DEAD : 'dead_user'}
+
+    self.globalConfig = {'live_user' : live_user,
+                         'dead_user' : dead_user}
 
     
   # Ensure that status checker return True for running process
   @patch.object(StatusCheck, 'getIsLive')
   def test_live(self, get_is_live_mock):
 
-    statusCheck = StatusCheck(self.serviceToPidDict, self.pidPathesVars,{},AmbariConfig.linuxUserPattern)
+    statusCheck = StatusCheck(self.serviceToPidDict, self.pidPathesVars,
+      self.globalConfig, self.servicesToLinuxUser)
 
     statusCheck.pidFilesDict = self.pidFilesDict
     
@@ -87,10 +94,53 @@ class TestStatusCheck(TestCase):
     status = statusCheck.getStatus(COMPONENT_LIVE)
     self.assertEqual(status, True)
 
+  # Ensure that the status checker returns True for a running process even if
+  # multiple pid files exist for a service component
+  @patch.object(StatusCheck, 'getIsLive')
+  def test_live_if_multiple_pids(self, get_is_live_mock):
+
+    one_more_pid_file_name = string.replace(COMPONENT_LIVE_PID, StatusCheck.USER_PATTERN,
+      'any_other_linux_user')
+    one_more_pid_full_path = PID_DIR + os.sep + one_more_pid_file_name
+
+    self.pidFilesDict[one_more_pid_file_name] = one_more_pid_full_path
+    self.is_live_values[one_more_pid_full_path] = False
+
+    statusCheck = StatusCheck(self.serviceToPidDict, self.pidPathesVars,
+      self.globalConfig, self.servicesToLinuxUser)
+
+    statusCheck.pidFilesDict = self.pidFilesDict
+
+    get_is_live_mock.side_effect = lambda pid_path : self.is_live_values[pid_path]
+
+    status = statusCheck.getStatus(COMPONENT_LIVE)
+    self.assertEqual(status, True)
+    
+  # Ensure that the status checker logs an error message if there is no Linux
+  # user mapping for a service whose pid file name depends on the user
+  @patch.object(StatusCheck, 'getIsLive')
+  @patch.object(logger, "error")
+  def test_no_user_mapping(self, error_mock, get_is_live_mock):
+
+    
+    badServiceToPidDict = self.serviceToPidDict.copy()
+    badServiceToPidDict['BAD_COMPONENT'] = 'prefix' + StatusCheck.USER_PATTERN
+
+    statusCheck = StatusCheck(badServiceToPidDict, self.pidPathesVars,
+      self.globalConfig, self.servicesToLinuxUser)
+
+    statusCheck.pidFilesDict = self.pidFilesDict
+
+    get_is_live_mock.side_effect = lambda pid_path : self.is_live_values[pid_path]
+
+    status = statusCheck.getStatus(COMPONENT_LIVE)
+    self.assertTrue(error_mock.called)
+
   # Ensure that status checker return False for dead process
   @patch.object(StatusCheck, 'getIsLive')
   def test_dead(self, get_is_live_mock):
-    statusCheck = StatusCheck(self.serviceToPidDict, self.pidPathesVars,{},AmbariConfig.linuxUserPattern)
+    statusCheck = StatusCheck(self.serviceToPidDict, self.pidPathesVars,
+      self.globalConfig, self.servicesToLinuxUser)
 
     statusCheck.pidFilesDict = self.pidFilesDict