Browse Source

AMBARI-8890. Ambari detects Hive processes by pid, which is not a reliable method (aonishuk)

Andrew Onishuk 10 years ago
parent
commit
e8d6672ce2

+ 19 - 3
ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_service.py

@@ -96,10 +96,26 @@ def hive_service(name, action='start', rolling_restart=False):
       print "Successfully connected to Hive at %s on port %s after %d seconds" % (address, port, elapsed_time)    
             
   elif action == 'stop':
-    demon_cmd = format("sudo kill `cat {pid_file}`")
-    Execute(demon_cmd, not_if = format("! ({process_id_exists_command})"))
 
-    File(pid_file, action = "delete",)
+    daemon_kill_cmd = format("sudo kill `cat {pid_file}`")
+    daemon_hard_kill_cmd = format("sudo kill -9 `cat {pid_file}`")
+
+    Execute(daemon_kill_cmd,
+      not_if = format("! ({process_id_exists_command})")
+    )
+
+    wait_time = 5
+    Execute(daemon_hard_kill_cmd,
+      not_if = format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )")
+    )
+
+    # verify that the process has stopped; otherwise fail the task
+    Execute(format("! ({process_id_exists_command})")
+    )
+
+    File(pid_file,
+         action = "delete"
+    )
 
 def check_fs_root():
   import params  

+ 13 - 4
ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_metastore.py

@@ -68,10 +68,14 @@ class TestHiveMetastore(RMFTestCase):
     )
 
     self.assertResourceCalled('Execute', 'sudo kill `cat /var/run/hive/hive.pid`',
-        not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
+      not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
     )
+    self.assertResourceCalled('Execute', 'sudo kill -9 `cat /var/run/hive/hive.pid`',
+      not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) || ( sleep 5 && ! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) )',
+    )
+    self.assertResourceCalled('Execute', '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',)
     self.assertResourceCalled('File', '/var/run/hive/hive.pid',
-        action = ['delete'],
+      action = ['delete'],
     )
     self.assertNoMoreResources()
 
@@ -119,11 +123,16 @@ class TestHiveMetastore(RMFTestCase):
     )
 
     self.assertResourceCalled('Execute', 'sudo kill `cat /var/run/hive/hive.pid`',
-        not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
+      not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
+    )
+    self.assertResourceCalled('Execute', 'sudo kill -9 `cat /var/run/hive/hive.pid`',
+      not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) || ( sleep 5 && ! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) )',
     )
+    self.assertResourceCalled('Execute', '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',)
     self.assertResourceCalled('File', '/var/run/hive/hive.pid',
-        action = ['delete'],
+     action = ['delete'],
     )
+
     self.assertNoMoreResources()
 
   def assert_configure_default(self):

+ 12 - 4
ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_server.py

@@ -85,10 +85,14 @@ class TestHiveServer(RMFTestCase):
     )
 
     self.assertResourceCalled('Execute', 'sudo kill `cat /var/run/hive/hive-server.pid`',
-        not_if = '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1)',
+      not_if = '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1)',
     )
+    self.assertResourceCalled('Execute', 'sudo kill -9 `cat /var/run/hive/hive-server.pid`',
+      not_if = '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1) || ( sleep 5 && ! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1) )',
+    )
+    self.assertResourceCalled('Execute', '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1)',)
     self.assertResourceCalled('File', '/var/run/hive/hive-server.pid',
-        action = ['delete'],
+      action = ['delete'],
     )
     
     self.assertNoMoreResources()
@@ -152,10 +156,14 @@ class TestHiveServer(RMFTestCase):
     )
 
     self.assertResourceCalled('Execute', 'sudo kill `cat /var/run/hive/hive-server.pid`',
-        not_if = '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1)',
+      not_if = '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1)',
+    )
+    self.assertResourceCalled('Execute', 'sudo kill -9 `cat /var/run/hive/hive-server.pid`',
+      not_if = '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1) || ( sleep 5 && ! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1) )',
     )
+    self.assertResourceCalled('Execute', '! (ls /var/run/hive/hive-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive-server.pid` >/dev/null 2>&1)',)
     self.assertResourceCalled('File', '/var/run/hive/hive-server.pid',
-        action = ['delete'],
+      action = ['delete'],
     )
     
     self.assertNoMoreResources()

+ 14 - 6
ambari-server/src/test/python/stacks/2.1/HIVE/test_hive_metastore.py

@@ -70,11 +70,15 @@ class TestHiveMetastore(RMFTestCase):
     )
 
     self.assertResourceCalled('Execute', 'sudo kill `cat /var/run/hive/hive.pid`',
-        not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
-    )
+                              not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
+                              )
+    self.assertResourceCalled('Execute', 'sudo kill -9 `cat /var/run/hive/hive.pid`',
+                              not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) || ( sleep 5 && ! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) )',
+                              )
+    self.assertResourceCalled('Execute', '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',)
     self.assertResourceCalled('File', '/var/run/hive/hive.pid',
-        action = ['delete'],
-    )
+                              action = ['delete'],
+                              )
     self.assertNoMoreResources()
 
   def test_configure_secured(self):
@@ -121,10 +125,14 @@ class TestHiveMetastore(RMFTestCase):
     )
 
     self.assertResourceCalled('Execute', 'sudo kill `cat /var/run/hive/hive.pid`',
-        not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
+      not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',
+    )
+    self.assertResourceCalled('Execute', 'sudo kill -9 `cat /var/run/hive/hive.pid`',
+      not_if = '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) || ( sleep 5 && ! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1) )',
     )
+    self.assertResourceCalled('Execute', '! (ls /var/run/hive/hive.pid >/dev/null 2>&1 && ps -p `cat /var/run/hive/hive.pid` >/dev/null 2>&1)',)
     self.assertResourceCalled('File', '/var/run/hive/hive.pid',
-        action = ['delete'],
+      action = ['delete'],
     )
     self.assertNoMoreResources()