Browse source

AMBARI-13615 - Express Upgrade: ZKFC Cannot Stop Because Newer Configurations Don't Exist (jonathanhurley)

Jonathan Hurley, 9 years ago
Parent commit: 4dac97f724
18 files changed, 142 additions and 187 deletions
  1. +5 -1    ambari-common/src/main/python/resource_management/libraries/functions/hdp_select.py
  2. +8 -16   ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py
  3. +18 -8   ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/zkfc_slave.py
  4. +8 -4    ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
  5. +5 -1    ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
  6. +9 -5    ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
  7. +1 -1    ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/upgrade-2.2.xml
  8. +1 -1    ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/upgrade-2.3.xml
  9. +19 -15  ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
  10. +1 -1   ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/upgrade-2.3.xml
  11. +13 -36 ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py
  12. +6 -18  ambari-server/src/test/python/stacks/2.0.6/HDFS/test_journalnode.py
  13. +4 -18  ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
  14. +6 -18  ambari-server/src/test/python/stacks/2.0.6/HDFS/test_nfsgateway.py
  15. +6 -18  ambari-server/src/test/python/stacks/2.0.6/HDFS/test_snamenode.py
  16. +6 -18  ambari-server/src/test/python/stacks/2.0.6/HDFS/test_zkfc.py
  17. +5 -2   ambari-server/src/test/python/stacks/2.1/HIVE/test_hive_metastore.py
  18. +21 -6  ambari-server/src/test/python/stacks/utils/RMFTestCase.py

+ 5 - 1
ambari-common/src/main/python/resource_management/libraries/functions/hdp_select.py

@@ -69,7 +69,11 @@ SERVER_ROLE_DIRECTORY_MAP = {
   'APP_TIMELINE_SERVER' : 'hadoop-yarn-timelineserver',
   'NODEMANAGER' : 'hadoop-yarn-nodemanager',
   'RESOURCEMANAGER' : 'hadoop-yarn-resourcemanager',
-  'ZOOKEEPER_SERVER' : 'zookeeper-server'
+  'ZOOKEEPER_SERVER' : 'zookeeper-server',
+
+  # ZKFC is tied to the NN since it doesn't have its own component in hdp-select
+  # and there is a requirement that ZKFC be installed on each NN
+  'ZKFC' : 'hadoop-hdfs-namenode'
 }
 
 # mapping of service check to hdp-select component

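As an illustration of how this map is consumed, here is a minimal sketch; the lookup helper below is hypothetical (hdp_select.py has its own accessors), but it shows the effect of the new entry: ZKFC now resolves to the NameNode's hdp-select package instead of falling through unmapped.

    # Minimal sketch (hypothetical helper, not from hdp_select.py) of the lookup.
    SERVER_ROLE_DIRECTORY_MAP = {
        'NAMENODE': 'hadoop-hdfs-namenode',
        'ZOOKEEPER_SERVER': 'zookeeper-server',
        # ZKFC has no hdp-select component of its own; it rides along with the NN
        'ZKFC': 'hadoop-hdfs-namenode',
    }

    def directory_for_role(role):
        """Return the hdp-select package for a server role, or None if unmapped."""
        return SERVER_ROLE_DIRECTORY_MAP.get(role)

    assert directory_for_role('ZKFC') == 'hadoop-hdfs-namenode'
    assert directory_for_role('UNKNOWN_ROLE') is None  # unmapped roles fall through
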
+ 8 - 16
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py

@@ -186,7 +186,7 @@ def service(action=None, name=None, user=None, options="", create_pid_dir=False,
     }
     hadoop_env_exports.update(custom_export)
 
-  check_process = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file])
+  process_id_exists_command = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file])
 
   # on STOP directories shouldn't be created
   # since during stop still old dirs are used (which were created during previous start)
@@ -261,21 +261,13 @@ def service(action=None, name=None, user=None, options="", create_pid_dir=False,
       cmd += " " + options
     daemon_cmd = as_user(cmd, user)
      
-  service_is_up = check_process if action == "start" else None
-  #remove pid file from dead process
-  File(pid_file,
-       action="delete",
-       not_if=check_process
-  )
-  Execute(daemon_cmd,
-          not_if=service_is_up,
-          environment=hadoop_env_exports
-  )
-
-  if action == "stop":
-    File(pid_file,
-         action="delete",
-    )
+  if action == "start":
+    # remove pid file from dead process
+    File(pid_file, action="delete", not_if=process_id_exists_command)
+    Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
+  elif action == "stop":
+    Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports)
+    File(pid_file, action="delete")
 
 def get_jmx_data(nn_address, modeler_type, metric, encrypted=False, security_enabled=False):
   """

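The restructuring above replaces the shared not_if guard and the duplicated pid-file handling with explicit per-action logic: on start, a stale pid file is deleted and the daemon launched only when no matching process is alive; on stop, the daemon command runs only when one is, and the pid file is always removed afterward. A rough standalone Python equivalent of that control flow, with plain subprocess calls standing in for the resource_management Execute/File resources:

    import os
    import subprocess

    def daemon_action(action, pid_file, daemon_cmd):
        """Standalone mirror of the start/stop guards: start only when no live
        process matches the pid file; stop only when one does."""
        pid_alive = (os.path.isfile(pid_file)
                     and subprocess.call(["pgrep", "-F", pid_file]) == 0)

        if action == "start":
            if not pid_alive:
                if os.path.isfile(pid_file):
                    os.remove(pid_file)  # clear a pid file left by a dead process
                subprocess.check_call(daemon_cmd, shell=True)
        elif action == "stop":
            if pid_alive:
                subprocess.check_call(daemon_cmd, shell=True)
            if os.path.isfile(pid_file):
                os.remove(pid_file)  # always clear the pid file after a stop
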
+ 18 - 8
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/zkfc_slave.py

@@ -16,16 +16,24 @@ See the License for the specific language governing permissions and
 limitations under the License.
 
 """
-
-from resource_management import *
-from resource_management.libraries.functions.check_process_status import check_process_status
-from resource_management.libraries.functions.security_commons import build_expectations, \
-  cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties, \
-  FILE_TYPE_XML
-import utils  # this is needed to avoid a circular dependency since utils.py calls this class
+# this is needed to avoid a circular dependency since utils.py calls this class
+import utils
 from hdfs import hdfs
-from ambari_commons.os_family_impl import OsFamilyImpl
+
 from ambari_commons import OSConst
+from ambari_commons.os_family_impl import OsFamilyImpl
+from resource_management.core.logger import Logger
+from resource_management.core.exceptions import Fail
+from resource_management.core.resources.system import Directory
+from resource_management.core.resources.service import Service
+from resource_management.core import shell
+from resource_management.libraries.functions.check_process_status import check_process_status
+from resource_management.libraries.functions.security_commons import build_expectations
+from resource_management.libraries.functions.security_commons import cached_kinit_executor
+from resource_management.libraries.functions.security_commons import get_params_from_filesystem
+from resource_management.libraries.functions.security_commons import validate_security_config_properties
+from resource_management.libraries.functions.security_commons import FILE_TYPE_XML
+from resource_management.libraries.script import Script
 
 class ZkfcSlave(Script):
   def install(self, env):
@@ -159,6 +167,8 @@ class ZkfcSlaveWindows(ZkfcSlave):
 
   def status(self, env):
     import status_params
+    from resource_management.libraries.functions.windows_service_utils import check_windows_service_status
+
     env.set_params(status_params)
     check_windows_service_status(status_params.zkfc_win_service_name)
 

+ 8 - 4
ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml

@@ -268,15 +268,15 @@
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_nimbus_convert_nimbus_host_to_seeds"/>
       </execute-stage>
-      
+
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_update_storm_env"/>
       </execute-stage>
-      
+
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_add_storm_cluster_logs_content"/>
       </execute-stage>
-      
+
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_add_storm_worker_logs_content"/>
       </execute-stage>
@@ -296,6 +296,10 @@
       </execute-stage>
     </group>
 
+    <!-- Invoke hdp-select set all after all components have been stopped but
+    before starting new ones. This step must be done here in order to
+    ensure that components which rely on other components (like ZKFC on hadoop)
+    will start on the correct versions -->
     <group xsi:type="cluster" name="ALL_HOST_OPS" title="Set Version On All Hosts">
       <skippable>true</skippable>
       <execute-stage title="Update stack to {{version}}">
@@ -459,7 +463,7 @@
           <message>The following hosts were unhealthy and should be resolved before finalizing can be completed: {{hosts.unhealthy}}</message>
         </task>
       </execute-stage>
-      
+
       <execute-stage title="Confirm Finalize">
         <direction>UPGRADE</direction>
         <task xsi:type="manual">

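The new comment refers to the ru_set_all.py custom action invoked by the "Set Version On All Hosts" group. As a hedged sketch, rather than the script's actual contents, that stage boils down to running hdp-select on every host, which repoints the /usr/hdp/current symlinks (including hadoop-hdfs-namenode, which ZKFC now depends on) at the target stack version:

    import subprocess

    def set_all_versions(version):
        # "hdp-select set all <version>" switches every component symlink under
        # /usr/hdp/current to the target stack version in one shot
        subprocess.check_call(["hdp-select", "set", "all", version])
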
+ 5 - 1
ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml

@@ -225,6 +225,10 @@
       </execute-stage>
     </group>
 
+    <!-- Invoke hdp-select set all after all components have been stopped but
+    before starting new ones. This step must be done here in order to
+    ensure that components which rely on other components (like ZKFC on hadoop)
+    will start on the correct versions -->
     <group xsi:type="cluster" name="ALL_HOST_OPS" title="Set Version On All Hosts">
       <skippable>true</skippable>
       <execute-stage title="Update stack to {{version}}">
@@ -456,7 +460,7 @@
           <message>The following hosts were unhealthy and should be resolved before finalizing can be completed: {{hosts.unhealthy}}</message>
         </task>
       </execute-stage>
-      
+
       <execute-stage title="Confirm Finalize">
         <direction>UPGRADE</direction>
         <task xsi:type="manual">

+ 9 - 5
ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml

@@ -428,15 +428,15 @@
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_nimbus_convert_nimbus_host_to_seeds"/>
       </execute-stage>
-      
+
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_update_storm_env"/>
       </execute-stage>
-      
+
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_add_storm_cluster_logs_content"/>
       </execute-stage>
-      
+
       <execute-stage service="STORM" component="NIMBUS">
         <task xsi:type="configure" id="hdp_2_3_0_0_add_storm_worker_logs_content"/>
       </execute-stage>
@@ -468,6 +468,10 @@
       </execute-stage>
     </group>
 
+    <!-- Invoke hdp-select set all after all components have been stopped but
+    before starting new ones. This step must be done here in order to
+    ensure that components which rely on other components (like ZKFC on hadoop)
+    will start on the correct versions -->
     <group xsi:type="cluster" name="ALL_HOST_OPS" title="Set Version On All Hosts">
       <skippable>true</skippable>
       <execute-stage title="Update stack to {{version}}">
@@ -637,7 +641,7 @@
       <skippable>true</skippable>
       <service name="FALCON">
         <component>FALCON_SERVER</component>
-        <component>FALCON_CLIENT</component>        
+        <component>FALCON_CLIENT</component>
       </service>
     </group>
 
@@ -698,7 +702,7 @@
           <message>The following hosts were unhealthy and should be resolved before finalizing can be completed: {{hosts.unhealthy}}</message>
         </task>
       </execute-stage>
-      
+
       <execute-stage title="Confirm Finalize">
         <direction>UPGRADE</direction>
         <task xsi:type="manual">

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/upgrade-2.2.xml

@@ -93,8 +93,8 @@
       <service-check>false</service-check>
       <service name="HDFS">
         <component>JOURNALNODE</component>
-        <component>ZKFC</component>
         <component>NAMENODE</component>
+        <component>ZKFC</component>
       </service>
 
       <service name="MAPREDUCE2">

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/upgrade-2.3.xml

@@ -123,8 +123,8 @@
       <service-check>false</service-check>
       <service name="HDFS">
         <component>JOURNALNODE</component>
-        <component>ZKFC</component>
         <component>NAMENODE</component>
+        <component>ZKFC</component>
       </service>
 
       <service name="MAPREDUCE2">

+ 19 - 15
ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml

@@ -90,7 +90,7 @@
       </service>
 
       <service name="YARN">
-        <component>NODEMANAGER</component>        
+        <component>NODEMANAGER</component>
         <component>RESOURCEMANAGER</component>
         <component>APP_TIMELINE_SERVER</component>
       </service>
@@ -244,12 +244,16 @@
       </execute-stage>
     </group>
 
+    <!-- Invoke hdp-select set all after all components have been stopped but
+    before starting new ones. This step must be done here in order to
+    ensure that components which rely on other components (like ZKFC on hadoop)
+    will start on the correct versions -->
     <group xsi:type="cluster" name="ALL_HOST_OPS" title="Set Version On All Hosts">
       <skippable>true</skippable>
       <execute-stage title="Update stack to {{version}}">
         <task xsi:type="execute">
           <script>scripts/ru_set_all.py</script>
-          <function>actionexecute</function>      
+          <function>actionexecute</function>
         </task>
       </execute-stage>
     </group>
@@ -262,7 +266,7 @@
       <service name="ZOOKEEPER">
         <service-check>false</service-check>        <!-- TODO (Alejandro), enable service-check once done testing -->
         <component>ZOOKEEPER_SERVER</component>
-        <component>ZOOKEEPER_CLIENT</component>     
+        <component>ZOOKEEPER_CLIENT</component>
       </service>
     </group>
 
@@ -315,13 +319,13 @@
       <skippable>true</skippable>
       <service name="MAPREDUCE2">
         <component>HISTORYSERVER</component>
-        <component>MAPREDUCE2_CLIENT</component>    
+        <component>MAPREDUCE2_CLIENT</component>
       </service>
       <service name="YARN">
         <component>APP_TIMELINE_SERVER</component>
         <component>RESOURCEMANAGER</component>
-        <component>NODEMANAGER</component>          
-        <component>YARN_CLIENT</component>          
+        <component>NODEMANAGER</component>
+        <component>YARN_CLIENT</component>
       </service>
     </group>
 
@@ -330,13 +334,13 @@
       <skippable>true</skippable>
       <service name="HBASE">
         <component>HBASE_MASTER</component>
-        <component>HBASE_REGIONSERVER</component>   
-        <component>HBASE_CLIENT</component>         
+        <component>HBASE_REGIONSERVER</component>
+        <component>HBASE_CLIENT</component>
         <component>PHOENIX_QUERY_SERVER</component>
       </service>
     </group>
 
-    <group xsi:type="restart" name="CLIENTS" title="Tez, Pig, Sqoop Clients">  
+    <group xsi:type="restart" name="CLIENTS" title="Tez, Pig, Sqoop Clients">
       <service-check>false</service-check>
       <skippable>true</skippable>
 
@@ -375,8 +379,8 @@
         <component>HIVE_METASTORE</component>
         <component>HIVE_SERVER</component>
         <component>WEBHCAT_SERVER</component>
-        <component>HIVE_CLIENT</component>          
-        <component>HCAT</component>                 
+        <component>HIVE_CLIENT</component>
+        <component>HCAT</component>
       </service>
     </group>
 
@@ -385,7 +389,7 @@
       <skippable>true</skippable>
       <service name="SPARK">
         <component>SPARK_JOBHISTORYSERVER</component>
-        <component>SPARK_CLIENT</component>         
+        <component>SPARK_CLIENT</component>
       </service>
     </group>
 
@@ -418,7 +422,7 @@
       <skippable>true</skippable>
       <service name="OOZIE">
         <component>OOZIE_SERVER</component>
-        <component>OOZIE_CLIENT</component>         
+        <component>OOZIE_CLIENT</component>
       </service>
     </group>
 
@@ -427,7 +431,7 @@
       <skippable>true</skippable>
       <service name="FALCON">
         <component>FALCON_SERVER</component>
-        <component>FALCON_CLIENT</component>        
+        <component>FALCON_CLIENT</component>
       </service>
     </group>
 
@@ -501,7 +505,7 @@
           <message>The following hosts were unhealthy and should be resolved before finalizing can be completed: {{hosts.unhealthy}}</message>
         </task>
       </execute-stage>
-      
+
       <execute-stage title="Confirm Finalize">
         <direction>UPGRADE</direction>
         <task xsi:type="manual">

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/upgrade-2.3.xml

@@ -120,8 +120,8 @@
       <service-check>false</service-check>
       <service name="HDFS">
         <component>JOURNALNODE</component>
-        <component>ZKFC</component>
         <component>NAMENODE</component>
+        <component>ZKFC</component>
       </service>
 
       <service name="MAPREDUCE2">

+ 13 - 36
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py

@@ -82,17 +82,12 @@ class TestDatanode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop datanode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid', action = ['delete'])
+
     self.assertNoMoreResources()
 
   def test_configure_secured(self):
@@ -226,17 +221,11 @@ class TestDatanode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid",
-    )
     self.assertResourceCalled('Execute', 'ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop datanode',
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
 
@@ -255,17 +244,11 @@ class TestDatanode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid",
-    )
     self.assertResourceCalled('Execute', 'ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/hdp/current/hadoop-client/sbin/hadoop-daemon.sh --config /usr/hdp/current/hadoop-client/conf stop datanode',
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/hdp/current/hadoop-client/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   @patch("os.path.exists", new = MagicMock(return_value=False))
@@ -286,17 +269,11 @@ class TestDatanode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/hdp/current/hadoop-client/sbin/hadoop-daemon.sh --config /usr/hdp/current/hadoop-client/conf stop datanode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/hdp/current/hadoop-client/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
-                              action=['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def assert_configure_default(self):

+ 6 - 18
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_journalnode.py

@@ -79,17 +79,11 @@ class TestJournalnode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop journalnode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def test_configure_secured(self):
@@ -143,17 +137,11 @@ class TestJournalnode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop journalnode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-journalnode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def assert_configure_default(self):

+ 4 - 18
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py

@@ -264,17 +264,10 @@ class TestNamenode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop namenode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid")
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def test_configure_secured(self):
@@ -391,17 +384,10 @@ class TestNamenode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop namenode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid")
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def test_start_ha_default(self):

+ 6 - 18
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_nfsgateway.py

@@ -89,20 +89,14 @@ class TestNFSGateway(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/root/hadoop_privileged_nfs3.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/root/hadoop_privileged_nfs3.pid",
-    )
     self.assertResourceCalled('Execute', 'ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop nfs3',
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec',
            'HADOOP_PRIVILEGED_NFS_LOG_DIR': u'/var/log/hadoop/root',
            'HADOOP_PRIVILEGED_NFS_PID_DIR': u'/var/run/hadoop/root',
            'HADOOP_PRIVILEGED_NFS_USER': u'hdfs'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/root/hadoop_privileged_nfs3.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/root/hadoop_privileged_nfs3.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def test_configure_secured(self):
@@ -162,20 +156,14 @@ class TestNFSGateway(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/root/hadoop_privileged_nfs3.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/root/hadoop_privileged_nfs3.pid",
-    )
     self.assertResourceCalled('Execute', 'ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop nfs3',
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec',
            'HADOOP_PRIVILEGED_NFS_LOG_DIR': u'/var/log/hadoop/root',
            'HADOOP_PRIVILEGED_NFS_PID_DIR': u'/var/run/hadoop/root',
            'HADOOP_PRIVILEGED_NFS_USER': u'hdfs'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/root/hadoop_privileged_nfs3.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/root/hadoop_privileged_nfs3.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def assert_configure_default(self):

+ 6 - 18
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_snamenode.py

@@ -86,17 +86,11 @@ class TestSNamenode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop secondarynamenode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def test_configure_secured(self):
@@ -160,17 +154,11 @@ class TestSNamenode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop secondarynamenode'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-secondarynamenode.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def assert_configure_default(self):

+ 6 - 18
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_zkfc.py

@@ -114,17 +114,11 @@ class TestZkfc(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop zkfc'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def test_start_secured(self):
@@ -214,17 +208,11 @@ class TestZkfc(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid',
-        action = ['delete'],
-        not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid",
-    )
     self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop zkfc'",
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = None,
-    )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid',
-                              action = ['delete'],
-                              )
+        only_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid")
+
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-zkfc.pid', action = ['delete'])
     self.assertNoMoreResources()
 
   def test_start_with_ha_active_namenode_bootstrap(self):

+ 5 - 2
ambari-server/src/test/python/stacks/2.1/HIVE/test_hive_metastore.py

@@ -573,12 +573,14 @@ class TestHiveMetastore(RMFTestCase):
 
     # we don't care about configure here - the strings are different anyway because this
     # is an upgrade, so just pop those resources off of the call stack
-    self.pop_resources(21)
+    self.assertResourceCalledIgnoreEarlier('Directory', '/var/lib/hive', owner = 'hive', group = 'hadoop',
+      mode = 0755, recursive = True, cd_access = 'a')
 
     self.assertResourceCalled('Execute', ('rm', '-f', '/usr/hdp/current/hive-server2/lib/ojdbc6.jar'),
         path = ['/bin', '/usr/bin/'],
         sudo = True,
     )
+
     self.assertResourceCalled('File', '/tmp/mysql-connector-java.jar',
         content = DownloadSource('http://c6401.ambari.apache.org:8080/resources//mysql-jdbc-driver.jar'),
     )
@@ -659,7 +661,8 @@ class TestHiveMetastore(RMFTestCase):
 
     # we don't care about configure here - the strings are different anyway because this
     # is an upgrade, so just pop those resources off of the call stack
-    self.pop_resources(25)
+    self.assertResourceCalledIgnoreEarlier('Directory', '/var/lib/hive', owner = 'hive', group = 'hadoop',
+      mode = 0755, recursive = True, cd_access = 'a')
 
     self.assertResourceCalled('Execute',
                               ('rm', '-f', '/usr/hdp/current/hive-server2/lib/ojdbc6.jar'),

+ 21 - 6
ambari-server/src/test/python/stacks/utils/RMFTestCase.py

@@ -224,15 +224,30 @@ class RMFTestCase(TestCase):
       print s
     print(self.reindent("self.assertNoMoreResources()", intendation))
 
-  def pop_resources(self, count):
+  def assertResourceCalledIgnoreEarlier(self, resource_type, name, **kwargs):
+    """
+    Fast forwards past earlier called resources, popping them off the list until the
+    specified resource is hit. If it is never found, an assertion error is raised
+    stating that the resource was not in the call stack.
+    """
     with patch.object(UnknownConfiguration, '__getattr__', return_value=lambda: "UnknownConfiguration()"):
-      self.assertNotEqual(len(RMFTestCase.env.resource_list), 0, "There was no more resources executed!")
-      for i in range(count):
-        RMFTestCase.env.resource_list.pop(0)
+      while True:
+        # an empty list means the expected resource was never found
+        self.assertNotEqual(len(RMFTestCase.env.resource_list), 0, "The specified resource was not found in the call stack.")
+
+        # take the next resource and try it out
+        resource = RMFTestCase.env.resource_list.pop(0)
+        try:
+          self.assertEquals(resource_type, resource.__class__.__name__)
+          self.assertEquals(name, resource.name)
+          self.assertEquals(kwargs, resource.arguments)
+          break
+        except AssertionError:
+          pass
 
   def assertResourceCalled(self, resource_type, name, **kwargs):
     with patch.object(UnknownConfiguration, '__getattr__', return_value=lambda: "UnknownConfiguration()"):
-      self.assertNotEqual(len(RMFTestCase.env.resource_list), 0, "There was no more resources executed!")
+      self.assertNotEqual(len(RMFTestCase.env.resource_list), 0, "There were no more resources executed!")
       resource = RMFTestCase.env.resource_list.pop(0)
 
       self.assertEquals(resource_type, resource.__class__.__name__)
@@ -240,7 +255,7 @@ class RMFTestCase(TestCase):
       self.assertEquals(kwargs, resource.arguments)
     
   def assertNoMoreResources(self):
-    self.assertEquals(len(RMFTestCase.env.resource_list), 0, "There was other resources executed!")
+    self.assertEquals(len(RMFTestCase.env.resource_list), 0, "There were other resources executed!")
     
   def assertResourceCalledByIndex(self, index, resource_type, name, **kwargs):
     resource = RMFTestCase.env.resource_list[index]
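
To show the fast-forward semantics of assertResourceCalledIgnoreEarlier in isolation, here is a self-contained model of the same loop over a plain list (the call strings are illustrative only, not real resource output):

    # Standalone model of assertResourceCalledIgnoreEarlier: discard earlier calls
    # until the expected one is found; fail only if the list is exhausted first.
    def pop_until(expected, calls):
        while True:
            assert calls, "The specified resource was not found in the call stack."
            if calls.pop(0) == expected:
                return

    calls = ["Directory /var/lib/hive", "Execute rm -f ojdbc6.jar", "File pid"]
    pop_until("Execute rm -f ojdbc6.jar", calls)
    assert calls == ["File pid"]  # later calls remain for ordered assertions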