Browse Source

AMBARI-13755. Express Upgrade: Failed to Downgrade Namenode (alejandro)

Alejandro Fernandez 9 years ago
parent
commit
9cee9a22e0

+ 16 - 14
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py

@@ -26,6 +26,7 @@ from resource_management.core.resources.service import Service
 from resource_management.libraries.functions.format import format
 from resource_management.libraries.functions.format import format
 from resource_management.libraries.functions.check_process_status import check_process_status
 from resource_management.libraries.functions.check_process_status import check_process_status
 from resource_management.libraries.resources.execute_hadoop import ExecuteHadoop
 from resource_management.libraries.resources.execute_hadoop import ExecuteHadoop
+from resource_management.libraries.functions import Direction
 from ambari_commons import OSCheck, OSConst
 from ambari_commons import OSCheck, OSConst
 from ambari_commons.os_family_impl import OsFamilyImpl, OsFamilyFuncImpl
 from ambari_commons.os_family_impl import OsFamilyImpl, OsFamilyFuncImpl
 
 
@@ -85,26 +86,27 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, e
 
 
     options = ""
     options = ""
     if upgrade_type == "rolling":
     if upgrade_type == "rolling":
-      options = "-rollingUpgrade started"
+      if params.upgrade_direction == Direction.UPGRADE:
+        options = "-rollingUpgrade started"
+      elif params.upgrade_direction == Direction.DOWNGRADE:
+        options = "-rollingUpgrade downgrade"
+        
     elif upgrade_type == "nonrolling":
     elif upgrade_type == "nonrolling":
       is_previous_image_dir = is_previous_fs_image()
       is_previous_image_dir = is_previous_fs_image()
       Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))
       Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))
 
 
-      if params.dfs_ha_enabled:
-        if params.desired_namenode_role is None:
-          raise Fail("Did not receive parameter \"desired_namenode_role\" to indicate the role that this NameNode should have.")
+      if params.upgrade_direction == Direction.UPGRADE:
+        if params.dfs_ha_enabled:
+          if params.desired_namenode_role is None:
+            raise Fail("Did not receive parameter \"desired_namenode_role\" to indicate the role that this NameNode should have.")
 
 
-        if params.desired_namenode_role == "active":
-          # The "-upgrade" command can only be used exactly once. If used more than once during a retry, it will cause problems.
-          options = "" if is_previous_image_dir else "-upgrade"
-
-        if params.desired_namenode_role == "standby":
-          # bootstrap NN separately before starting the daemon
-          bootstrap_standby_namenode(params, use_path=True)
+          # Both Active and Standby can use the same command
+          options = "-rollingUpgrade started"
+        else:
+          options = "-rollingUpgrade started"
+      elif params.upgrade_direction == Direction.DOWNGRADE:
+        options = "-rollingUpgrade downgrade"
 
 
-      else:
-        # Both Primary and Secondary NameNode can use the same command.
-        options = "" if is_previous_image_dir else "-upgrade"
     Logger.info(format("Option for start command: {options}"))
     Logger.info(format("Option for start command: {options}"))
 
 
     service(
     service(

+ 21 - 4
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py

@@ -19,6 +19,7 @@ limitations under the License.
 
 
 import sys
 import sys
 import os
 import os
+import time
 import json
 import json
 import tempfile
 import tempfile
 from datetime import datetime
 from datetime import datetime
@@ -29,6 +30,7 @@ from resource_management.core.resources.system import Execute
 from resource_management.core import shell
 from resource_management.core import shell
 from resource_management.libraries.functions import conf_select
 from resource_management.libraries.functions import conf_select
 from resource_management.libraries.functions import hdp_select
 from resource_management.libraries.functions import hdp_select
+from resource_management.libraries.functions import Direction
 from resource_management.libraries.functions.version import compare_versions, format_hdp_stack_version
 from resource_management.libraries.functions.version import compare_versions, format_hdp_stack_version
 from resource_management.libraries.functions.format import format
 from resource_management.libraries.functions.format import format
 from resource_management.libraries.functions.security_commons import build_expectations, \
 from resource_management.libraries.functions.security_commons import build_expectations, \
@@ -39,6 +41,7 @@ from resource_management.core.exceptions import Fail
 from resource_management.core.shell import as_user
 from resource_management.core.shell import as_user
 from resource_management.core.logger import Logger
 from resource_management.core.logger import Logger
 
 
+
 from ambari_commons.os_family_impl import OsFamilyImpl
 from ambari_commons.os_family_impl import OsFamilyImpl
 from ambari_commons import OSConst
 from ambari_commons import OSConst
 
 
@@ -130,8 +133,9 @@ class NameNodeDefault(NameNode):
     print "TODO AMBARI-12698"
     print "TODO AMBARI-12698"
     pass
     pass
 
 
-  def prepare_non_rolling_upgrade(self, env):
+  def prepare_express_upgrade(self, env):
     """
     """
+    Prepare the NameNodes for an Express (aka NonRolling) Upgrade.
     If in HA, on the Active NameNode only, examine the directory dfs.namenode.name.dir and
     If in HA, on the Active NameNode only, examine the directory dfs.namenode.name.dir and
     make sure that there is no "/previous" directory.
     make sure that there is no "/previous" directory.
 
 
@@ -143,10 +147,11 @@ class NameNodeDefault(NameNode):
 
 
     Copy the checkpoint files located in ${dfs.namenode.name.dir}/current into a backup directory.
     Copy the checkpoint files located in ${dfs.namenode.name.dir}/current into a backup directory.
 
 
-    Store the layoutVersion for the NameNode located at ${dfs.namenode.name.dir}/current/VERSION, into a backup directory
-
     Finalize any prior HDFS upgrade,
     Finalize any prior HDFS upgrade,
     hdfs dfsadmin -finalizeUpgrade
     hdfs dfsadmin -finalizeUpgrade
+
+    Prepare for a NameNode rolling upgrade in order to not lose any data.
+    hdfs dfsadmin -rollingUpgrade prepare
     """
     """
     import params
     import params
     Logger.info("Preparing the NameNodes for a NonRolling (aka Express) Upgrade.")
     Logger.info("Preparing the NameNodes for a NonRolling (aka Express) Upgrade.")
@@ -162,6 +167,9 @@ class NameNodeDefault(NameNode):
     namenode_upgrade.prepare_upgrade_backup_namenode_dir()
     namenode_upgrade.prepare_upgrade_backup_namenode_dir()
     namenode_upgrade.prepare_upgrade_finalize_previous_upgrades(hdfs_binary)
     namenode_upgrade.prepare_upgrade_finalize_previous_upgrades(hdfs_binary)
 
 
+    # Call -rollingUpgrade prepare
+    namenode_upgrade.prepare_rolling_upgrade(hdfs_binary)
+
   def prepare_rolling_upgrade(self, env):
   def prepare_rolling_upgrade(self, env):
     hfds_binary = self.get_hdfs_binary()
     hfds_binary = self.get_hdfs_binary()
     namenode_upgrade.prepare_rolling_upgrade(hfds_binary)
     namenode_upgrade.prepare_rolling_upgrade(hfds_binary)
@@ -185,6 +193,10 @@ class NameNodeDefault(NameNode):
               user=params.hdfs_user,
               user=params.hdfs_user,
               logoutput=True
               logoutput=True
       )
       )
+
+      # Wait a bit more since YARN still depends on block reports coming in.
+      # Also saw intermittent errors with HBASE service check if it was done too soon.
+      time.sleep(30)
     except Fail:
     except Fail:
       Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
       Logger.error("NameNode is still in safemode, please be careful with commands that need safemode OFF.")
 
 
@@ -202,7 +214,12 @@ class NameNodeDefault(NameNode):
     env.set_params(params)
     env.set_params(params)
 
 
     if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
     if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
-      conf_select.select(params.stack_name, "hadoop", params.version)
+      # When downgrading an Express Upgrade, the first thing we do is to revert the symlinks.
+      # Therefore, we cannot call this code in that scenario.
+      call_if = [("rolling", "upgrade"), ("rolling", "downgrade"), ("nonrolling", "upgrade")]
+      for e in call_if:
+        if (upgrade_type, params.upgrade_direction) == e:
+          conf_select.select(params.stack_name, "hadoop", params.version)
       hdp_select.select("hadoop-hdfs-namenode", params.version)
       hdp_select.select("hadoop-hdfs-namenode", params.version)
 
 
   def post_upgrade_restart(self, env, upgrade_type=None):
   def post_upgrade_restart(self, env, upgrade_type=None):

+ 12 - 11
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py

@@ -72,11 +72,12 @@ def prepare_upgrade_enter_safe_mode(hdfs_binary):
   import params
   import params
 
 
   safe_mode_enter_cmd = format("{hdfs_binary} dfsadmin -safemode enter")
   safe_mode_enter_cmd = format("{hdfs_binary} dfsadmin -safemode enter")
-  safe_mode_enter_and_check_for_on = format("{safe_mode_enter_cmd} | grep 'Safe mode is ON'")
   try:
   try:
     # Safe to call if already in Safe Mode
     # Safe to call if already in Safe Mode
-    Logger.info("Enter SafeMode if not already in it.")
-    as_user(safe_mode_enter_and_check_for_on, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
+    desired_state = SafeMode.ON
+    safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, desired_state, params.dfs_ha_enabled, hdfs_binary)
+    if not safemode_transition_successful:
+      raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(desired_state))
   except Exception, e:
   except Exception, e:
     message = format("Could not enter safemode. As the HDFS user, call this command: {safe_mode_enter_cmd}")
     message = format("Could not enter safemode. As the HDFS user, call this command: {safe_mode_enter_cmd}")
     Logger.error(message)
     Logger.error(message)
@@ -198,11 +199,11 @@ def reach_safemode_state(user, safemode_state, in_ha, hdfs_binary):
 
 
 def prepare_rolling_upgrade(hdfs_binary):
 def prepare_rolling_upgrade(hdfs_binary):
   """
   """
-  Perform either an upgrade or a downgrade.
+  This can be called during either a Rolling Upgrade or an Express Upgrade (aka nonrolling).
 
 
   Rolling Upgrade for HDFS Namenode requires the following.
   Rolling Upgrade for HDFS Namenode requires the following.
   0. Namenode must be up
   0. Namenode must be up
-  1. Leave safemode if the safemode status is not OFF
+  1. If HA: leave safemode if the safemode status is not OFF
   2. Execute a rolling upgrade "prepare"
   2. Execute a rolling upgrade "prepare"
   3. Execute a rolling upgrade "query"
   3. Execute a rolling upgrade "query"
   :param hdfs_binary: name/path of the HDFS binary to use
   :param hdfs_binary: name/path of the HDFS binary to use
@@ -217,11 +218,13 @@ def prepare_rolling_upgrade(hdfs_binary):
     kinit_command = format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}") 
     kinit_command = format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}") 
     Execute(kinit_command, user=params.hdfs_user, logoutput=True)
     Execute(kinit_command, user=params.hdfs_user, logoutput=True)
 
 
-
   if params.upgrade_direction == Direction.UPGRADE:
   if params.upgrade_direction == Direction.UPGRADE:
-    safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, SafeMode.OFF, True, hdfs_binary)
-    if not safemode_transition_successful:
-      raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(SafeMode.OFF))
+    if params.dfs_ha_enabled:
+      Logger.info('High Availability is enabled, must leave safemode before calling "-rollingUpgrade prepare"')
+      desired_state = SafeMode.OFF
+      safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, desired_state, True, hdfs_binary)
+      if not safemode_transition_successful:
+        raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(desired_state))
 
 
     prepare = format("{hdfs_binary} dfsadmin -rollingUpgrade prepare")
     prepare = format("{hdfs_binary} dfsadmin -rollingUpgrade prepare")
     query = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
     query = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
@@ -231,8 +234,6 @@ def prepare_rolling_upgrade(hdfs_binary):
     Execute(query,
     Execute(query,
             user=params.hdfs_user,
             user=params.hdfs_user,
             logoutput=True)
             logoutput=True)
-  elif params.upgrade_direction == Direction.DOWNGRADE:
-    pass
 
 
 def finalize_upgrade(upgrade_type, hdfs_binary):
 def finalize_upgrade(upgrade_type, hdfs_binary):
   """
   """

+ 2 - 16
ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml

@@ -119,10 +119,10 @@
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
-      <execute-stage service="HDFS" component="NAMENODE" title="Snapshot HDFS">
+      <execute-stage service="HDFS" component="NAMENODE" title="Prepare HDFS">
         <task xsi:type="execute" hosts="master">
         <task xsi:type="execute" hosts="master">
           <script>scripts/namenode.py</script>
           <script>scripts/namenode.py</script>
-          <function>prepare_non_rolling_upgrade</function>
+          <function>prepare_express_upgrade</function>
         </task>
         </task>
       </execute-stage>
       </execute-stage>
     </group>
     </group>
@@ -170,20 +170,6 @@
           <message>Before continuing, please restore the Hive Metastore database located on the following host(s): {{hosts.all}}.</message>
           <message>Before continuing, please restore the Hive Metastore database located on the following host(s): {{hosts.all}}.</message>
         </task>
         </task>
       </execute-stage>
       </execute-stage>
-
-      <execute-stage service="HBASE" component="HBASE_MASTER" title="Restore HBASE Snapshot">
-        <task xsi:type="execute" hosts="master">
-          <script>scripts/hbase_upgrade.py</script>
-          <function>restore_snapshot</function>
-        </task>
-      </execute-stage>
-
-      <execute-stage service="HDFS" component="NAMENODE" title="Restore HDFS Snapshot">
-        <task xsi:type="execute" hosts="master">
-          <script>scripts/namenode.py</script>
-          <function>restore_snapshot</function>
-        </task>
-      </execute-stage>
     </group>
     </group>
 
 
     <!--
     <!--

+ 25 - 40
ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml

@@ -97,7 +97,7 @@
       </service>
       </service>
 
 
       <service name="YARN">
       <service name="YARN">
-        <component>NODEMANAGER</component>        <!-- TODO, parallelize -->
+        <component>NODEMANAGER</component>
         <component>RESOURCEMANAGER</component>
         <component>RESOURCEMANAGER</component>
         <component>APP_TIMELINE_SERVER</component>
         <component>APP_TIMELINE_SERVER</component>
       </service>
       </service>
@@ -131,14 +131,14 @@
       <execute-stage service="HBASE" component="HBASE_MASTER" title="Snapshot HBASE">
       <execute-stage service="HBASE" component="HBASE_MASTER" title="Snapshot HBASE">
         <task xsi:type="execute" hosts="master">
         <task xsi:type="execute" hosts="master">
           <script>scripts/hbase_upgrade.py</script>
           <script>scripts/hbase_upgrade.py</script>
-          <function>take_snapshot</function>        <!-- TODO, this function used to be called just "snapshot" -->
+          <function>take_snapshot</function>
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
-      <execute-stage service="HDFS" component="NAMENODE" title="Snapshot HDFS">
-        <task xsi:type="execute" hosts="master">  <!-- TODO, this can be any NameNode, not just the active. -->
+      <execute-stage service="HDFS" component="NAMENODE" title="Prepare HDFS">
+        <task xsi:type="execute" hosts="master">
           <script>scripts/namenode.py</script>
           <script>scripts/namenode.py</script>
-          <function>prepare_non_rolling_upgrade</function>
+          <function>prepare_express_upgrade</function>
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
@@ -163,9 +163,9 @@
       <service name="HDFS">
       <service name="HDFS">
         <component>DATANODE</component>
         <component>DATANODE</component>
         <component>NAMENODE</component>
         <component>NAMENODE</component>
-        <component>SECONDARY_NAMENODE</component>   <!-- TODO, may not be present. -->
-        <component>ZKFC</component>                 <!-- TODO, may not be present. -->
-        <component>JOURNALNODE</component>          <!-- TODO, may not be present. -->
+        <component>SECONDARY_NAMENODE</component>
+        <component>ZKFC</component>
+        <component>JOURNALNODE</component>
       </service>
       </service>
 
 
       <service name="RANGER">
       <service name="RANGER">
@@ -204,20 +204,6 @@
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
-      <execute-stage service="HBASE" component="HBASE_MASTER" title="Restore HBASE Snapshot">
-        <task xsi:type="execute" hosts="master">
-          <script>scripts/hbase_upgrade.py</script>
-          <function>restore_snapshot</function>   <!-- TODO, this function name is new. -->
-        </task>
-      </execute-stage>
-
-      <execute-stage service="HDFS" component="NAMENODE" title="Restore HDFS Snapshot">
-        <task xsi:type="execute" hosts="master">  <!-- TODO, this can be any NameNode, not just the active. -->
-          <script>scripts/namenode.py</script>
-          <function>restore_snapshot</function>    <!-- TODO, this function doesn't exist yet. -->
-        </task>
-      </execute-stage>
-
       <execute-stage service="RANGER" component="RANGER_ADMIN" title="Restore Ranger Database">
       <execute-stage service="RANGER" component="RANGER_ADMIN" title="Restore Ranger Database">
         <task xsi:type="manual">
         <task xsi:type="manual">
           <message>Before continuing, please restore the Ranger Admin database and Ranger Audit database on the following host(s): {{hosts.all}}.</message>
           <message>Before continuing, please restore the Ranger Admin database and Ranger Audit database on the following host(s): {{hosts.all}}.</message>
@@ -239,9 +225,9 @@
       <skippable>true</skippable>
       <skippable>true</skippable>
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="ZOOKEEPER">
       <service name="ZOOKEEPER">
-        <service-check>false</service-check>        <!-- TODO, enable service-check once done testing -->
+        <service-check>true</service-check>
         <component>ZOOKEEPER_SERVER</component>
         <component>ZOOKEEPER_SERVER</component>
-        <component>ZOOKEEPER_CLIENT</component>     <!-- TODO, parallelize -->
+        <component>ZOOKEEPER_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 
@@ -263,9 +249,9 @@
         <component>JOURNALNODE</component>
         <component>JOURNALNODE</component>
         <component>ZKFC</component>
         <component>ZKFC</component>
         <component>NAMENODE</component>
         <component>NAMENODE</component>
-        <component>SECONDARY_NAMENODE</component>   <!-- TODO, may not be present -->
-        <component>DATANODE</component>             <!-- TODO, parallelize -->
-        <component>HDFS_CLIENT</component>          <!-- TODO, parallelize -->
+        <component>SECONDARY_NAMENODE</component>
+        <component>DATANODE</component>
+        <component>HDFS_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 
@@ -288,13 +274,13 @@
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="MAPREDUCE2">
       <service name="MAPREDUCE2">
         <component>HISTORYSERVER</component>
         <component>HISTORYSERVER</component>
-        <component>MAPREDUCE2_CLIENT</component>    <!-- TODO, parallelize -->
+        <component>MAPREDUCE2_CLIENT</component>
       </service>
       </service>
       <service name="YARN">
       <service name="YARN">
         <component>APP_TIMELINE_SERVER</component>
         <component>APP_TIMELINE_SERVER</component>
         <component>RESOURCEMANAGER</component>
         <component>RESOURCEMANAGER</component>
-        <component>NODEMANAGER</component>          <!-- TODO, parallelize -->
-        <component>YARN_CLIENT</component>          <!-- TODO, parallelize -->
+        <component>NODEMANAGER</component>
+        <component>YARN_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 
@@ -304,12 +290,12 @@
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="HBASE">
       <service name="HBASE">
         <component>HBASE_MASTER</component>
         <component>HBASE_MASTER</component>
-        <component>HBASE_REGIONSERVER</component>   <!-- TODO, parallelize -->
-        <component>HBASE_CLIENT</component>         <!-- TODO, parallelize -->
+        <component>HBASE_REGIONSERVER</component>
+        <component>HBASE_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 
-    <group xsi:type="restart" name="CLIENTS" title="Tez, Pig, Sqoop Clients">  <!-- TODO, parallelize -->
+    <group xsi:type="restart" name="CLIENTS" title="Tez, Pig, Sqoop Clients">
       <service-check>false</service-check>
       <service-check>false</service-check>
       <skippable>true</skippable>
       <skippable>true</skippable>
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
@@ -346,8 +332,8 @@
         <component>HIVE_METASTORE</component>
         <component>HIVE_METASTORE</component>
         <component>HIVE_SERVER</component>
         <component>HIVE_SERVER</component>
         <component>WEBHCAT_SERVER</component>
         <component>WEBHCAT_SERVER</component>
-        <component>HIVE_CLIENT</component>          <!-- TODO, parallelize -->
-        <component>HCAT</component>                 <!-- TODO, parallelize -->
+        <component>HIVE_CLIENT</component>
+        <component>HCAT</component>
       </service>
       </service>
     </group>
     </group>
 
 
@@ -357,7 +343,7 @@
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="SPARK">
       <service name="SPARK">
         <component>SPARK_JOBHISTORYSERVER</component>
         <component>SPARK_JOBHISTORYSERVER</component>
-        <component>SPARK_CLIENT</component>         <!-- TODO, parallelize -->
+        <component>SPARK_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 
@@ -391,7 +377,7 @@
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="OOZIE">
       <service name="OOZIE">
         <component>OOZIE_SERVER</component>
         <component>OOZIE_SERVER</component>
-        <component>OOZIE_CLIENT</component>         <!-- TODO, parallelize -->
+        <component>OOZIE_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 
@@ -401,7 +387,7 @@
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="FALCON">
       <service name="FALCON">
         <component>FALCON_SERVER</component>
         <component>FALCON_SERVER</component>
-        <component>FALCON_CLIENT</component>        <!-- TODO, parallelize -->
+        <component>FALCON_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 
@@ -434,7 +420,6 @@
         <component>DRPC_SERVER</component>
         <component>DRPC_SERVER</component>
       </service>
       </service>
 
 
-      <!-- TODO, does this work? -->
       <execute-stage service="STORM" component="DRPC_SERVER" title="Rebuild Storm Topology">
       <execute-stage service="STORM" component="DRPC_SERVER" title="Rebuild Storm Topology">
         <task xsi:type="manual">
         <task xsi:type="manual">
           <message>Please rebuild your topology using the new Storm version dependencies and resubmit it using the newly created jar.</message>
           <message>Please rebuild your topology using the new Storm version dependencies and resubmit it using the newly created jar.</message>
@@ -499,7 +484,7 @@
       </execute-stage>
       </execute-stage>
 
 
       <execute-stage service="HDFS" component="NAMENODE" title="Execute HDFS Finalize">
       <execute-stage service="HDFS" component="NAMENODE" title="Execute HDFS Finalize">
-        <task xsi:type="execute" hosts="master">      <!-- TODO, what happens if there's no HA. -->
+        <task xsi:type="execute" hosts="master">
           <script>scripts/namenode.py</script>
           <script>scripts/namenode.py</script>
           <function>finalize_non_rolling_upgrade</function>
           <function>finalize_non_rolling_upgrade</function>
         </task>
         </task>

+ 3 - 17
ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml

@@ -166,10 +166,10 @@
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
-      <execute-stage service="HDFS" component="NAMENODE" title="Snapshot HDFS">
+      <execute-stage service="HDFS" component="NAMENODE" title="Prepare HDFS">
         <task xsi:type="execute" hosts="master">
         <task xsi:type="execute" hosts="master">
           <script>scripts/namenode.py</script>
           <script>scripts/namenode.py</script>
-          <function>prepare_non_rolling_upgrade</function>
+          <function>prepare_express_upgrade</function>
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
@@ -235,20 +235,6 @@
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
-      <execute-stage service="HBASE" component="HBASE_MASTER" title="Restore HBASE Snapshot">
-        <task xsi:type="execute" hosts="master">
-          <script>scripts/hbase_upgrade.py</script>
-          <function>restore_snapshot</function>
-        </task>
-      </execute-stage>
-
-      <execute-stage service="HDFS" component="NAMENODE" title="Restore HDFS Snapshot">
-        <task xsi:type="execute" hosts="master">
-          <script>scripts/namenode.py</script>
-          <function>restore_snapshot</function>
-        </task>
-      </execute-stage>
-
       <execute-stage service="RANGER" component="RANGER_ADMIN" title="Restore Ranger Database">
       <execute-stage service="RANGER" component="RANGER_ADMIN" title="Restore Ranger Database">
         <task xsi:type="manual">
         <task xsi:type="manual">
           <message>Before continuing, please restore the Ranger Admin database and Ranger Audit database on the following host(s): {{hosts.all}}.</message>
           <message>Before continuing, please restore the Ranger Admin database and Ranger Audit database on the following host(s): {{hosts.all}}.</message>
@@ -616,7 +602,7 @@
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="SPARK">
       <service name="SPARK">
         <component>SPARK_JOBHISTORYSERVER</component>
         <component>SPARK_JOBHISTORYSERVER</component>
-        <component>SPARK_CLIENT</component>         <!-- TODO, parallelize -->
+        <component>SPARK_CLIENT</component>
       </service>
       </service>
     </group>
     </group>
 
 

+ 9 - 24
ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml

@@ -144,14 +144,14 @@
       <execute-stage service="HBASE" component="HBASE_MASTER" title="Snapshot HBASE">
       <execute-stage service="HBASE" component="HBASE_MASTER" title="Snapshot HBASE">
         <task xsi:type="execute" hosts="master">
         <task xsi:type="execute" hosts="master">
           <script>scripts/hbase_upgrade.py</script>
           <script>scripts/hbase_upgrade.py</script>
-          <function>take_snapshot</function>        <!-- TODO (Alejandro), this function used to be called just "snapshot" -->
+          <function>take_snapshot</function>
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
-      <execute-stage service="HDFS" component="NAMENODE" title="Snapshot HDFS">
-        <task xsi:type="execute" hosts="master">  <!-- TODO (Alejandro), this can be any NameNode, not just the active. -->
+      <execute-stage service="HDFS" component="NAMENODE" title="Prepare HDFS">
+        <task xsi:type="execute" hosts="master">
           <script>scripts/namenode.py</script>
           <script>scripts/namenode.py</script>
-          <function>prepare_non_rolling_upgrade</function>
+          <function>prepare_express_upgrade</function>
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
@@ -183,9 +183,9 @@
       <service name="HDFS">
       <service name="HDFS">
         <component>DATANODE</component>
         <component>DATANODE</component>
         <component>NAMENODE</component>
         <component>NAMENODE</component>
-        <component>SECONDARY_NAMENODE</component>   <!-- TODO (Alejandro), may not be present. -->
-        <component>ZKFC</component>                 <!-- TODO (Alejandro), may not be present. -->
-        <component>JOURNALNODE</component>          <!-- TODO (Alejandro), may not be present. -->
+        <component>SECONDARY_NAMENODE</component>
+        <component>ZKFC</component>
+        <component>JOURNALNODE</component>
         <component>NFS_GATEWAY</component>
         <component>NFS_GATEWAY</component>
       </service>
       </service>
 
 
@@ -228,20 +228,6 @@
         </task>
         </task>
       </execute-stage>
       </execute-stage>
 
 
-      <execute-stage service="HBASE" component="HBASE_MASTER" title="Restore HBASE Snapshot">
-        <task xsi:type="execute" hosts="master">
-          <script>scripts/hbase_upgrade.py</script>
-          <function>restore_snapshot</function>   <!-- TODO (Alejandro), this function name is new. -->
-        </task>
-      </execute-stage>
-
-      <execute-stage service="HDFS" component="NAMENODE" title="Restore HDFS Snapshot">
-        <task xsi:type="execute" hosts="master">  <!-- TODO (Alejandro), this can be any NameNode, not just the active. -->
-          <script>scripts/namenode.py</script>
-          <function>restore_snapshot</function>    <!-- TODO (Alejandro), this function doesn't exist yet. -->
-        </task>
-      </execute-stage>
-
       <execute-stage service="RANGER" component="RANGER_ADMIN" title="Restore Ranger Database">
       <execute-stage service="RANGER" component="RANGER_ADMIN" title="Restore Ranger Database">
         <task xsi:type="manual">
         <task xsi:type="manual">
           <message>Before continuing, please restore the Ranger Admin database and Ranger Audit database on the following host(s): {{hosts.all}}.</message>
           <message>Before continuing, please restore the Ranger Admin database and Ranger Audit database on the following host(s): {{hosts.all}}.</message>
@@ -270,7 +256,7 @@
       <skippable>true</skippable>
       <skippable>true</skippable>
       <parallel-scheduler></parallel-scheduler>
       <parallel-scheduler></parallel-scheduler>
       <service name="ZOOKEEPER">
       <service name="ZOOKEEPER">
-        <service-check>false</service-check>        <!-- TODO (Alejandro), enable service-check once done testing -->
+        <service-check>true</service-check>
         <component>ZOOKEEPER_SERVER</component>
         <component>ZOOKEEPER_SERVER</component>
         <component>ZOOKEEPER_CLIENT</component>
         <component>ZOOKEEPER_CLIENT</component>
       </service>
       </service>
@@ -488,7 +474,6 @@
         <component>DRPC_SERVER</component>
         <component>DRPC_SERVER</component>
       </service>
       </service>
 
 
-      <!-- TODO (Alejandro), does this work? -->
       <execute-stage service="STORM" component="DRPC_SERVER" title="Rebuild Storm Topology">
       <execute-stage service="STORM" component="DRPC_SERVER" title="Rebuild Storm Topology">
         <task xsi:type="manual">
         <task xsi:type="manual">
           <message>Please rebuild your topology using the new Storm version dependencies and resubmit it using the newly created jar.</message>
           <message>Please rebuild your topology using the new Storm version dependencies and resubmit it using the newly created jar.</message>
@@ -567,7 +552,7 @@
       </execute-stage>
       </execute-stage>
 
 
       <execute-stage service="HDFS" component="NAMENODE" title="Execute HDFS Finalize">
       <execute-stage service="HDFS" component="NAMENODE" title="Execute HDFS Finalize">
-        <task xsi:type="execute" hosts="master">      <!-- TODO (Alejandro), what happens if there's no HA. -->
+        <task xsi:type="execute" hosts="master">
           <script>scripts/namenode.py</script>
           <script>scripts/namenode.py</script>
           <function>finalize_non_rolling_upgrade</function>
           <function>finalize_non_rolling_upgrade</function>
         </task>
         </task>

+ 4 - 17
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py

@@ -1303,14 +1303,10 @@ class TestNamenode(RMFTestCase):
                        hdp_stack_version = self.STACK_VERSION,
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES,
                        target = RMFTestCase.TARGET_COMMON_SERVICES,
                        mocks_dict=mocks_dict)
                        mocks_dict=mocks_dict)
-
-    # for now, just make sure hdfs bootstrap standby is called
-    found = False
-    for ca in mocks_dict['call'].call_args_list:
-      if str(ca[0][0]).startswith("/usr/hdp/2.3.2.0-2844/hadoop/bin/hdfs namenode -bootstrapStandby -nonInteractive"):
-        found = True
-
-    self.assertTrue(found)
+    
+    calls = mocks_dict['call'].call_args_list
+    self.assertTrue(len(calls) >= 1)
+    self.assertTrue(calls[0].startsWith("conf-select create-conf-dir --package hadoop --stack-version 2.3.2.0-2844 --conf-version 0"))
 
 
   def test_pre_upgrade_restart(self):
   def test_pre_upgrade_restart(self):
     config_file = self.get_src_folder()+"/test/python/stacks/2.0.6/configs/default.json"
     config_file = self.get_src_folder()+"/test/python/stacks/2.0.6/configs/default.json"
@@ -1348,15 +1344,6 @@ class TestNamenode(RMFTestCase):
     self.assertResourceCalled('Execute', ('ambari-python-wrap', '/usr/bin/hdp-select', 'set', 'hadoop-hdfs-namenode', version), sudo=True)
     self.assertResourceCalled('Execute', ('ambari-python-wrap', '/usr/bin/hdp-select', 'set', 'hadoop-hdfs-namenode', version), sudo=True)
     self.assertNoMoreResources()
     self.assertNoMoreResources()
 
 
-    self.assertEquals(1, mocks_dict['call'].call_count)
-    self.assertEquals(1, mocks_dict['checked_call'].call_count)
-    self.assertEquals(
-      ('ambari-python-wrap', '/usr/bin/conf-select', 'set-conf-dir', '--package', 'hadoop', '--stack-version', '2.3.0.0-1234', '--conf-version', '0'),
-       mocks_dict['checked_call'].call_args_list[0][0][0])
-    self.assertEquals(
-      ('ambari-python-wrap', '/usr/bin/conf-select', 'create-conf-dir', '--package', 'hadoop', '--stack-version', '2.3.0.0-1234', '--conf-version', '0'),
-       mocks_dict['call'].call_args_list[0][0][0])
-
   def test_post_upgrade_restart(self):
   def test_post_upgrade_restart(self):
     config_file = self.get_src_folder()+"/test/python/stacks/2.0.6/configs/default.json"
     config_file = self.get_src_folder()+"/test/python/stacks/2.0.6/configs/default.json"
     with open(config_file, "r") as f:
     with open(config_file, "r") as f: