
AMBARI-9541. RU - HDFS downgrade needs to skip HDFS prepare step (alejandro)

Alejandro Fernandez, 10 years ago
parent commit 586a5d7f64

+ 34 - 0
ambari-common/src/main/python/resource_management/core/constants.py

@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+class Direction:
+  """
+  Rolling Upgrade direction
+  """
+  UPGRADE = "upgrade"
+  DOWNGRADE = "downgrade"
+
+class SafeMode:
+  """
+  Namenode Safe Mode state
+  """
+  ON = "ON"
+  OFF = "OFF"
+  UNKNOWN = "UNKNOWN"
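
A minimal usage sketch (not part of the commit) of how scripts can consume these new shared constants rather than redefining the string literals per module:

    from resource_management.core.constants import Direction, SafeMode

    # In a real script this would come from the command parameters.
    direction = Direction.DOWNGRADE

    if direction == Direction.DOWNGRADE:
        print("Downgrade: skipping the HDFS rollingUpgrade prepare step")

    state = SafeMode.UNKNOWN
    print("Safemode state: %s" % state)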

+ 23 - 18
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py

@@ -21,15 +21,12 @@ import re
 from resource_management.core.logger import Logger
 from resource_management.core.resources.system import Execute
 from resource_management.libraries.functions.format import format
+from resource_management.libraries.functions.default import default
 from resource_management.core.shell import call
+from resource_management.core.constants import Direction, SafeMode
 from resource_management.core.exceptions import Fail
 
 
-class SafeMode:
-  ON = "ON"
-  OFF = "OFF"
-  UNKNOWN = "UNKNOWN"
-
 safemode_to_instruction = {SafeMode.ON: "enter",
                            SafeMode.OFF: "leave"}
 
@@ -79,31 +76,39 @@ def reach_safemode_state(user, safemode_state, in_ha):
 
 def prepare_rolling_upgrade():
   """
+  Perform either an upgrade or a downgrade.
+
   Rolling Upgrade for HDFS Namenode requires the following.
   0. Namenode must be up
   1. Leave safemode if the safemode status is not OFF
   2. Execute a rolling upgrade "prepare"
   3. Execute a rolling upgrade "query"
   """
-  Logger.info("Executing Rolling Upgrade prepare")
   import params
 
+  if not params.upgrade_direction or params.upgrade_direction not in [Direction.UPGRADE, Direction.DOWNGRADE]:
+    raise Fail("Could not retrieve upgrade direction: %s" % str(params.upgrade_direction))
+  Logger.info(format("Performing a(n) {params.upgrade_direction} of HDFS"))
+
   if params.security_enabled:
     Execute(format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}"))
 
-  safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, SafeMode.OFF, True)
-  if not safemode_transition_successful:
-    raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(SafeMode.OFF))
-
-  prepare = "hdfs dfsadmin -rollingUpgrade prepare"
-  query = "hdfs dfsadmin -rollingUpgrade query"
-  Execute(prepare,
-          user=params.hdfs_user,
-          logoutput=True)
-  Execute(query,
-          user=params.hdfs_user,
-          logoutput=True)
 
+  if params.upgrade_direction == Direction.UPGRADE:
+    safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, SafeMode.OFF, True)
+    if not safemode_transition_successful:
+      raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(SafeMode.OFF))
+
+    prepare = "hdfs dfsadmin -rollingUpgrade prepare"
+    query = "hdfs dfsadmin -rollingUpgrade query"
+    Execute(prepare,
+            user=params.hdfs_user,
+            logoutput=True)
+    Execute(query,
+            user=params.hdfs_user,
+            logoutput=True)
+  elif params.upgrade_direction == Direction.DOWNGRADE:
+    pass
 
 def finalize_rolling_upgrade():
   """

+ 1 - 0
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py

@@ -30,6 +30,7 @@ config = Script.get_config()
 tmp_dir = Script.get_tmp_dir()
 
 stack_name = default("/hostLevelParams/stack_name", None)
+upgrade_direction = default("/commandParams/upgrade_direction", None)
 
 stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
 hdp_stack_version = format_hdp_stack_version(stack_version_unformatted)
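
The new params entry reads the direction from the command JSON, falling back to None when the key is absent. A rough sketch of how a default()-style lookup resolves such a path (an assumption about the helper's behavior, not Ambari's actual implementation):

    def default_lookup(config, path, fallback=None):
        # Walk "/commandParams/upgrade_direction" key by key.
        node = config
        for key in path.strip("/").split("/"):
            if not isinstance(node, dict) or key not in node:
                return fallback  # parameter absent: use the fallback (None here)
            node = node[key]
        return node

    cmd = {"commandParams": {"upgrade_direction": "downgrade"}}
    assert default_lookup(cmd, "/commandParams/upgrade_direction") == "downgrade"
    assert default_lookup({}, "/commandParams/upgrade_direction") is None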

+ 13 - 6
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py

@@ -71,7 +71,8 @@ def failover_namenode():
     Logger.info("Rolling Upgrade - Initiating namenode failover by killing zkfc on active namenode")
 
     # Forcefully kill ZKFC on this host to initiate a failover
-    kill_zkfc(params.hdfs_user)
+    # If ZKFC is already dead, then potentially this node can still be the active one.
+    was_zkfc_killed = kill_zkfc(params.hdfs_user)
 
     # Wait until it transitions to standby
     check_standby_cmd = format("hdfs haadmin -getServiceState {namenode_id} | grep standby")
@@ -83,11 +84,13 @@ def failover_namenode():
     if code == 255 and out:
       Logger.info("Rolling Upgrade - namenode is already down")
     else:
-      Execute(check_standby_cmd,
-              user=params.hdfs_user,
-              tries=50,
-              try_sleep=6,
-              logoutput=True)
+      if was_zkfc_killed:
+        # Only mandate that this be the standby namenode if ZKFC was indeed killed to initiate a failover.
+        Execute(check_standby_cmd,
+                user=params.hdfs_user,
+                tries=50,
+                try_sleep=6,
+                logoutput=True)
 
   else:
     Logger.info("Rolling Upgrade - Host %s is the standby namenode." % str(params.hostname))
@@ -99,6 +102,7 @@ def kill_zkfc(zkfc_user):
   Option 1. Kill zkfc on primary namenode provided that the secondary is up and has zkfc running on it.
   Option 2. Silent failover (not supported as of HDP 2.2.0.0)
   :param zkfc_user: User that started the ZKFC process.
+  :return: Return True if ZKFC was killed, otherwise, false.
   """
   import params
   if params.dfs_ha_enabled:
@@ -110,6 +114,9 @@ def kill_zkfc(zkfc_user):
         Logger.debug("ZKFC is running and will be killed to initiate namenode failover.")
         kill_command = format("{check_process} && kill -9 `cat {zkfc_pid_file}` > /dev/null 2>&1")
         Execute(kill_command)
+        Execute(format("rm -f {zkfc_pid_file}"))
+        return True
+  return False
 
 
 def get_service_pid_file(name, user):
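
kill_zkfc() now reports whether it actually killed the process, and failover_namenode() only insists on a standby transition when a failover was really initiated: if ZKFC was already dead, this host may still legitimately be the active namenode, and waiting for standby would hang. A condensed sketch of that contract (hypothetical helpers, not part of the commit):

    def kill_zkfc_sketch(zkfc_pid_exists, kill_process, remove_pid_file):
        """Return True only if ZKFC was running and has been killed."""
        if zkfc_pid_exists:
            kill_process()      # kill -9 `cat {zkfc_pid_file}` in the diff
            remove_pid_file()   # rm -f {zkfc_pid_file}, so a stale pid file does not linger
            return True
        return False

    def failover_sketch(was_zkfc_killed, wait_until_standby):
        if was_zkfc_killed:
            # Retries "hdfs haadmin -getServiceState {namenode_id} | grep standby".
            wait_until_standby()
        # Otherwise: do not block; this host might legitimately stay active.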

+ 2 - 1
ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie_server_upgrade.py

@@ -24,6 +24,7 @@ import tempfile
 
 from resource_management.core import shell
 from resource_management.core.logger import Logger
+from resource_management.core.constants import Direction
 from resource_management.core.exceptions import Fail
 from resource_management.core.resources.system import Execute
 from resource_management.libraries.functions import format
@@ -118,7 +119,7 @@ def prepare_libext_directory():
   # /usr/hdp/current/hadoop-client ; we must use params.version directly
   # however, this only works when upgrading beyond 2.2.0.0; don't do this
   # for downgrade to 2.2.0.0 since hadoop-lzo will not be present
-  if params.upgrade_direction == "upgrade" or target_version_needs_compression_libraries:
+  if params.upgrade_direction == Direction.UPGRADE or target_version_needs_compression_libraries:
     hadoop_lzo_pattern = 'hadoop-lzo*.jar'
     hadoop_client_new_lib_dir = format("/usr/hdp/{version}/hadoop/lib")
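
Finally, the Oozie upgrade script now compares against the shared Direction.UPGRADE constant instead of the bare "upgrade" literal, so a misspelled name fails loudly with an AttributeError rather than silently evaluating to False. A hedged restatement of the guarded condition (a sketch, not the committed function):

    from resource_management.core.constants import Direction

    def should_copy_lzo_jars(upgrade_direction, target_version_needs_compression_libraries):
        # Copy hadoop-lzo on upgrade, or whenever the target version still
        # needs the compression libraries (2.2.0.0 does not ship hadoop-lzo).
        return (upgrade_direction == Direction.UPGRADE
                or target_version_needs_compression_libraries)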