# namenode.py (13 KB)
"""
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
  16. import sys
  17. import os
  18. import json
  19. import tempfile
  20. from datetime import datetime
  21. import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set.
  22. from resource_management import Script
  23. from resource_management.core.resources.system import Execute
  24. from resource_management.core import shell
  25. from resource_management.libraries.functions import conf_select
  26. from resource_management.libraries.functions import hdp_select
  27. from resource_management.libraries.functions.version import compare_versions, format_hdp_stack_version
  28. from resource_management.libraries.functions.format import format
  29. from resource_management.libraries.functions.security_commons import build_expectations, \
  30. cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties, \
  31. FILE_TYPE_XML
  32. from resource_management.core.exceptions import Fail
  33. from resource_management.core.shell import as_user
  34. from resource_management.core.logger import Logger
  35. from ambari_commons.os_family_impl import OsFamilyImpl
  36. from ambari_commons import OSConst
  37. import namenode_upgrade
  38. from hdfs_namenode import namenode
  39. from hdfs import hdfs
  40. import hdfs_rebalance
  41. from utils import initiate_safe_zkfc_failover
  42. # hashlib is supplied as of Python 2.5 as the replacement interface for md5
  43. # and other secure hashes. In 2.6, md5 is deprecated. Import hashlib if
  44. # available, avoiding a deprecation warning under 2.6. Import md5 otherwise,
  45. # preserving 2.4 compatibility.
  46. try:
  47. import hashlib
  48. _md5 = hashlib.md5
  49. except ImportError:
  50. import md5
  51. _md5 = md5.new
  52. class NameNode(Script):
  53. def install(self, env):
  54. import params
  55. self.install_packages(env, params.exclude_packages)
  56. env.set_params(params)
  57. #TODO we need this for HA because of manual steps
  58. self.configure(env)
  59. def configure(self, env):
  60. import params
  61. env.set_params(params)
  62. hdfs("namenode")
  63. namenode(action="configure", env=env)
  64. def start(self, env, rolling_restart=False):
  65. import params
  66. env.set_params(params)
  67. self.configure(env)
  68. namenode(action="start", rolling_restart=rolling_restart, env=env)
  69. def stop(self, env, rolling_restart=False):
  70. import params
  71. env.set_params(params)
  72. if rolling_restart and params.dfs_ha_enabled:
  73. if params.dfs_ha_automatic_failover_enabled:
  74. initiate_safe_zkfc_failover()
  75. else:
  76. raise Fail("Rolling Upgrade - dfs.ha.automatic-failover.enabled must be enabled to perform a rolling restart")
  77. namenode(action="stop", rolling_restart=rolling_restart, env=env)
  78. def status(self, env):
  79. import status_params
  80. env.set_params(status_params)
  81. namenode(action="status", rolling_restart=False, env=env)
  82. def decommission(self, env):
  83. import params
  84. env.set_params(params)
  85. namenode(action="decommission")
  86. @OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT)
  87. class NameNodeDefault(NameNode):
  88. def get_stack_to_component(self):
  89. return {"HDP": "hadoop-hdfs-namenode"}
  90. def prepare_rolling_upgrade(self, env):
  91. namenode_upgrade.prepare_rolling_upgrade()
  92. def finalize_rolling_upgrade(self, env):
  93. namenode_upgrade.finalize_rolling_upgrade()
  94. def pre_rolling_restart(self, env):
  95. Logger.info("Executing Rolling Upgrade pre-restart")
  96. import params
  97. env.set_params(params)
  98. if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
  99. conf_select.select(params.stack_name, "hadoop", params.version)
  100. hdp_select.select("hadoop-hdfs-namenode", params.version)
  101. def post_rolling_restart(self, env):
  102. Logger.info("Executing Rolling Upgrade post-restart")
  103. import params
  104. env.set_params(params)
  105. Execute("hdfs dfsadmin -report -live",
  106. user=params.hdfs_user
  107. )
  108. def security_status(self, env):
  109. import status_params
  110. env.set_params(status_params)
  111. props_value_check = {"hadoop.security.authentication": "kerberos",
  112. "hadoop.security.authorization": "true"}
  113. props_empty_check = ["hadoop.security.auth_to_local"]
  114. props_read_check = None
  115. core_site_expectations = build_expectations('core-site', props_value_check, props_empty_check,
  116. props_read_check)
  117. props_value_check = None
  118. props_empty_check = ['dfs.namenode.kerberos.internal.spnego.principal',
  119. 'dfs.namenode.keytab.file',
  120. 'dfs.namenode.kerberos.principal']
  121. props_read_check = ['dfs.namenode.keytab.file']
  122. hdfs_site_expectations = build_expectations('hdfs-site', props_value_check, props_empty_check,
  123. props_read_check)
  124. hdfs_expectations = {}
  125. hdfs_expectations.update(core_site_expectations)
  126. hdfs_expectations.update(hdfs_site_expectations)
  127. security_params = get_params_from_filesystem(status_params.hadoop_conf_dir,
  128. {'core-site.xml': FILE_TYPE_XML,
  129. 'hdfs-site.xml': FILE_TYPE_XML})
  130. if 'core-site' in security_params and 'hadoop.security.authentication' in security_params['core-site'] and \
  131. security_params['core-site']['hadoop.security.authentication'].lower() == 'kerberos':
  132. result_issues = validate_security_config_properties(security_params, hdfs_expectations)
  133. if not result_issues: # If all validations passed successfully
  134. try:
  135. # Double check the dict before calling execute
  136. if ( 'hdfs-site' not in security_params
  137. or 'dfs.namenode.keytab.file' not in security_params['hdfs-site']
  138. or 'dfs.namenode.kerberos.principal' not in security_params['hdfs-site']):
  139. self.put_structured_out({"securityState": "UNSECURED"})
  140. self.put_structured_out(
  141. {"securityIssuesFound": "Keytab file or principal are not set property."})
  142. return
  143. cached_kinit_executor(status_params.kinit_path_local,
  144. status_params.hdfs_user,
  145. security_params['hdfs-site']['dfs.namenode.keytab.file'],
  146. security_params['hdfs-site']['dfs.namenode.kerberos.principal'],
  147. status_params.hostname,
  148. status_params.tmp_dir)
  149. self.put_structured_out({"securityState": "SECURED_KERBEROS"})
  150. except Exception as e:
  151. self.put_structured_out({"securityState": "ERROR"})
  152. self.put_structured_out({"securityStateErrorInfo": str(e)})
  153. else:
  154. issues = []
  155. for cf in result_issues:
  156. issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf]))
  157. self.put_structured_out({"securityIssuesFound": ". ".join(issues)})
  158. self.put_structured_out({"securityState": "UNSECURED"})
  159. else:
  160. self.put_structured_out({"securityState": "UNSECURED"})
  161. def rebalancehdfs(self, env):
  162. import params
  163. env.set_params(params)
  164. name_node_parameters = json.loads( params.name_node_params )
  165. threshold = name_node_parameters['threshold']
  166. _print("Starting balancer with threshold = %s\n" % threshold)
  167. rebalance_env = {'PATH': params.hadoop_bin_dir}
  168. if params.security_enabled:
  169. # Create the kerberos credentials cache (ccache) file and set it in the environment to use
  170. # when executing HDFS rebalance command. Use the md5 hash of the combination of the principal and keytab file
  171. # to generate a (relatively) unique cache filename so that we can use it as needed.
  172. # TODO: params.tmp_dir=/var/lib/ambari-agent/data/tmp. However hdfs user doesn't have access to this path.
  173. # TODO: Hence using /tmp
  174. ccache_file_name = "hdfs_rebalance_cc_" + _md5(format("{hdfs_principal_name}|{hdfs_user_keytab}")).hexdigest()
  175. ccache_file_path = os.path.join(tempfile.gettempdir(), ccache_file_name)
  176. rebalance_env['KRB5CCNAME'] = ccache_file_path
  177. # If there are no tickets in the cache or they are expired, perform a kinit, else use what
  178. # is in the cache
  179. klist_cmd = format("{klist_path_local} -s {ccache_file_path}")
  180. kinit_cmd = format("{kinit_path_local} -c {ccache_file_path} -kt {hdfs_user_keytab} {hdfs_principal_name}")
  181. if shell.call(klist_cmd, user=params.hdfs_user)[0] != 0:
  182. Execute(kinit_cmd, user=params.hdfs_user)
  183. def calculateCompletePercent(first, current):
  184. return 1.0 - current.bytesLeftToMove/first.bytesLeftToMove
  185. def startRebalancingProcess(threshold, rebalance_env):
  186. rebalanceCommand = format('hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}')
  187. return as_user(rebalanceCommand, params.hdfs_user, env=rebalance_env)
  188. command = startRebalancingProcess(threshold, rebalance_env)
  189. basedir = os.path.join(env.config.basedir, 'scripts')
  190. if(threshold == 'DEBUG'): #FIXME TODO remove this on PROD
  191. basedir = os.path.join(env.config.basedir, 'scripts', 'balancer-emulator')
  192. command = ['python','hdfs-command.py']
  193. _print("Executing command %s\n" % command)
  194. parser = hdfs_rebalance.HdfsParser()
  195. def handle_new_line(line, is_stderr):
  196. if is_stderr:
  197. return
  198. _print('[balancer] %s' % (line))
  199. pl = parser.parseLine(line)
  200. if pl:
  201. res = pl.toJson()
  202. res['completePercent'] = calculateCompletePercent(parser.initialLine, pl)
  203. self.put_structured_out(res)
  204. elif parser.state == 'PROCESS_FINISED' :
  205. _print('[balancer] %s' % ('Process is finished' ))
  206. self.put_structured_out({'completePercent' : 1})
  207. return
  208. Execute(command,
  209. on_new_line = handle_new_line,
  210. logoutput = False,
  211. )
  212. if params.security_enabled and os.path.exists(ccache_file_path):
  213. # Delete the kerberos credentials cache (ccache) file
  214. os.remove(ccache_file_path)
  215. @OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY)
  216. class NameNodeWindows(NameNode):
  217. def install(self, env):
  218. import install_params
  219. self.install_packages(env, install_params.exclude_packages)
  220. #TODO we need this for HA because of manual steps
  221. self.configure(env)
  222. def rebalancehdfs(self, env):
  223. from ambari_commons.os_windows import UserHelper, run_os_command_impersonated
  224. import params
  225. env.set_params(params)
  226. hdfs_username, hdfs_domain = UserHelper.parse_user_name(params.hdfs_user, ".")
  227. name_node_parameters = json.loads( params.name_node_params )
  228. threshold = name_node_parameters['threshold']
  229. _print("Starting balancer with threshold = %s\n" % threshold)
  230. def calculateCompletePercent(first, current):
  231. return 1.0 - current.bytesLeftToMove/first.bytesLeftToMove
  232. def startRebalancingProcess(threshold):
  233. rebalanceCommand = 'hdfs balancer -threshold %s' % threshold
  234. return ['cmd', '/C', rebalanceCommand]
  235. command = startRebalancingProcess(threshold)
  236. basedir = os.path.join(env.config.basedir, 'scripts')
  237. _print("Executing command %s\n" % command)
  238. parser = hdfs_rebalance.HdfsParser()
  239. returncode, stdout, err = run_os_command_impersonated(' '.join(command), hdfs_username, Script.get_password(params.hdfs_user), hdfs_domain)
  240. for line in stdout.split('\n'):
  241. _print('[balancer] %s %s' % (str(datetime.now()), line ))
  242. pl = parser.parseLine(line)
  243. if pl:
  244. res = pl.toJson()
  245. res['completePercent'] = calculateCompletePercent(parser.initialLine, pl)
  246. self.put_structured_out(res)
  247. elif parser.state == 'PROCESS_FINISED' :
  248. _print('[balancer] %s %s' % (str(datetime.now()), 'Process is finished' ))
  249. self.put_structured_out({'completePercent' : 1})
  250. break
  251. if returncode != None and returncode != 0:
  252. raise Fail('Hdfs rebalance process exited with error. See the log output')
  253. def _print(line):
  254. sys.stdout.write(line)
  255. sys.stdout.flush()
  256. if __name__ == "__main__":
  257. NameNode().execute()