params.py

  1. """
  2. Licensed to the Apache Software Foundation (ASF) under one
  3. or more contributor license agreements. See the NOTICE file
  4. distributed with this work for additional information
  5. regarding copyright ownership. The ASF licenses this file
  6. to you under the Apache License, Version 2.0 (the
  7. "License"); you may not use this file except in compliance
  8. with the License. You may obtain a copy of the License at
  9. http://www.apache.org/licenses/LICENSE-2.0
  10. Unless required by applicable law or agreed to in writing, software
  11. distributed under the License is distributed on an "AS IS" BASIS,
  12. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. See the License for the specific language governing permissions and
  14. limitations under the License.
  15. """

import collections
import re
import os
import ast

import ambari_simplejson as json  # simplejson is much faster compared to the Python 2.6 json module and provides the same set of functions.

from resource_management.libraries.script import Script
from resource_management.libraries.functions import default
from resource_management.libraries.functions import format
from resource_management.libraries.functions import conf_select
from resource_management.libraries.functions import stack_select
from resource_management.libraries.functions import format_jvm_option
from resource_management.libraries.functions.is_empty import is_empty
from resource_management.libraries.functions.version import format_stack_version
from resource_management.libraries.functions.expect import expect
from resource_management.libraries.functions import StackFeature
from resource_management.libraries.functions.stack_features import check_stack_feature
from resource_management.libraries.functions.stack_features import get_stack_feature_version
from resource_management.libraries.functions.get_architecture import get_architecture
from ambari_commons.constants import AMBARI_SUDO_BINARY
from resource_management.libraries.functions.namenode_ha_utils import get_properties_for_all_nameservices, namenode_federation_enabled
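
# NOTE: every top-level name defined below is consumed by the hook scripts as params.<name>
# (typically via "import params"). Values come from the command JSON exposed by
# Script.get_config(); default(path, fallback) tolerates a missing key, while direct
# config[...] lookups treat the key as required and raise KeyError when it is absent.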

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
stack_root = Script.get_stack_root()

architecture = get_architecture()

dfs_type = default("/clusterLevelParams/dfs_type", "")

artifact_dir = format("{tmp_dir}/AMBARI-artifacts/")
jdk_name = default("/ambariLevelParams/jdk_name", None)
java_home = config['ambariLevelParams']['java_home']
java_version = expect("/ambariLevelParams/java_version", int)
jdk_location = config['ambariLevelParams']['jdk_location']

hadoop_custom_extensions_enabled = default("/configurations/core-site/hadoop.custom-extensions.enabled", False)

sudo = AMBARI_SUDO_BINARY

ambari_server_hostname = config['ambariLevelParams']['ambari_server_host']

stack_version_unformatted = config['clusterLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)

upgrade_type = Script.get_upgrade_type(default("/commandParams/upgrade_type", ""))
version = default("/commandParams/version", None)
# Handle upgrade and downgrade
if (upgrade_type is not None) and version:
  stack_version_formatted = format_stack_version(version)

ambari_java_home = default("/commandParams/ambari_java_home", None)
ambari_jdk_name = default("/commandParams/ambari_jdk_name", None)

security_enabled = config['configurations']['cluster-env']['security_enabled']
hdfs_user = config['configurations']['hadoop-env']['hdfs_user']

# Some datanode settings
dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None)
dfs_dn_http_addr = default('/configurations/hdfs-site/dfs.datanode.http.address', None)
dfs_dn_https_addr = default('/configurations/hdfs-site/dfs.datanode.https.address', None)
dfs_http_policy = default('/configurations/hdfs-site/dfs.http.policy', None)
secure_dn_ports_are_in_use = False

def get_port(address):
  """
  Extracts the port from an address like 0.0.0.0:1019
  """
  if address is None:
    return None

  m = re.search(r'(?:http(?:s)?://)?([\w\d.]*):(\d{1,5})', address)
  if m is not None:
    return int(m.group(2))
  else:
    return None


def is_secure_port(port):
  """
  Returns True if the port is root-owned (privileged) on *nix systems
  """
  if port is not None:
    return port < 1024
  else:
    return False
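
# For example, get_port("0.0.0.0:1019") returns 1019 and is_secure_port(1019) is True,
# since ports below 1024 can only be bound by root on *nix systems.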

# upgrades would cause these directories to have a version instead of "current"
# which would cause a lot of problems when writing out hadoop-env.sh; instead
# force the use of "current" in the hook
hdfs_user_nofile_limit = default("/configurations/hadoop-env/hdfs_user_nofile_limit", "128000")
hadoop_home = stack_select.get_hadoop_dir("home")
hadoop_hdfs_home = stack_select.get_hadoop_dir("hdfs_home")
hadoop_mapred_home = stack_select.get_hadoop_dir("mapred_home")
hadoop_yarn_home = stack_select.get_hadoop_dir("yarn_home")
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_lib_home = stack_select.get_hadoop_dir("lib")

ozone_manager_hosts = default("/clusterHostInfo/ozone_manager_hosts", [])
has_ozone = not len(ozone_manager_hosts) == 0
if version:
  hadoop_ozone_home = os.path.join(stack_root, version, "hadoop-ozone")
else:
  hadoop_ozone_home = os.path.join(stack_root, "current", "hadoop-ozone")

hadoop_dir = "/etc/hadoop"
hadoop_java_io_tmpdir = os.path.join(tmp_dir, "hadoop_java_io_tmpdir")
datanode_max_locked_memory = config['configurations']['hdfs-site']['dfs.datanode.max.locked.memory']
is_datanode_max_locked_memory_set = not is_empty(config['configurations']['hdfs-site']['dfs.datanode.max.locked.memory'])

mapreduce_libs_path = format("{hadoop_mapred_home}/*,{hadoop_mapred_home}/lib/*")

tez_home = '/usr/lib/tez'
tez_conf_dir = '/etc/tez/conf'
# hadoop parameters for stacks that support rolling_upgrade
if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  tez_home = format("{stack_root}/current/tez-client")

if not security_enabled:
  hadoop_secure_dn_user = '""'
else:
  dfs_dn_port = get_port(dfs_dn_addr)
  dfs_dn_http_port = get_port(dfs_dn_http_addr)
  dfs_dn_https_port = get_port(dfs_dn_https_addr)
  # Avoid situations where the datanode cannot be started as a non-root user because it is
  # configured to bind root-owned (privileged) ports
  if dfs_http_policy == "HTTPS_ONLY":
    secure_dn_ports_are_in_use = is_secure_port(dfs_dn_port) or is_secure_port(dfs_dn_https_port)
  elif dfs_http_policy == "HTTP_AND_HTTPS":
    secure_dn_ports_are_in_use = is_secure_port(dfs_dn_port) or is_secure_port(dfs_dn_http_port) or is_secure_port(dfs_dn_https_port)
  else:  # dfs_http_policy == "HTTP_ONLY" or not defined
    secure_dn_ports_are_in_use = is_secure_port(dfs_dn_port) or is_secure_port(dfs_dn_http_port)

  if secure_dn_ports_are_in_use:
    hadoop_secure_dn_user = hdfs_user
  else:
    hadoop_secure_dn_user = '""'
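
# When no privileged ports are configured, hadoop_secure_dn_user is the literal string '""'
# so that it presumably expands to an empty quoted value when hadoop-env.sh is rendered,
# instead of leaving the variable unset.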

# hadoop params
hdfs_log_dir_prefix = config['configurations']['hadoop-env']['hdfs_log_dir_prefix']
hadoop_pid_dir_prefix = config['configurations']['hadoop-env']['hadoop_pid_dir_prefix']
hadoop_root_logger = config['configurations']['hadoop-env']['hadoop_root_logger']

jsvc_path = "/usr/lib/bigtop-utils"

hadoop_heapsize = config['configurations']['hadoop-env']['hadoop_heapsize']
namenode_heapsize = config['configurations']['hadoop-env']['namenode_heapsize']
namenode_opt_newsize = config['configurations']['hadoop-env']['namenode_opt_newsize']
namenode_opt_maxnewsize = config['configurations']['hadoop-env']['namenode_opt_maxnewsize']
namenode_opt_permsize = format_jvm_option("/configurations/hadoop-env/namenode_opt_permsize", "128m")
namenode_opt_maxpermsize = format_jvm_option("/configurations/hadoop-env/namenode_opt_maxpermsize", "256m")

jtnode_opt_newsize = "200m"
jtnode_opt_maxnewsize = "200m"
jtnode_heapsize = "1024m"
ttnode_heapsize = "1024m"

dtnode_heapsize = config['configurations']['hadoop-env']['dtnode_heapsize']
nfsgateway_heapsize = config['configurations']['hadoop-env']['nfsgateway_heapsize']
mapred_pid_dir_prefix = default("/configurations/mapred-env/mapred_pid_dir_prefix", "/var/run/hadoop-mapreduce")
mapred_log_dir_prefix = default("/configurations/mapred-env/mapred_log_dir_prefix", "/var/log/hadoop-mapreduce")
hadoop_env_sh_template = config['configurations']['hadoop-env']['content']

# users and groups
hbase_user = config['configurations']['hbase-env']['hbase_user']
smoke_user = config['configurations']['cluster-env']['smokeuser']
gmetad_user = config['configurations']['ganglia-env']["gmetad_user"]
gmond_user = config['configurations']['ganglia-env']["gmond_user"]
tez_user = config['configurations']['tez-env']["tez_user"]
oozie_user = config['configurations']['oozie-env']["oozie_user"]
falcon_user = config['configurations']['falcon-env']["falcon_user"]
ranger_user = config['configurations']['ranger-env']["ranger_user"]
zeppelin_user = config['configurations']['zeppelin-env']["zeppelin_user"]
zeppelin_group = config['configurations']['zeppelin-env']["zeppelin_group"]

user_group = config['configurations']['cluster-env']['user_group']

ganglia_server_hosts = default("/clusterHostInfo/ganglia_server_hosts", [])
hdfs_client_hosts = default("/clusterHostInfo/hdfs_client_hosts", [])
hbase_master_hosts = default("/clusterHostInfo/hbase_master_hosts", [])
oozie_servers = default("/clusterHostInfo/oozie_server", [])
falcon_server_hosts = default("/clusterHostInfo/falcon_server_hosts", [])
ranger_admin_hosts = default("/clusterHostInfo/ranger_admin_hosts", [])
zeppelin_server_hosts = default("/clusterHostInfo/zeppelin_server_hosts", [])

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

has_hdfs_clients = not len(hdfs_client_hosts) == 0
has_ganglia_server = not len(ganglia_server_hosts) == 0
has_tez = 'tez-site' in config['configurations']
has_hbase_masters = not len(hbase_master_hosts) == 0
has_oozie_server = not len(oozie_servers) == 0
has_falcon_server_hosts = not len(falcon_server_hosts) == 0
has_ranger_admin = not len(ranger_admin_hosts) == 0
has_zeppelin_server = not len(zeppelin_server_hosts) == 0
stack_supports_zk_security = check_stack_feature(StackFeature.SECURE_ZOOKEEPER, version_for_stack_feature_checks)

hostname = config['agentLevelParams']['hostname']
hdfs_site = config['configurations']['hdfs-site']

# HDFS High Availability properties
dfs_ha_enabled = False
dfs_ha_nameservices = default('/configurations/hdfs-site/dfs.internal.nameservices', None)
if dfs_ha_nameservices is None:
  dfs_ha_nameservices = default('/configurations/hdfs-site/dfs.nameservices', None)
# on stacks without any filesystem there is no hdfs-site
dfs_ha_namenode_ids_all_ns = get_properties_for_all_nameservices(hdfs_site, 'dfs.ha.namenodes') if 'hdfs-site' in config['configurations'] else {}
dfs_ha_automatic_failover_enabled = default("/configurations/hdfs-site/dfs.ha.automatic-failover.enabled", False)

# Values for the current host
namenode_id = None
namenode_rpc = None
dfs_ha_namemodes_ids_list = []
other_namenode_id = None

for ns, dfs_ha_namenode_ids in dfs_ha_namenode_ids_all_ns.iteritems():
  found = False
  if not is_empty(dfs_ha_namenode_ids):
    dfs_ha_namemodes_ids_list = dfs_ha_namenode_ids.split(",")
    dfs_ha_namenode_ids_array_len = len(dfs_ha_namemodes_ids_list)
    if dfs_ha_namenode_ids_array_len > 1:
      dfs_ha_enabled = True

  if dfs_ha_enabled:
    for nn_id in dfs_ha_namemodes_ids_list:
      nn_host = config['configurations']['hdfs-site'][format('dfs.namenode.rpc-address.{ns}.{nn_id}')]
      if hostname in nn_host:
        namenode_id = nn_id
        namenode_rpc = nn_host
        found = True

    # With HA enabled, namenode_address is recomputed
    namenode_address = format('hdfs://{ns}')

    # Calculate the namenode id of the other namenode. This is needed during RU to initiate an HA failover using ZKFC.
    if namenode_id is not None and len(dfs_ha_namemodes_ids_list) == 2:
      other_namenode_id = list(set(dfs_ha_namemodes_ids_list) - set([namenode_id]))[0]

  if found:
    break
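
# At this point namenode_id / namenode_rpc describe the NameNode running on this host (if any),
# and other_namenode_id identifies its HA peer, which is used to trigger a ZKFC failover
# during a rolling upgrade.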

if has_hdfs_clients or dfs_type == 'HCFS':
  hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
  hadoop_conf_secure_dir = os.path.join(hadoop_conf_dir, "secure")

hbase_tmp_dir = "/tmp/hbase-hbase"

proxyuser_group = default("/configurations/hadoop-env/proxyuser_group", "users")
ranger_group = config['configurations']['ranger-env']['ranger_group']
dfs_cluster_administrators_group = config['configurations']['hdfs-site']["dfs.cluster.administrators"]

sysprep_skip_create_users_and_groups = default("/configurations/cluster-env/sysprep_skip_create_users_and_groups", False)
ignore_groupsusers_create = default("/configurations/cluster-env/ignore_groupsusers_create", False)
fetch_nonlocal_groups = config['configurations']['cluster-env']["fetch_nonlocal_groups"]

smoke_user_dirs = format("/tmp/hadoop-{smoke_user},/tmp/hsperfdata_{smoke_user},/home/{smoke_user},/tmp/{smoke_user},/tmp/sqoop-{smoke_user}")
if has_hbase_masters:
  hbase_user_dirs = format("/home/{hbase_user},/tmp/{hbase_user},/usr/bin/{hbase_user},/var/log/{hbase_user},{hbase_tmp_dir}")

# repo params
repo_info = config['hostLevelParams']['repoInfo']
service_repo_info = default("/hostLevelParams/service_repo_info", None)

user_to_groups_dict = {}

# Append the new user-group mapping to the dict
try:
  user_group_map = ast.literal_eval(config['clusterLevelParams']['user_groups'])
  for key in user_group_map.iterkeys():
    user_to_groups_dict[key] = user_group_map[key]
except ValueError:
  print('User Group mapping (user_groups) is missing in clusterLevelParams')
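
# Any user not present in user_to_groups_dict falls back to the cluster's default user_group;
# the defaultdict below supplies that value for missing keys.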
user_to_gid_dict = collections.defaultdict(lambda: user_group)

user_list = json.loads(config['clusterLevelParams']['user_list'])
group_list = json.loads(config['clusterLevelParams']['group_list'])
host_sys_prepped = default("/ambariLevelParams/host_sys_prepped", False)

tez_am_view_acls = config['configurations']['tez-site']["tez.am.view-acls"]
override_uid = str(default("/configurations/cluster-env/override_uid", "true")).lower()

# if NameNode HA is enabled on a secure (Kerberized) cluster, access ZooKeeper securely
if stack_supports_zk_security and dfs_ha_enabled and security_enabled:
  hadoop_zkfc_opts = format("-Dzookeeper.sasl.client=true -Dzookeeper.sasl.client.username=zookeeper -Djava.security.auth.login.config={hadoop_conf_secure_dir}/hdfs_jaas.conf -Dzookeeper.sasl.clientconfig=Client")
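
# These JVM options enable SASL authentication from the ZKFC to ZooKeeper using the "Client"
# JAAS section in {hadoop_conf_secure_dir}/hdfs_jaas.conf; presumably they are appended to the
# ZKFC launch options (e.g. HADOOP_ZKFC_OPTS) when hadoop-env.sh is rendered.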