params.py

  1. """
  2. Licensed to the Apache Software Foundation (ASF) under one
  3. or more contributor license agreements. See the NOTICE file
  4. distributed with this work for additional information
  5. regarding copyright ownership. The ASF licenses this file
  6. to you under the Apache License, Version 2.0 (the
  7. "License"); you may not use this file except in compliance
  8. with the License. You may obtain a copy of the License at
  9. http://www.apache.org/licenses/LICENSE-2.0
  10. Unless required by applicable law or agreed to in writing, software
  11. distributed under the License is distributed on an "AS IS" BASIS,
  12. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. See the License for the specific language governing permissions and
  14. limitations under the License.
  15. """
import os

from resource_management.libraries.functions import conf_select
from resource_management.libraries.functions import stack_select
from resource_management.libraries.functions.format_jvm_option import format_jvm_option_value
from resource_management.libraries.functions import format
from resource_management.libraries.functions.version import format_stack_version, get_major_version
from resource_management.libraries.script.script import Script
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources
from resource_management.libraries.resources.hdfs_resource import HdfsResource
from resource_management.libraries.functions.stack_features import check_stack_feature
from resource_management.libraries.functions.stack_features import get_stack_feature_version
from resource_management.libraries.functions import StackFeature
from ambari_commons.constants import AMBARI_SUDO_BINARY
from resource_management.libraries.functions.cluster_settings import get_cluster_setting_value
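
# Execution context provided by the Ambari agent: the merged command configuration,
# the execution command sent by the server, and the configs for the current module (service).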
config = Script.get_config()
execution_command = Script.get_execution_command()
module_configs = Script.get_module_configs()
module_name = execution_command.get_module_name()

tmp_dir = Script.get_tmp_dir()
artifact_dir = tmp_dir + "/AMBARI-artifacts"

version_for_stack_feature_checks = get_stack_feature_version(config)
stack_supports_hadoop_custom_extensions = check_stack_feature(StackFeature.HADOOP_CUSTOM_EXTENSIONS, version_for_stack_feature_checks)

sudo = AMBARI_SUDO_BINARY

# Global flag enabling or disabling the sysprep feature
host_sys_prepped = execution_command.is_host_system_prepared()

# Whether to skip copying fast-hdfs-resource.jar to /var/lib/ambari-agent/lib/
# This is required if tarballs are going to be copied to HDFS, so set to False
sysprep_skip_copy_fast_jar_hdfs = host_sys_prepped and get_cluster_setting_value('sysprep_skip_copy_fast_jar_hdfs')

# Whether to skip setting up the unlimited key JCE policy
sysprep_skip_setup_jce = host_sys_prepped and get_cluster_setting_value('sysprep_skip_setup_jce')
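
# Mpack/stack version strings reported by the execution command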
stack_version_unformatted = execution_command.get_mpack_version()
stack_version_formatted = format_stack_version(stack_version_unformatted)
major_stack_version = get_major_version(stack_version_formatted)

dfs_type = execution_command.get_dfs_type()
hadoop_conf_dir = "/etc/hadoop/conf"
component_list = execution_command.get_local_components()

hdfs_tmp_dir = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_tmp_dir', '/tmp')
hadoop_metrics2_properties_content = module_configs.get_property_value(module_name, 'hadoop-metrics2.properties', 'content')

hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_lib_home = stack_select.get_hadoop_dir("lib")
hadoop_bin = stack_select.get_hadoop_dir("sbin")
mapreduce_libs_path = "/usr/hdp/current/hadoop-mapreduce-client/*"
hadoop_home = stack_select.get_hadoop_dir("home")
create_lib_snappy_symlinks = False

current_service = module_name

#security params
security_enabled = get_cluster_setting_value('security_enabled')

ambari_server_resources_url = execution_command.get_jdk_location()
if ambari_server_resources_url and ambari_server_resources_url.endswith('/'):
  ambari_server_resources_url = ambari_server_resources_url[:-1]

# Unlimited key JCE policy params
jce_policy_zip = execution_command.get_jce_name()  # None when jdk is already installed by user
unlimited_key_jce_required = execution_command.check_unlimited_key_jce_required()
jdk_name = execution_command.get_jdk_name()
java_home = execution_command.get_java_home()
java_exec = "{0}/bin/java".format(java_home) if java_home is not None else "/bin/java"

#users and groups
has_hadoop_env = bool(module_configs.get_all_properties(module_name, "hadoop-env"))
mapred_user = module_configs.get_property_value(module_name, 'mapred-env', 'mapred_user')
hdfs_user = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_user')
yarn_user = module_configs.get_property_value(module_name, 'yarn-env', 'yarn_user')
user_group = get_cluster_setting_value('user_group')

#hosts
hostname = execution_command.get_host_name()
ambari_server_hostname = execution_command.get_ambari_server_host()
rm_host = execution_command.get_component_hosts('resourcemanager')
slave_hosts = execution_command.get_component_hosts('datanode')
oozie_servers = execution_command.get_component_hosts('oozie_server')
hcat_server_hosts = execution_command.get_component_hosts('webhcat_server')
hive_server_host = execution_command.get_component_hosts('hive_server')
hs_host = execution_command.get_component_hosts('historyserver')
namenode_host = execution_command.get_component_hosts('namenode')
zk_hosts = execution_command.get_component_hosts('zookeeper_server')
ganglia_server_hosts = execution_command.get_component_hosts('ganglia_server')

cluster_name = execution_command.get_cluster_name()
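
# AMS collector hosts: prefer externally managed collectors configured in cluster-env;
# otherwise fall back to the metrics_collector components managed by this cluster.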
set_instanceId = "false"
ams_collector_hosts = module_configs.get_property_value(module_name, 'cluster-env', 'metrics_collector_external_hosts')
if ams_collector_hosts:
  set_instanceId = "true"
else:
  ams_collector_hosts = ",".join(execution_command.get_component_hosts('metrics_collector'))
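
# Presence flags for optional components and this host's roles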
has_namenode = not len(namenode_host) == 0
has_resourcemanager = not len(rm_host) == 0
has_slaves = not len(slave_hosts) == 0
has_oozie_server = not len(oozie_servers) == 0
has_hcat_server_host = not len(hcat_server_hosts) == 0
has_hive_server_host = not len(hive_server_host) == 0
has_zk_host = not len(zk_hosts) == 0
has_ganglia_server = not len(ganglia_server_hosts) == 0
has_metric_collector = not len(ams_collector_hosts) == 0

is_namenode_master = hostname in namenode_host
is_rmnode_master = hostname in rm_host
is_hsnode_master = hostname in hs_host
is_slave = hostname in slave_hosts

if has_ganglia_server:
  ganglia_server_host = ganglia_server_hosts[0]

metric_collector_port = module_configs.get_property_value(module_name, 'cluster-env', 'metrics_collector_external_port')
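
# Derive the metrics sink endpoint (port, protocol) and truststore settings from the ams-* configs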
if has_metric_collector:
  if not metric_collector_port:
    metric_collector_web_address = module_configs.get_property_value(module_name, 'ams-env', 'timeline.metrics.service.webapp.address', '0.0.0.0:6188')
    if metric_collector_web_address.find(':') != -1:
      metric_collector_port = metric_collector_web_address.split(':')[1]
    else:
      metric_collector_port = '6188'
  if module_configs.get_property_value(module_name, 'ams-env', 'timeline.metrics.service.http.policy', 'HTTP_ONLY') == "HTTPS_ONLY":
    metric_collector_protocol = 'https'
  else:
    metric_collector_protocol = 'http'
  metric_truststore_path = module_configs.get_property_value(module_name, 'ams-ssl-client', 'ssl.client.truststore.location', '')
  metric_truststore_type = module_configs.get_property_value(module_name, 'ams-ssl-client', 'ssl.client.truststore.type', '')
  metric_truststore_password = module_configs.get_property_value(module_name, 'ams-ssl-client', 'ssl.client.truststore.password', '')
  pass

metrics_report_interval = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.sink.report.interval', 60)
metrics_collection_period = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.sink.collection.period', 10)
host_in_memory_aggregation = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.host.inmemory.aggregation', True)
host_in_memory_aggregation_port = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.host.inmemory.aggregation.port', 61888)

# Cluster Zookeeper quorum
zookeeper_quorum = None
if has_zk_host:
  if not zookeeper_quorum:
    zookeeper_clientPort = '2181'
    zookeeper_quorum = (':' + zookeeper_clientPort + ',').join(execution_command.get_component_hosts('zookeeper_server'))
    # last port config
    zookeeper_quorum += ':' + zookeeper_clientPort

#hadoop params
if has_namenode or dfs_type == 'HCFS':
  hadoop_tmp_dir = format("/tmp/hadoop-{hdfs_user}")
  hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
  task_log4j_properties_location = os.path.join(hadoop_conf_dir, "task-log4j.properties")

hadoop_pid_dir_prefix = module_configs.get_property_value(module_name, 'hadoop-env', 'hadoop_pid_dir_prefix')
hdfs_log_dir_prefix = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_log_dir_prefix')

#db params
oracle_driver_symlink_url = format("{ambari_server_resources_url}/oracle-jdbc-driver.jar")
mysql_driver_symlink_url = format("{ambari_server_resources_url}/mysql-jdbc-driver.jar")
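
# Root Cause Analysis (RCA) logging: when RCA is disabled, the "###" prefix below is used
# to comment out RCA-related lines in the rendered templates.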
if has_namenode:
  rca_enabled = module_configs.get_property_value(module_name, 'hadoop-env', 'rca_enabled', False)
else:
  rca_enabled = False
rca_disabled_prefix = "###"
if rca_enabled == True:
  rca_prefix = ""
else:
  rca_prefix = rca_disabled_prefix

#hadoop-env.sh
jsvc_path = "/usr/lib/bigtop-utils"

hadoop_heapsize = module_configs.get_property_value(module_name, 'hadoop-env', 'hadoop_heapsize')
namenode_heapsize = module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_heapsize')
namenode_opt_newsize = module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_newsize')
namenode_opt_maxnewsize = module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_maxnewsize')
namenode_opt_permsize = format_jvm_option_value(module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_permsize', '128m'), '128m')
namenode_opt_maxpermsize = format_jvm_option_value(module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_maxpermsize', '256m'), '256m')

ttnode_heapsize = "1024m"

dtnode_heapsize = module_configs.get_property_value(module_name, 'hadoop-env', 'dtnode_heapsize')
mapred_pid_dir_prefix = module_configs.get_property_value(module_name, 'mapred-env', 'mapred_pid_dir_prefix', '/var/run/hadoop-mapreduce')
mapred_log_dir_prefix = module_configs.get_property_value(module_name, 'mapred-env', 'mapred_log_dir_prefix', '/var/log/hadoop-mapreduce')

#log4j.properties
yarn_log_dir_prefix = module_configs.get_property_value(module_name, 'yarn-env', 'yarn_log_dir_prefix', '/var/log/hadoop-yarn')

dfs_hosts = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.hosts')

# Hdfs log4j settings
hadoop_log_max_backup_size = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_log_max_backup_size', 256)
hadoop_log_number_of_backup_files = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_log_number_of_backup_files', 10)
hadoop_security_log_max_backup_size = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_security_log_max_backup_size', 256)
hadoop_security_log_number_of_backup_files = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_security_log_number_of_backup_files', 20)

# Yarn log4j settings
yarn_rm_summary_log_max_backup_size = module_configs.get_property_value(module_name, 'yarn-log4j', 'yarn_rm_summary_log_max_backup_size', 256)
yarn_rm_summary_log_number_of_backup_files = module_configs.get_property_value(module_name, 'yarn-log4j', 'yarn_rm_summary_log_number_of_backup_files', 20)

#log4j.properties
log4j_props = module_configs.get_property_value(module_name, 'hdfs-log4j', 'content')
if log4j_props and module_configs.get_property_value(module_name, 'yarn-log4j', 'content'):
  log4j_props += module_configs.get_property_value(module_name, 'yarn-log4j', 'content')
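
# Topology refresh flag and JDK/JCE artifacts provisioned by the Ambari server for agent-side setup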
refresh_topology = execution_command.need_refresh_topology()

ambari_java_home = execution_command.get_ambari_java_home()
ambari_jdk_name = execution_command.get_ambari_jdk_name()
ambari_jce_name = execution_command.get_ambari_jce_name()
ambari_libs_dir = "/var/lib/ambari-agent/lib"

is_webhdfs_enabled = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.webhdfs.enabled')
default_fs = module_configs.get_property_value(module_name, 'core-site', 'fs.defaultFS')

#host info
all_hosts = execution_command.get_all_hosts()
all_racks = execution_command.get_all_racks()
all_ipv4_ips = execution_command.get_all_ipv4_ips()
slave_hosts = execution_command.get_component_hosts('datanode')

#topology files
net_topology_script_file_path = "/etc/hadoop/conf/topology_script.py"
net_topology_script_dir = os.path.dirname(net_topology_script_file_path)
net_topology_mapping_data_file_name = 'topology_mappings.data'
net_topology_mapping_data_file_path = os.path.join(net_topology_script_dir, net_topology_mapping_data_file_name)

#Added logic to create /tmp and /user directory for HCFS stack.
has_core_site = bool(module_configs.get_all_properties(module_name, "core-site"))
hdfs_user_keytab = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_user_keytab')
kinit_path_local = get_kinit_path()
stack_version_unformatted = execution_command.get_mpack_version()
stack_version_formatted = format_stack_version(stack_version_unformatted)
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hdfs_principal_name = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_principal_name')
hdfs_site = module_configs.get_all_properties(module_name, 'hdfs-site')
smoke_user = get_cluster_setting_value('smokeuser')
smoke_hdfs_user_dir = format("/user/{smoke_user}")
smoke_hdfs_user_mode = 0770

##### Namenode RPC ports - metrics config section start #####
# Figure out the rpc ports for current namenode
nn_rpc_client_port = None
nn_rpc_dn_port = None
nn_rpc_healthcheck_port = None

namenode_id = None
namenode_rpc = None

dfs_ha_enabled = False
dfs_ha_nameservices = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.internal.nameservices')
if dfs_ha_nameservices is None:
  dfs_ha_nameservices = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.nameservices')
dfs_ha_namenode_ids = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.ha.namenodes.{dfs_ha_nameservices}'))

dfs_ha_namemodes_ids_list = []
other_namenode_id = None

if dfs_ha_namenode_ids:
  dfs_ha_namemodes_ids_list = dfs_ha_namenode_ids.split(",")
  dfs_ha_namenode_ids_array_len = len(dfs_ha_namemodes_ids_list)
  if dfs_ha_namenode_ids_array_len > 1:
    dfs_ha_enabled = True
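
# With HA enabled, determine which configured namenode id (if any) corresponds to this host
# and record its RPC address; otherwise fall back to the single dfs.namenode.rpc-address value.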
if dfs_ha_enabled:
  for nn_id in dfs_ha_namemodes_ids_list:
    nn_host = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.namenode.rpc-address.{dfs_ha_nameservices}.{nn_id}'))
    if hostname.lower() in nn_host.lower():
      namenode_id = nn_id
      namenode_rpc = nn_host
    pass
  pass
else:
  namenode_rpc = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.namenode.rpc-address', default_fs)

# if HDFS is not installed in the cluster, then don't try to access namenode_rpc
if has_namenode and namenode_rpc and module_configs.get_all_properties(module_name, 'core-site'):
  port_str = namenode_rpc.split(':')[-1].strip()
  try:
    nn_rpc_client_port = int(port_str)
  except ValueError:
    nn_rpc_client_port = None

if dfs_ha_enabled:
  dfs_service_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.namenode.servicerpc-address.{dfs_ha_nameservices}.{namenode_id}'))
  dfs_lifeline_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.namenode.lifeline.rpc-address.{dfs_ha_nameservices}.{namenode_id}'))
else:
  dfs_service_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.namenode.servicerpc-address')
  dfs_lifeline_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.namenode.lifeline.rpc-address')

if dfs_service_rpc_address:
  nn_rpc_dn_port = dfs_service_rpc_address.split(':')[1].strip()

if dfs_lifeline_rpc_address:
  nn_rpc_healthcheck_port = dfs_lifeline_rpc_address.split(':')[1].strip()
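
# Flags indicating which of the namenode RPC ports could actually be resolved from the configs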
is_nn_client_port_configured = False if nn_rpc_client_port is None else True
is_nn_dn_port_configured = False if nn_rpc_dn_port is None else True
is_nn_healthcheck_port_configured = False if nn_rpc_healthcheck_port is None else True
##### end #####

import functools

#create partial functions with common arguments for every HdfsResource call
#to create/delete/copyfromlocal hdfs directories/files we need to call params.HdfsResource in code
HdfsResource = functools.partial(
  HdfsResource,
  user=hdfs_user,
  hdfs_resource_ignore_file="/var/lib/ambari-agent/data/.hdfs_resource_ignore",
  security_enabled=security_enabled,
  keytab=hdfs_user_keytab,
  kinit_path_local=kinit_path_local,
  hadoop_bin_dir=hadoop_bin_dir,
  hadoop_conf_dir=hadoop_conf_dir,
  principal_name=hdfs_principal_name,
  hdfs_site=hdfs_site,
  default_fs=default_fs,
  immutable_paths=get_not_managed_resources(),
  dfs_type=dfs_type
)
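
# Illustrative usage only (not part of this module): downstream service scripts typically
# queue HDFS operations through the pre-bound partial and then flush them, e.g.
#   params.HdfsResource(params.hdfs_tmp_dir, type="directory", action="create_on_execute", owner=params.hdfs_user)
#   params.HdfsResource(None, action="execute")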