"""
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os

from resource_management.libraries.functions import conf_select
from resource_management.libraries.functions import stack_select
from resource_management.libraries.functions import default
from resource_management.libraries.functions.format_jvm_option import format_jvm_option_value
from resource_management.libraries.functions import format
from resource_management.libraries.functions.version import format_stack_version, compare_versions, get_major_version
from ambari_commons.os_check import OSCheck
from resource_management.libraries.script.script import Script
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources
from resource_management.libraries.resources.hdfs_resource import HdfsResource
from resource_management.libraries.functions.stack_features import check_stack_feature
from resource_management.libraries.functions.stack_features import get_stack_feature_version
from resource_management.libraries.functions import StackFeature
from resource_management.libraries.execution_command import execution_command
from resource_management.libraries.execution_command import module_configs
from ambari_commons.constants import AMBARI_SUDO_BINARY
from resource_management.libraries.functions.cluster_settings import get_cluster_setting_value
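
# NOTE: this module is imported by the stack hook scripts, so everything below
# is evaluated once per agent command; all values are resolved from the command
# JSON sent by the Ambari server, through the execution_command /
# module_configs wrappers obtained from Script.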
config = Script.get_config()
execution_command = Script.get_execution_command()
module_configs = Script.get_module_configs()
module_name = execution_command.get_module_name()

tmp_dir = Script.get_tmp_dir()
artifact_dir = tmp_dir + "/AMBARI-artifacts"

version_for_stack_feature_checks = get_stack_feature_version(config)
stack_supports_hadoop_custom_extensions = check_stack_feature(StackFeature.HADOOP_CUSTOM_EXTENSIONS, version_for_stack_feature_checks)

sudo = AMBARI_SUDO_BINARY
# Global flag enabling or disabling the sysprep feature
host_sys_prepped = execution_command.is_host_system_prepared()
# Whether to skip copying fast-hdfs-resource.jar to /var/lib/ambari-agent/lib/.
# Copying the jar is required whenever tarballs are copied to HDFS, so this
# must remain False in that case.
sysprep_skip_copy_fast_jar_hdfs = host_sys_prepped and get_cluster_setting_value('sysprep_skip_copy_fast_jar_hdfs')
# Whether to skip setting up the unlimited key JCE policy
sysprep_skip_setup_jce = host_sys_prepped and get_cluster_setting_value('sysprep_skip_setup_jce')

stack_version_unformatted = execution_command.get_mpack_version()
stack_version_formatted = format_stack_version(stack_version_unformatted)
major_stack_version = get_major_version(stack_version_formatted)

dfs_type = execution_command.get_dfs_type()
hadoop_conf_dir = "/etc/hadoop/conf"
component_list = execution_command.get_local_components()

hdfs_tmp_dir = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_tmp_dir', '/tmp')
hadoop_metrics2_properties_content = module_configs.get_property_value(module_name, 'hadoop-metrics2.properties', 'content')

hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_lib_home = stack_select.get_hadoop_dir("lib")
hadoop_bin = stack_select.get_hadoop_dir("sbin")
mapreduce_libs_path = "/usr/hdp/current/hadoop-mapreduce-client/*"
hadoop_home = stack_select.get_hadoop_dir("home")
create_lib_snappy_symlinks = False

current_service = module_name
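
# The stack_select lookups resolve to the active stack's directories,
# e.g. hadoop_home -> /usr/hdp/current/hadoop-client on HDP (the exact
# paths depend on the installed stack and version).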
#security params
security_enabled = get_cluster_setting_value('security_enabled')

ambari_server_resources_url = execution_command.get_jdk_location()
if ambari_server_resources_url and ambari_server_resources_url.endswith('/'):
  ambari_server_resources_url = ambari_server_resources_url[:-1]

# Unlimited key JCE policy params
jce_policy_zip = execution_command.get_jce_name() # None when jdk is already installed by user
unlimited_key_jce_required = execution_command.check_unlimited_key_jce_required()
jdk_name = execution_command.get_jdk_name()
java_home = execution_command.get_java_home()
java_exec = "{0}/bin/java".format(java_home) if java_home is not None else "/bin/java"
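# For example, with java_home = "/usr/jdk64/jdk1.8.0_112" this yields
# java_exec = "/usr/jdk64/jdk1.8.0_112/bin/java"; the concrete JDK path is
# whatever the Ambari server advertises for this cluster.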
#users and groups
has_hadoop_env = bool(module_configs.get_all_properties(module_name, "hadoop-env"))
mapred_user = module_configs.get_property_value(module_name, 'mapred-env', 'mapred_user')
hdfs_user = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_user')
yarn_user = module_configs.get_property_value(module_name, 'yarn-env', 'yarn_user')
user_group = get_cluster_setting_value('user_group')

#hosts
hostname = execution_command.get_host_name()
ambari_server_hostname = execution_command.get_ambari_server_host()
rm_host = execution_command.get_component_hosts('resourcemanager')
slave_hosts = execution_command.get_component_hosts('datanode')
oozie_servers = execution_command.get_component_hosts('oozie_server')
hcat_server_hosts = execution_command.get_component_hosts('webhcat_server')
hive_server_host = execution_command.get_component_hosts('hive_server')
hbase_master_hosts = execution_command.get_component_hosts('hbase_master')
hs_host = execution_command.get_component_hosts('historyserver')
namenode_host = execution_command.get_component_hosts('namenode')
zk_hosts = execution_command.get_component_hosts('zookeeper_server')
ganglia_server_hosts = execution_command.get_component_hosts('ganglia_server')

cluster_name = execution_command.get_cluster_name()
set_instanceId = "false"
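# External metrics collectors (cluster-env/metrics_collector_external_hosts)
# take precedence over the collectors managed by this cluster; when they are
# used, set_instanceId is flipped to "true" so the sink configuration can tag
# metrics with an instance id.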
ams_collector_hosts = module_configs.get_property_value(module_name, 'cluster-env', 'metrics_collector_external_hosts')
if ams_collector_hosts:
  set_instanceId = "true"
else:
  ams_collector_hosts = ",".join(execution_command.get_component_hosts('metrics_collector'))
has_namenode = not len(namenode_host) == 0
has_resourcemanager = not len(rm_host) == 0
has_slaves = not len(slave_hosts) == 0
has_oozie_server = not len(oozie_servers) == 0
has_hcat_server_host = not len(hcat_server_hosts) == 0
has_hive_server_host = not len(hive_server_host) == 0
has_hbase_masters = not len(hbase_master_hosts) == 0
has_zk_host = not len(zk_hosts) == 0
has_ganglia_server = not len(ganglia_server_hosts) == 0
has_metric_collector = not len(ams_collector_hosts) == 0

is_namenode_master = hostname in namenode_host
is_rmnode_master = hostname in rm_host
is_hsnode_master = hostname in hs_host
is_hbase_master = hostname in hbase_master_hosts
is_slave = hostname in slave_hosts

if has_ganglia_server:
  ganglia_server_host = ganglia_server_hosts[0]
metric_collector_port = module_configs.get_property_value(module_name, 'cluster-env', 'metrics_collector_external_port')
if has_metric_collector:
  if not metric_collector_port:
    metric_collector_web_address = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.service.webapp.address', '0.0.0.0:6188')
    if metric_collector_web_address.find(':') != -1:
      metric_collector_port = metric_collector_web_address.split(':')[1]
    else:
      metric_collector_port = '6188'
  if module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.service.http.policy', 'HTTP_ONLY') == "HTTPS_ONLY":
    metric_collector_protocol = 'https'
  else:
    metric_collector_protocol = 'http'
  metric_truststore_path = module_configs.get_property_value(module_name, 'ams-ssl-client', 'ssl.client.truststore.location', '')
  metric_truststore_type = module_configs.get_property_value(module_name, 'ams-ssl-client', 'ssl.client.truststore.type', '')
  metric_truststore_password = module_configs.get_property_value(module_name, 'ams-ssl-client', 'ssl.client.truststore.password', '')
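# The truststore settings above are only consulted when
# metric_collector_protocol is 'https'.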
metrics_report_interval = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.sink.report.interval', 60)
metrics_collection_period = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.sink.collection.period', 10)
host_in_memory_aggregation = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.host.inmemory.aggregation', True)
host_in_memory_aggregation_port = module_configs.get_property_value(module_name, 'ams-site', 'timeline.metrics.host.inmemory.aggregation.port', 61888)
# Cluster ZooKeeper quorum
zookeeper_quorum = None
if has_zk_host:
  if not zookeeper_quorum:
    zookeeper_clientPort = '2181'
    zookeeper_quorum = (':' + zookeeper_clientPort + ',').join(execution_command.get_component_hosts('zookeeper_server'))
    # append the client port to the last host as well
    zookeeper_quorum += ':' + zookeeper_clientPort
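# e.g. zookeeper_quorum -> "zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181"
# (illustrative hostnames; the client port is assumed to be the ZooKeeper
# default of 2181 here)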
#hadoop params
if has_namenode or dfs_type == 'HCFS':
  hadoop_tmp_dir = format("/tmp/hadoop-{hdfs_user}")
  hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
  task_log4j_properties_location = os.path.join(hadoop_conf_dir, "task-log4j.properties")

hadoop_pid_dir_prefix = module_configs.get_property_value(module_name, 'hadoop-env', 'hadoop_pid_dir_prefix')
hdfs_log_dir_prefix = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_log_dir_prefix')
hbase_tmp_dir = "/tmp/hbase-hbase"
#db params
oracle_driver_symlink_url = format("{ambari_server_resources_url}/oracle-jdbc-driver.jar")
mysql_driver_symlink_url = format("{ambari_server_resources_url}/mysql-jdbc-driver.jar")

if has_namenode:
  rca_enabled = module_configs.get_property_value(module_name, 'hadoop-env', 'rca_enabled', False)
else:
  rca_enabled = False
rca_disabled_prefix = "###"
if rca_enabled == True:
  rca_prefix = ""
else:
  rca_prefix = rca_disabled_prefix
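# rca_prefix is substituted into the generated log4j template; when RCA is
# disabled, the "###" prefix effectively comments the RCA appender lines out.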
#hadoop-env.sh
jsvc_path = "/usr/lib/bigtop-utils"

hadoop_heapsize = module_configs.get_property_value(module_name, 'hadoop-env', 'hadoop_heapsize')
namenode_heapsize = module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_heapsize')
namenode_opt_newsize = module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_newsize')
namenode_opt_maxnewsize = module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_maxnewsize')
namenode_opt_permsize = format_jvm_option_value(module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_permsize', '128m'), '128m')
namenode_opt_maxpermsize = format_jvm_option_value(module_configs.get_property_value(module_name, 'hadoop-env', 'namenode_opt_maxpermsize', '256m'), '256m')
ttnode_heapsize = "1024m"
dtnode_heapsize = module_configs.get_property_value(module_name, 'hadoop-env', 'dtnode_heapsize')

mapred_pid_dir_prefix = module_configs.get_property_value(module_name, 'mapred-env', 'mapred_pid_dir_prefix', '/var/run/hadoop-mapreduce')
mapred_log_dir_prefix = module_configs.get_property_value(module_name, 'mapred-env', 'mapred_log_dir_prefix', '/var/log/hadoop-mapreduce')
yarn_log_dir_prefix = module_configs.get_property_value(module_name, 'yarn-env', 'yarn_log_dir_prefix', '/var/log/hadoop-yarn')

dfs_hosts = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.hosts')

# HDFS log4j settings
hadoop_log_max_backup_size = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_log_max_backup_size', 256)
hadoop_log_number_of_backup_files = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_log_number_of_backup_files', 10)
hadoop_security_log_max_backup_size = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_security_log_max_backup_size', 256)
hadoop_security_log_number_of_backup_files = module_configs.get_property_value(module_name, 'hdfs-log4j', 'hadoop_security_log_number_of_backup_files', 20)

# YARN log4j settings
yarn_rm_summary_log_max_backup_size = module_configs.get_property_value(module_name, 'yarn-log4j', 'yarn_rm_summary_log_max_backup_size', 256)
yarn_rm_summary_log_number_of_backup_files = module_configs.get_property_value(module_name, 'yarn-log4j', 'yarn_rm_summary_log_number_of_backup_files', 20)

#log4j.properties
log4j_props = module_configs.get_property_value(module_name, 'hdfs-log4j', 'content')
if log4j_props and module_configs.get_property_value(module_name, 'yarn-log4j', 'content'):
  log4j_props += module_configs.get_property_value(module_name, 'yarn-log4j', 'content')
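# The rendered log4j.properties is the hdfs-log4j template, with the
# yarn-log4j template appended when both are present.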
refresh_topology = execution_command.need_refresh_topology()

ambari_java_home = execution_command.get_ambari_java_home()
ambari_jdk_name = execution_command.get_ambari_jdk_name()
ambari_jce_name = execution_command.get_ambari_jce_name()
ambari_libs_dir = "/var/lib/ambari-agent/lib"

is_webhdfs_enabled = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.webhdfs.enabled')
default_fs = module_configs.get_property_value(module_name, 'core-site', 'fs.defaultFS')

#host info
all_hosts = execution_command.get_all_hosts()
all_racks = execution_command.get_all_racks()
all_ipv4_ips = execution_command.get_all_ipv4_ips()
slave_hosts = execution_command.get_component_hosts('datanode')

#topology files
net_topology_script_file_path = "/etc/hadoop/conf/topology_script.py"
net_topology_script_dir = os.path.dirname(net_topology_script_file_path)
net_topology_mapping_data_file_name = 'topology_mappings.data'
net_topology_mapping_data_file_path = os.path.join(net_topology_script_dir, net_topology_mapping_data_file_name)
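# topology_script.py is the rack-awareness script wired into
# net.topology.script.file.name; it resolves hosts against the
# topology_mappings.data file generated next to it.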
# Values used when creating the /tmp and /user directories for the HCFS stack.
has_core_site = bool(module_configs.get_all_properties(module_name, "core-site"))
hdfs_user_keytab = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_user_keytab')
kinit_path_local = get_kinit_path()
stack_version_unformatted = execution_command.get_mpack_version()
stack_version_formatted = format_stack_version(stack_version_unformatted)
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hdfs_principal_name = module_configs.get_property_value(module_name, 'hadoop-env', 'hdfs_principal_name')
hdfs_site = module_configs.get_all_properties(module_name, 'hdfs-site')

smoke_user = get_cluster_setting_value('smokeuser')
smoke_hdfs_user_dir = format("/user/{smoke_user}")
smoke_hdfs_user_mode = 0o770
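# 0o770: the smoke-test user's HDFS home dir is read/write for owner and
# group only.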
##### Namenode RPC ports - metrics config section start #####
# Figure out the RPC ports for the current NameNode
nn_rpc_client_port = None
nn_rpc_dn_port = None
nn_rpc_healthcheck_port = None
namenode_id = None
namenode_rpc = None

dfs_ha_enabled = False
dfs_ha_nameservices = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.internal.nameservices')
if dfs_ha_nameservices is None:
  dfs_ha_nameservices = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.nameservices')
dfs_ha_namenode_ids = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.ha.namenodes.{dfs_ha_nameservices}'))
dfs_ha_namemodes_ids_list = []
other_namenode_id = None
if dfs_ha_namenode_ids:
  dfs_ha_namemodes_ids_list = dfs_ha_namenode_ids.split(",")
  dfs_ha_namenode_ids_array_len = len(dfs_ha_namemodes_ids_list)
  if dfs_ha_namenode_ids_array_len > 1:
    dfs_ha_enabled = True
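
# In HA mode, find the id of the NameNode running on this host by matching the
# hostname against each dfs.namenode.rpc-address.<nameservice>.<nn_id> entry.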
if dfs_ha_enabled:
  for nn_id in dfs_ha_namemodes_ids_list:
    nn_host = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.namenode.rpc-address.{dfs_ha_nameservices}.{nn_id}'))
    if hostname.lower() in nn_host.lower():
      namenode_id = nn_id
      namenode_rpc = nn_host
else:
  namenode_rpc = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.namenode.rpc-address', default_fs)

# if HDFS is not installed in the cluster, then don't try to access namenode_rpc
if has_namenode and namenode_rpc and module_configs.get_all_properties(module_name, 'core-site'):
  port_str = namenode_rpc.split(':')[-1].strip()
  try:
    nn_rpc_client_port = int(port_str)
  except ValueError:
    nn_rpc_client_port = None

if dfs_ha_enabled:
  dfs_service_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.namenode.servicerpc-address.{dfs_ha_nameservices}.{namenode_id}'))
  dfs_lifeline_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', format('dfs.namenode.lifeline.rpc-address.{dfs_ha_nameservices}.{namenode_id}'))
else:
  dfs_service_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.namenode.servicerpc-address')
  dfs_lifeline_rpc_address = module_configs.get_property_value(module_name, 'hdfs-site', 'dfs.namenode.lifeline.rpc-address')

if dfs_service_rpc_address:
  nn_rpc_dn_port = dfs_service_rpc_address.split(':')[1].strip()

if dfs_lifeline_rpc_address:
  nn_rpc_healthcheck_port = dfs_lifeline_rpc_address.split(':')[1].strip()

is_nn_client_port_configured = nn_rpc_client_port is not None
is_nn_dn_port_configured = nn_rpc_dn_port is not None
is_nn_healthcheck_port_configured = nn_rpc_healthcheck_port is not None
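# e.g. with dfs.namenode.rpc-address = "nn1.example.com:8020" this yields
# nn_rpc_client_port = 8020 (illustrative values; the actual addresses come
# from hdfs-site).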
##### end #####

import functools
# Create a partial function with common arguments pre-bound, so that every
# HdfsResource call in the scripts (create/delete/copy-from-local of HDFS
# directories and files) does not have to repeat them.
HdfsResource = functools.partial(
  HdfsResource,
  user=hdfs_user,
  hdfs_resource_ignore_file="/var/lib/ambari-agent/data/.hdfs_resource_ignore",
  security_enabled=security_enabled,
  keytab=hdfs_user_keytab,
  kinit_path_local=kinit_path_local,
  hadoop_bin_dir=hadoop_bin_dir,
  hadoop_conf_dir=hadoop_conf_dir,
  principal_name=hdfs_principal_name,
  hdfs_site=hdfs_site,
  default_fs=default_fs,
  immutable_paths=get_not_managed_resources(),
  dfs_type=dfs_type
)
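
# Typical usage from a service script (a sketch; only the pre-bound arguments
# above are filled in automatically):
#
#   import params
#   params.HdfsResource(params.smoke_hdfs_user_dir,
#                       type="directory",
#                       action="create_on_execute",
#                       owner=params.smoke_user,
#                       mode=params.smoke_hdfs_user_mode)
#   params.HdfsResource(None, action="execute")  # flush the queued operations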