params.py

#!/usr/bin/python
"""
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import status_params

from resource_management.libraries.functions.stack_features import check_stack_feature
from resource_management.libraries.functions import StackFeature
from setup_spark import *

import resource_management.libraries.functions
from resource_management.libraries.functions import conf_select
from resource_management.libraries.functions import stack_select
from resource_management.libraries.functions import format
from resource_management.libraries.functions.get_stack_version import get_stack_version
from resource_management.libraries.functions.version import format_stack_version
from resource_management.libraries.functions.default import default
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources
from resource_management.libraries.script.script import Script
# a map of the Ambari role to the component name
# for use with <stack-root>/current/<component>
SERVER_ROLE_DIRECTORY_MAP = {
  'SPARK_JOBHISTORYSERVER' : 'spark-historyserver',
  'SPARK_CLIENT' : 'spark-client',
  'SPARK_THRIFTSERVER' : 'spark-thriftserver',
  'LIVY_SERVER' : 'livy-server',
  'LIVY_CLIENT' : 'livy-client'
}
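
# e.g. when this script runs for the SPARK_CLIENT role, component_directory resolves
# to 'spark-client', so the paths built below point at <stack-root>/current/spark-client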
component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "SPARK_CLIENT")

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

stack_name = status_params.stack_name
stack_root = Script.get_stack_root()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

spark_conf = '/etc/spark/conf'
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
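
# on stacks that support rolling upgrade, switch from the legacy /etc/spark/conf
# layout above to the versioned directories under {stack_root}/current/<component>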
if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  hadoop_home = stack_select.get_hadoop_dir("home")
  spark_conf = format("{stack_root}/current/{component_directory}/conf")
  spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
  spark_daemon_memory = config['configurations']['spark-env']['spark_daemon_memory']
  spark_pid_dir = status_params.spark_pid_dir
  spark_home = format("{stack_root}/current/{component_directory}")
spark_thrift_server_conf_file = spark_conf + "/spark-thrift-sparkconf.conf"
java_home = config['hostLevelParams']['java_home']

hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
user_group = config['configurations']['cluster-env']['user_group']

spark_user = status_params.spark_user
hive_user = status_params.hive_user
spark_group = status_params.spark_group
user_group = status_params.user_group
spark_hdfs_user_dir = format("/user/{spark_user}")
spark_history_dir = default('/configurations/spark-defaults/spark.history.fs.logDirectory', "hdfs:///spark-history")

spark_history_server_pid_file = status_params.spark_history_server_pid_file
spark_thrift_server_pid_file = status_params.spark_thrift_server_pid_file
spark_history_server_start = format("{spark_home}/sbin/start-history-server.sh")
spark_history_server_stop = format("{spark_home}/sbin/stop-history-server.sh")
spark_thrift_server_start = format("{spark_home}/sbin/start-thriftserver.sh")
spark_thrift_server_stop = format("{spark_home}/sbin/stop-thriftserver.sh")
spark_logs_dir = format("{spark_home}/logs")
spark_hadoop_lib_native = format("{stack_root}/current/hadoop-client/lib/native")

spark_submit_cmd = format("{spark_home}/bin/spark-submit")
spark_smoke_example = "org.apache.spark.examples.SparkPi"
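# the service check below submits the bundled SparkPi example to YARN with minimal
# resources (a single 256m executor) as a smoke test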
spark_service_check_cmd = format(
  "{spark_submit_cmd} --class {spark_smoke_example} --master yarn-cluster --num-executors 1 --driver-memory 256m --executor-memory 256m --executor-cores 1 {spark_home}/lib/spark-examples*.jar 1")

spark_jobhistoryserver_hosts = default("/clusterHostInfo/spark_jobhistoryserver_hosts", [])

if len(spark_jobhistoryserver_hosts) > 0:
  spark_history_server_host = spark_jobhistoryserver_hosts[0]
else:
  spark_history_server_host = "localhost"

# spark-defaults params
spark_yarn_historyServer_address = default(spark_history_server_host, "localhost")
spark_history_ui_port = config['configurations']['spark-defaults']['spark.history.ui.port']

spark_env_sh = config['configurations']['spark-env']['content']
spark_log4j_properties = config['configurations']['spark-log4j-properties']['content']
spark_metrics_properties = config['configurations']['spark-metrics-properties']['content']

hive_server_host = default("/clusterHostInfo/hive_server_host", [])
is_hive_installed = not len(hive_server_host) == 0

security_enabled = config['configurations']['cluster-env']['security_enabled']
kinit_path_local = get_kinit_path(default('/configurations/kerberos-env/executable_search_paths', None))
spark_kerberos_keytab = config['configurations']['spark-defaults']['spark.history.kerberos.keytab']
spark_kerberos_principal = config['configurations']['spark-defaults']['spark.history.kerberos.principal']

spark_thriftserver_hosts = default("/clusterHostInfo/spark_thriftserver_hosts", [])
has_spark_thriftserver = not len(spark_thriftserver_hosts) == 0

# hive-site params
spark_hive_properties = {
  'hive.metastore.uris': config['configurations']['hive-site']['hive.metastore.uris']
}
# security settings
if security_enabled:
  spark_principal = spark_kerberos_principal.replace('_HOST', spark_history_server_host.lower())

  if is_hive_installed:
    spark_hive_properties.update({
      'hive.metastore.sasl.enabled': str(config['configurations']['hive-site']['hive.metastore.sasl.enabled']).lower(),
      'hive.metastore.kerberos.keytab.file': config['configurations']['hive-site']['hive.metastore.kerberos.keytab.file'],
      'hive.server2.authentication.spnego.principal': config['configurations']['hive-site']['hive.server2.authentication.spnego.principal'],
      'hive.server2.authentication.spnego.keytab': config['configurations']['hive-site']['hive.server2.authentication.spnego.keytab'],
      'hive.metastore.kerberos.principal': config['configurations']['hive-site']['hive.metastore.kerberos.principal'],
      'hive.server2.authentication.kerberos.principal': config['configurations']['hive-site']['hive.server2.authentication.kerberos.principal'],
      'hive.server2.authentication.kerberos.keytab': config['configurations']['hive-site']['hive.server2.authentication.kerberos.keytab'],
      'hive.server2.authentication': config['configurations']['hive-site']['hive.server2.authentication'],
    })

    hive_kerberos_keytab = config['configurations']['hive-site']['hive.server2.authentication.kerberos.keytab']
    hive_kerberos_principal = config['configurations']['hive-site']['hive.server2.authentication.kerberos.principal']

# thrift server support - available on HDP 2.3 or higher
spark_thrift_sparkconf = None
spark_thrift_cmd_opts_properties = ''
spark_thrift_fairscheduler_content = None
spark_thrift_master = "yarn-client"
if 'nm_hosts' in config['clusterHostInfo'] and len(config['clusterHostInfo']['nm_hosts']) == 1:
  # use local mode when there's only one nodemanager
  spark_thrift_master = "local[4]"

if has_spark_thriftserver and 'spark-thrift-sparkconf' in config['configurations']:
  spark_thrift_sparkconf = config['configurations']['spark-thrift-sparkconf']
  spark_thrift_cmd_opts_properties = config['configurations']['spark-env']['spark_thrift_cmd_opts']
  if is_hive_installed:
    # update the default metastore client properties (async wait for the metastore component);
    # this is useful for blueprint provisioning, where hive-metastore and spark-thriftserver
    # may not be on the same host
    spark_hive_properties.update({
      'hive.metastore.client.socket.timeout': config['configurations']['hive-site']['hive.metastore.client.socket.timeout']
    })
    spark_hive_properties.update(config['configurations']['spark-hive-site-override'])

if 'spark-thrift-fairscheduler' in config['configurations'] and 'fairscheduler_content' in config['configurations']['spark-thrift-fairscheduler']:
  spark_thrift_fairscheduler_content = config['configurations']['spark-thrift-fairscheduler']['fairscheduler_content']

default_fs = config['configurations']['core-site']['fs.defaultFS']
hdfs_site = config['configurations']['hdfs-site']
dfs_type = default("/commandParams/dfs_type", "")
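# default_fs, hdfs_site and dfs_type are passed into the HdfsResource partial
# defined at the bottom of this file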

# livy related config

# livy is only supported from HDP 2.5
has_livyserver = False

if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY, stack_version_formatted):
  livy_component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "LIVY_SERVER")
  livy_conf = format("{stack_root}/current/{livy_component_directory}/conf")
  livy_log_dir = config['configurations']['livy-env']['livy_log_dir']
  livy_pid_dir = status_params.livy_pid_dir
  livy_home = format("{stack_root}/current/{livy_component_directory}")
  livy_user = status_params.livy_user
  livy_group = status_params.livy_group
  user_group = status_params.user_group
  livy_hdfs_user_dir = format("/user/{livy_user}")
  livy_server_pid_file = status_params.livy_server_pid_file
  livy_server_start = format("{livy_home}/bin/livy-server start")
  livy_server_stop = format("{livy_home}/bin/livy-server stop")
  livy_logs_dir = format("{livy_home}/logs")
  livy_env_sh = config['configurations']['livy-env']['content']
  livy_log4j_properties = config['configurations']['livy-log4j-properties']['content']
  livy_spark_blacklist_properties = config['configurations']['livy-spark-blacklist']['content']
  livy_kerberos_keytab = config['configurations']['livy-conf']['livy.server.kerberos.keytab']
  livy_kerberos_principal = config['configurations']['livy-conf']['livy.server.kerberos.principal']

  livy_livyserver_hosts = default("/clusterHostInfo/livy_server_hosts", [])
  if len(livy_livyserver_hosts) > 0:
    livy_livyserver_host = livy_livyserver_hosts[0]
    has_livyserver = True

  livy_livyserver_port = default('configurations/livy-conf/livy.server.port', 8998)

import functools

# create a partial function that supplies the common arguments for every HdfsResource call;
# code that needs to create/delete an HDFS directory/file or do a copy-from-local calls
# params.HdfsResource
HdfsResource = functools.partial(
  HdfsResource,
  user=hdfs_user,
  hdfs_resource_ignore_file="/var/lib/ambari-agent/data/.hdfs_resource_ignore",
  security_enabled=security_enabled,
  keytab=hdfs_user_keytab,
  kinit_path_local=kinit_path_local,
  hadoop_bin_dir=hadoop_bin_dir,
  hadoop_conf_dir=hadoop_conf_dir,
  principal_name=hdfs_principal_name,
  hdfs_site=hdfs_site,
  default_fs=default_fs,
  immutable_paths=get_not_managed_resources(),
  dfs_type=dfs_type
)
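
# HdfsResource itself is presumably brought into scope by the wildcard import from
# setup_spark above. Illustrative (hypothetical) usage from a service script:
#   params.HdfsResource(params.spark_hdfs_user_dir,
#                       type="directory",
#                       action="create_on_execute",
#                       owner=params.spark_user,
#                       mode=0775)
#   params.HdfsResource(None, action="execute")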