params.py

#!/usr/bin/python
"""
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import status_params
from setup_spark import *

import resource_management.libraries.functions
from resource_management.libraries.functions import conf_select
from resource_management.libraries.functions import hdp_select
from resource_management.libraries.functions import format
from resource_management.libraries.functions.get_hdp_version import get_hdp_version
from resource_management.libraries.functions.version import format_hdp_stack_version
from resource_management.libraries.functions.default import default
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.script.script import Script
# HdfsResource is wrapped with functools.partial at the bottom of this file
from resource_management.libraries.resources.hdfs_resource import HdfsResource
# a map of the Ambari role to the component name
# for use with /usr/hdp/current/<component>
SERVER_ROLE_DIRECTORY_MAP = {
  'SPARK_JOBHISTORYSERVER' : 'spark-historyserver',
  'SPARK_CLIENT' : 'spark-client',
  'SPARK_THRIFTSERVER' : 'spark-thriftserver'
}

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "SPARK_CLIENT")
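# Example (illustrative): when the current command's role is SPARK_JOBHISTORYSERVER,
# component_directory is 'spark-historyserver', so the /usr/hdp/current paths below resolve to
# /usr/hdp/current/spark-historyserver/...; roles not in the map fall back to the "SPARK_CLIENT" entry.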
config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

stack_name = default("/hostLevelParams/stack_name", None)
stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
hdp_stack_version = format_hdp_stack_version(stack_version_unformatted)
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

# TODO! FIXME! Version check is not working as of today :
#   $ yum list installed | grep hdp-select
#   hdp-select.noarch        2.2.1.0-2340.el6    @HDP-2.2
# And hdp_stack_version returned from hostLevelParams/stack_version is : 2.2.0.0
# Commenting out for time being
#stack_is_hdp22_or_further = hdp_stack_version != "" and compare_versions(hdp_stack_version, '2.2.1.0') >= 0
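# Illustrative values: hostLevelParams/stack_version formats to a padded stack version such as
# "2.2.0.0" (as noted above), while commandParams/version carries the full build being installed,
# e.g. the "2.2.1.0-2340" shown in the yum output above.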
spark_conf = '/etc/spark/conf'
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_bin_dir = hdp_select.get_hadoop_dir("bin")

if Script.is_hdp_stack_greater_or_equal("2.2"):
  hadoop_home = hdp_select.get_hadoop_dir("home")
  spark_conf = format("/usr/hdp/current/{component_directory}/conf")
  spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
  spark_pid_dir = status_params.spark_pid_dir
  spark_home = format("/usr/hdp/current/{component_directory}")

spark_thrift_server_conf_file = spark_conf + "/spark-thrift-sparkconf.conf"
java_home = config['hostLevelParams']['java_home']
hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']

user_group = config['configurations']['cluster-env']['user_group']
spark_user = status_params.spark_user
hive_user = status_params.hive_user
spark_group = status_params.spark_group
user_group = status_params.user_group
spark_hdfs_user_dir = format("/user/{spark_user}")
spark_history_dir = default('/configurations/spark-defaults/spark.history.fs.logDirectory', "hdfs:///spark-history")

spark_history_server_pid_file = status_params.spark_history_server_pid_file
spark_thrift_server_pid_file = status_params.spark_thrift_server_pid_file
spark_history_server_start = format("{spark_home}/sbin/start-history-server.sh")
spark_history_server_stop = format("{spark_home}/sbin/stop-history-server.sh")
spark_thrift_server_start = format("{spark_home}/sbin/start-thriftserver.sh")
spark_thrift_server_stop = format("{spark_home}/sbin/stop-thriftserver.sh")
spark_logs_dir = format("{spark_home}/logs")
spark_submit_cmd = format("{spark_home}/bin/spark-submit")
spark_smoke_example = "org.apache.spark.examples.SparkPi"
spark_service_check_cmd = format(
  "{spark_submit_cmd} --class {spark_smoke_example} --master yarn-cluster --num-executors 1 --driver-memory 256m --executor-memory 256m --executor-cores 1 {spark_home}/lib/spark-examples*.jar 1")
spark_jobhistoryserver_hosts = default("/clusterHostInfo/spark_jobhistoryserver_hosts", [])

if len(spark_jobhistoryserver_hosts) > 0:
  spark_history_server_host = spark_jobhistoryserver_hosts[0]
else:
  spark_history_server_host = "localhost"

# spark-defaults params
spark_yarn_historyServer_address = default(spark_history_server_host, "localhost")
spark_history_ui_port = config['configurations']['spark-defaults']['spark.history.ui.port']

spark_env_sh = config['configurations']['spark-env']['content']
spark_log4j_properties = config['configurations']['spark-log4j-properties']['content']
spark_metrics_properties = config['configurations']['spark-metrics-properties']['content']

hive_server_host = default("/clusterHostInfo/hive_server_host", [])
is_hive_installed = not len(hive_server_host) == 0

security_enabled = config['configurations']['cluster-env']['security_enabled']
kinit_path_local = get_kinit_path(default('/configurations/kerberos-env/executable_search_paths', None))
spark_kerberos_keytab = config['configurations']['spark-defaults']['spark.history.kerberos.keytab']
spark_kerberos_principal = config['configurations']['spark-defaults']['spark.history.kerberos.principal']

spark_thriftserver_hosts = default("/clusterHostInfo/spark_thriftserver_hosts", [])
has_spark_thriftserver = not len(spark_thriftserver_hosts) == 0

# hive-site params
spark_hive_properties = {
  'hive.metastore.uris': config['configurations']['hive-site']['hive.metastore.uris']
}
# security settings
if security_enabled:
  spark_principal = spark_kerberos_principal.replace('_HOST', spark_history_server_host.lower())

  if is_hive_installed:
    spark_hive_properties.update({
      'hive.metastore.sasl.enabled': str(config['configurations']['hive-site']['hive.metastore.sasl.enabled']).lower(),
      'hive.metastore.kerberos.keytab.file': config['configurations']['hive-site']['hive.metastore.kerberos.keytab.file'],
      'hive.server2.authentication.spnego.principal': config['configurations']['hive-site']['hive.server2.authentication.spnego.principal'],
      'hive.server2.authentication.spnego.keytab': config['configurations']['hive-site']['hive.server2.authentication.spnego.keytab'],
      'hive.metastore.kerberos.principal': config['configurations']['hive-site']['hive.metastore.kerberos.principal'],
      'hive.server2.authentication.kerberos.principal': config['configurations']['hive-site']['hive.server2.authentication.kerberos.principal'],
      'hive.server2.authentication.kerberos.keytab': config['configurations']['hive-site']['hive.server2.authentication.kerberos.keytab'],
      'hive.server2.authentication': config['configurations']['hive-site']['hive.server2.authentication'],
    })

    hive_kerberos_keytab = config['configurations']['hive-site']['hive.server2.authentication.kerberos.keytab']
    hive_kerberos_principal = config['configurations']['hive-site']['hive.server2.authentication.kerberos.principal']
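# Example of the _HOST substitution above (hostname and realm are illustrative): with
# spark.history.kerberos.principal set to "spark/_HOST@EXAMPLE.COM" and a history server on
# HISTORY01.example.com, spark_principal becomes "spark/history01.example.com@EXAMPLE.COM".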
# thrift server support - available on HDP 2.3 or higher
spark_thrift_sparkconf = None
spark_thrift_cmd_opts_properties = ''
spark_thrift_fairscheduler_content = None
spark_thrift_master = "yarn-client"
if 'nm_hosts' in config['clusterHostInfo'] and len(config['clusterHostInfo']['nm_hosts']) == 1:
  # use local mode when there's only one nodemanager
  spark_thrift_master = "local[4]"

if has_spark_thriftserver and 'spark-thrift-sparkconf' in config['configurations']:
  spark_thrift_sparkconf = config['configurations']['spark-thrift-sparkconf']
  spark_thrift_cmd_opts_properties = config['configurations']['spark-env']['spark_thrift_cmd_opts']
  if is_hive_installed:
    # update the default metastore client properties (async wait for the metastore component); this is
    # useful for blueprint provisioning when hive-metastore and spark-thriftserver are not on the same host
    spark_hive_properties.update({
      'hive.metastore.client.socket.timeout': config['configurations']['hive-site']['hive.metastore.client.socket.timeout']
    })
    spark_hive_properties.update(config['configurations']['spark-hive-site-override'])

if 'spark-thrift-fairscheduler' in config['configurations'] and 'fairscheduler_content' in config['configurations']['spark-thrift-fairscheduler']:
  spark_thrift_fairscheduler_content = config['configurations']['spark-thrift-fairscheduler']['fairscheduler_content']
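# For reference, fairscheduler_content holds Spark fair-scheduler pool XML for the Thrift Server;
# a minimal, purely illustrative example would be:
#   <allocations>
#     <pool name="default">
#       <schedulingMode>FAIR</schedulingMode>
#       <weight>1</weight>
#       <minShare>2</minShare>
#     </pool>
#   </allocations>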
default_fs = config['configurations']['core-site']['fs.defaultFS']
hdfs_site = config['configurations']['hdfs-site']
dfs_type = default("/commandParams/dfs_type", "")

import functools
# create a partial function with common arguments for every HdfsResource call;
# to create/delete an HDFS directory/file or copy from local, service code calls params.HdfsResource
HdfsResource = functools.partial(
  HdfsResource,
  user=hdfs_user,
  hdfs_resource_ignore_file="/var/lib/ambari-agent/data/.hdfs_resource_ignore",
  security_enabled=security_enabled,
  keytab=hdfs_user_keytab,
  kinit_path_local=kinit_path_local,
  hadoop_bin_dir=hadoop_bin_dir,
  hadoop_conf_dir=hadoop_conf_dir,
  principal_name=hdfs_principal_name,
  hdfs_site=hdfs_site,
  default_fs=default_fs,
  dfs_type=dfs_type
)
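# Typical usage from the service code (illustrative; actual arguments depend on the caller), e.g.:
#   params.HdfsResource(params.spark_hdfs_user_dir,
#                       type="directory",
#                       action="create_on_execute",
#                       owner=params.spark_user,
#                       mode=0775)
#   params.HdfsResource(None, action="execute")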