params.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  #!/usr/bin/python
  """
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
  """
  from resource_management.libraries.functions.version import (
      format_hdp_stack_version,
      compare_versions,
  )
  from resource_management.libraries.functions.default import default
  from resource_management import *
  from setup_spark import *
  import status_params

  # Command configuration and temporary directory, both obtained from Script.
  config = Script.get_config()
  tmp_dir = Script.get_tmp_dir()

  # Stack identity. The name falls back to None when absent; the version is
  # read by direct indexing and then passed through format_hdp_stack_version.
  stack_name = default("/hostLevelParams/stack_name", None)
  stack_version_unformatted = str(config["hostLevelParams"]["stack_version"])
  hdp_stack_version = format_hdp_stack_version(stack_version_unformatted)

  # New cluster stack version that is defined during the RESTART of a
  # Rolling Upgrade (None when the command does not carry one).
  version = default("/commandParams/version", None)
  # TODO/FIXME: the precise version check does not work as of today.
  #   $ yum list installed | grep hdp-select
  #   hdp-select.noarch 2.2.1.0-2340.el6 @HDP-2.2
  # while hdp_stack_version derived from hostLevelParams/stack_version is
  # 2.2.0.0. The comparison against '2.2.1.0' is therefore disabled for the
  # time being and the coarser '2.2' threshold is used instead.
  stack_is_hdp22_or_further = (
      hdp_stack_version != ""
      and compare_versions(hdp_stack_version, '2.2') >= 0
  )

  # NOTE(review): nothing below is defined for stacks older than 2.2, yet later
  # statements in this file reference spark_home and hadoop_bin_dir -- confirm
  # that this script is only ever run on HDP 2.2+.
  if stack_is_hdp22_or_further:
      hadoop_home = "/usr/hdp/current/hadoop-client"
      hadoop_bin_dir = "/usr/hdp/current/hadoop-client/bin"
      spark_conf = '/etc/spark/conf'
      spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
      spark_pid_dir = status_params.spark_pid_dir

      # Only the job history server role gets its own root; SPARK_CLIENT and
      # any other (or missing) role use the client root.
      command_role = default("/role", "")
      if command_role == "SPARK_JOBHISTORYSERVER":
          spark_role_root = "spark-historyserver"
      else:
          spark_role_root = "spark-client"

      spark_home = format("/usr/hdp/current/{spark_role_root}")
  java_home = config["hostLevelParams"]["java_home"]

  # Hadoop / HDFS settings taken from hadoop-env.
  hadoop_conf_dir = "/etc/hadoop/conf"
  hdfs_user = config["configurations"]["hadoop-env"]["hdfs_user"]
  hdfs_principal_name = config["configurations"]["hadoop-env"]["hdfs_principal_name"]
  hdfs_user_keytab = config["configurations"]["hadoop-env"]["hdfs_user_keytab"]

  # Users and groups are re-exported from status_params.
  spark_user = status_params.spark_user
  spark_group = status_params.spark_group
  user_group = status_params.user_group
  spark_hdfs_user_dir = format("/user/{spark_user}")

  # History server pid file and start/stop scripts under spark_home.
  spark_history_server_pid_file = status_params.spark_history_server_pid_file
  spark_history_server_start = format("{spark_home}/sbin/start-history-server.sh")
  spark_history_server_stop = format("{spark_home}/sbin/stop-history-server.sh")

  # Service check: submit the SparkPi example through spark-submit in
  # yarn-cluster mode with a single small executor.
  spark_submit_cmd = format("{spark_home}/bin/spark-submit")
  spark_smoke_example = "org.apache.spark.examples.SparkPi"
  spark_service_check_cmd = format(
      "{spark_submit_cmd} --class {spark_smoke_example} --master yarn-cluster "
      "--num-executors 1 --driver-memory 256m --executor-memory 256m "
      "--executor-cores 1 {spark_home}/lib/spark-examples*.jar 1"
  )
  # Hosts listed under clusterHostInfo for the Spark job history server
  # (empty list when the key is absent).
  spark_jobhistoryserver_hosts = default("/clusterHostInfo/spark_jobhistoryserver_hosts", [])

  # Use the first listed host; fall back to localhost when none is listed.
  if spark_jobhistoryserver_hosts:
      spark_history_server_host = spark_jobhistoryserver_hosts[0]
  else:
      spark_history_server_host = "localhost"

  # spark-defaults params
  # The previous code called default(spark_history_server_host, "localhost"),
  # i.e. it handed the host NAME to default() in the position where every other
  # call in this file passes a "/"-separated configuration path. Treated as a
  # path, a host name would not match any configuration entry, so the value
  # would always be the "localhost" fallback (presumed lookup semantics of
  # default() -- verify against its implementation). The host resolved above
  # already carries that fallback, so it is used directly.
  spark_yarn_historyServer_address = spark_history_server_host
  # Values read from the spark-defaults configuration; the second argument of
  # each call is the fallback used when the property is not configured.
  spark_yarn_applicationMaster_waitTries = default(
      "/configurations/spark-defaults/spark.yarn.applicationMaster.waitTries", "10")
  spark_yarn_submit_file_replication = default(
      "/configurations/spark-defaults/spark.yarn.submit.file.replication", "3")
  spark_yarn_preserve_staging_files = default(
      "/configurations/spark-defaults/spark.yarn.preserve.staging.files", "false")
  spark_yarn_scheduler_heartbeat_interval = default(
      "/configurations/spark-defaults/spark.yarn.scheduler.heartbeat.interval-ms", "5000")
  spark_yarn_queue = default(
      "/configurations/spark-defaults/spark.yarn.queue", "default")
  spark_yarn_containerLauncherMaxThreads = default(
      "/configurations/spark-defaults/spark.yarn.containerLauncherMaxThreads", "25")
  spark_yarn_max_executor_failures = default(
      "/configurations/spark-defaults/spark.yarn.max.executor.failures", "3")
  spark_yarn_executor_memoryOverhead = default(
      "/configurations/spark-defaults/spark.yarn.executor.memoryOverhead", "384")
  spark_yarn_driver_memoryOverhead = default(
      "/configurations/spark-defaults/spark.yarn.driver.memoryOverhead", "384")

  # History server provider class and UI port.
  spark_history_provider = default(
      "/configurations/spark-defaults/spark.history.provider",
      "org.apache.spark.deploy.yarn.history.YarnHistoryProvider")
  spark_history_ui_port = default(
      "/configurations/spark-defaults/spark.history.ui.port", "18080")
  # 'content' entries of the Spark configuration types.
  spark_env_sh = config['configurations']['spark-env']['content']
  spark_log4j_properties = config['configurations']['spark-log4j-properties']['content']
  spark_metrics_properties = config['configurations']['spark-metrics-properties']['content']
  spark_javaopts_properties = config['configurations']['spark-javaopts-properties']['content']

  # Hive counts as installed when clusterHostInfo lists at least one hive server host.
  hive_server_host = default("/clusterHostInfo/hive_server_host", [])
  is_hive_installed = len(hive_server_host) != 0

  hdp_full_version = get_hdp_version()


  def _with_hdp_version(java_opts):
      """Return java_opts as a str, appending -Dhdp.version=<hdp_full_version> if it is absent."""
      java_opts = str(java_opts)
      if '-Dhdp.version' not in java_opts:
          java_opts = java_opts + ' -Dhdp.version=' + str(hdp_full_version)
      return java_opts


  # Each of these option strings must carry -Dhdp.version; add it where missing.
  spark_driver_extraJavaOptions = _with_hdp_version(
      config['configurations']['spark-defaults']['spark.driver.extraJavaOptions'])
  spark_yarn_am_extraJavaOptions = _with_hdp_version(
      config['configurations']['spark-defaults']['spark.yarn.am.extraJavaOptions'])
  spark_javaopts_properties = _with_hdp_version(spark_javaopts_properties)
  # Security (Kerberos) settings.
  security_enabled = config["configurations"]["cluster-env"]["security_enabled"]
  kinit_path_local = functions.get_kinit_path()
  spark_kerberos_keytab = config["configurations"]["spark-defaults"]["spark.history.kerberos.keytab"]
  spark_kerberos_principal = config["configurations"]["spark-defaults"]["spark.history.kerberos.principal"]

  if security_enabled:
      # Substitute the lower-cased history server host for the _HOST placeholder.
      # spark_principal is only defined when security is enabled.
      spark_principal = spark_kerberos_principal.replace(
          '_HOST', spark_history_server_host.lower())
  import functools

  # Keyword arguments shared by every HdfsDirectory call made through this module.
  _hdfs_directory_defaults = dict(
      conf_dir=hadoop_conf_dir,
      hdfs_user=hdfs_user,
      security_enabled=security_enabled,
      keytab=hdfs_user_keytab,
      kinit_path_local=kinit_path_local,
      bin_dir=hadoop_bin_dir,
  )

  # Rebind HdfsDirectory to a partial with those arguments pre-filled; to create
  # an HDFS directory, code must call params.HdfsDirectory.
  HdfsDirectory = functools.partial(HdfsDirectory, **_hdfs_directory_defaults)