#!/usr/bin/env python
"""
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import glob

from urlparse import urlparse

from resource_management import PropertiesFile
from resource_management.libraries.script.script import Script
from resource_management.libraries.resources.hdfs_resource import HdfsResource
from resource_management.libraries.functions.copy_tarball import copy_to_hdfs
from resource_management.libraries.functions.version import compare_versions
from resource_management.core.resources.service import ServiceConfig
from resource_management.core.resources.system import File, Execute, Directory
from resource_management.core.source import StaticFile, Template, DownloadSource, InlineTemplate
from resource_management.core.shell import as_user
from resource_management.libraries.functions.is_empty import is_empty
from resource_management.libraries.resources.xml_config import XmlConfig
from resource_management.libraries.functions.format import format
from resource_management.core.exceptions import Fail
from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
from ambari_commons import OSConst
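

# The two hive() definitions below are alternative implementations of the same
# entry point: @OsFamilyFuncImpl dispatches to the first on Windows Server
# agents (OSConst.WINSRV_FAMILY) and to the second on all other platforms
# (OsFamilyImpl.DEFAULT). Callers invoke hive(name=...) with name set to
# "metastore", "hiveserver2" or "client", as the branches below show.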
@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
def hive(name=None):
  import params

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_conf_dir,
            configurations=params.config['configurations']['hive-site'],
            owner=params.hive_user,
            configuration_attributes=params.config['configuration_attributes']['hive-site']
  )

  if name in ["hiveserver2", "metastore"]:
    # Manually overriding service logon user & password set by the installation package
    service_name = params.service_map[name]
    ServiceConfig(service_name,
                  action="change_user",
                  username=params.hive_user,
                  password=Script.get_password(params.hive_user))
    Execute(format("cmd /c hadoop fs -mkdir -p {hive_warehouse_dir}"), logoutput=True, user=params.hadoop_user)

  if name == 'metastore':
    if params.init_metastore_schema:
      check_schema_created_cmd = format('cmd /c "{hive_bin}\\hive.cmd --service schematool -info '
                                        '-dbType {hive_metastore_db_type} '
                                        '-userName {hive_metastore_user_name} '
                                        '-passWord {hive_metastore_user_passwd!p}'
                                        '&set EXITCODE=%ERRORLEVEL%&exit /B %EXITCODE%"',  # cmd "feature": propagate the process exit code manually
                                        hive_bin=params.hive_bin,
                                        hive_metastore_db_type=params.hive_metastore_db_type,
                                        hive_metastore_user_name=params.hive_metastore_user_name,
                                        hive_metastore_user_passwd=params.hive_metastore_user_passwd)
      try:
        # schematool -info fails when no schema has been initialized yet
        Execute(check_schema_created_cmd)
      except Fail:
        create_schema_cmd = format('cmd /c {hive_bin}\\hive.cmd --service schematool -initSchema '
                                   '-dbType {hive_metastore_db_type} '
                                   '-userName {hive_metastore_user_name} '
                                   '-passWord {hive_metastore_user_passwd!p}',
                                   hive_bin=params.hive_bin,
                                   hive_metastore_db_type=params.hive_metastore_db_type,
                                   hive_metastore_user_name=params.hive_metastore_user_name,
                                   hive_metastore_user_passwd=params.hive_metastore_user_passwd)
        Execute(create_schema_cmd,
                user=params.hive_user,
                logoutput=True)

  if name == "hiveserver2":
    if params.hive_execution_engine == "tez":
      # Init the tez app dir in hadoop
      script_file = __file__.replace('/', os.sep)
      cmd_file = os.path.normpath(os.path.join(os.path.dirname(script_file), "..", "files", "hiveTezSetup.cmd"))
      Execute("cmd /c " + cmd_file, logoutput=True, user=params.hadoop_user)
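

# Default (non-Windows) implementation: provisions HDFS directories and
# tarballs when setting up HiveServer2, then writes the configuration files
# needed by whichever Hive component this host runs.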
@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
def hive(name=None):
  import params

  if name == 'hiveserver2':
    # HDP 2.1.* or lower
    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, "2.2.0.0") < 0:
      params.HdfsResource(params.webhcat_apps_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.webhcat_user,
                          mode=0755
      )

    # Create webhcat dirs.
    if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
      params.HdfsResource(params.hcat_hdfs_user_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hcat_user,
                          mode=params.hcat_hdfs_user_mode
      )

    params.HdfsResource(params.webhcat_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.webhcat_user,
                        mode=params.webhcat_hdfs_user_mode
    )

    # ****** Begin Copy Tarballs ******
    # *********************************
    # HDP 2.2 or higher: copy mapreduce.tar.gz to HDFS
    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, '2.2') >= 0:
      copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user)

    # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
    # This can use a different source and dest location to account for both HDP 2.1 and 2.2
    copy_to_hdfs("pig",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.pig_tar_source,
                 custom_dest_file=params.pig_tar_dest_file)
    copy_to_hdfs("hive",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.hive_tar_source,
                 custom_dest_file=params.hive_tar_dest_file)

    wildcard_tarballs = ["sqoop", "hadoop_streaming"]
    for tarball_name in wildcard_tarballs:
      # Resolve params.<tarball_name>_tar_source / _tar_dest_dir by attribute name.
      source_file_pattern = getattr(params, tarball_name + "_tar_source")
      dest_dir = getattr(params, tarball_name + "_tar_dest_dir")
      if source_file_pattern is None or dest_dir is None:
        continue

      # The source may be a wildcard pattern; expand it to concrete files.
      source_files = glob.glob(source_file_pattern) if "*" in source_file_pattern else [source_file_pattern]
      for source_file in source_files:
        src_filename = os.path.basename(source_file)
        dest_file = os.path.join(dest_dir, src_filename)
        copy_to_hdfs(tarball_name,
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=source_file,
                     custom_dest_file=dest_file)
    # ******* End Copy Tarballs *******
    # *********************************

    # Create Hive Metastore Warehouse Dir
    params.HdfsResource(params.hive_apps_whs_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hive_user,
                        mode=0777
    )

    # Create Hive User Dir
    params.HdfsResource(params.hive_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hive_user,
                        mode=params.hive_hdfs_user_mode
    )

    if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"):
      params.HdfsResource(params.hive_exec_scratchdir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.hdfs_user,
                          mode=0777)  # Hive expects this dir to be writeable by everyone as it is used as a temp dir

    # Flush all of the HdfsResource actions queued with create_on_execute above.
    params.HdfsResource(None, action="execute")
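
  # Everything below runs for every component, not just hiveserver2: lay down
  # the conf directory skeleton and the service-level configuration files.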
  Directory(params.hive_etc_dir_prefix,
            mode=0755
  )

  # We should change configurations for the client as well as for the server.
  # The reason is that stale-configs are service-level, not component-level.
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_config_dir,
            configurations=params.hive_site_config,
            configuration_attributes=params.config['configuration_attributes']['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

  if params.atlas_hosts:
    PropertiesFile(format('{hive_config_dir}/client.properties'),
                   properties=params.atlas_client_props,
                   owner=params.hive_user,
                   group=params.user_group,
                   mode=0644)

  if params.hive_specific_configs_supported and name == 'hiveserver2':
    XmlConfig("hiveserver2-site.xml",
              conf_dir=params.hive_server_conf_dir,
              configurations=params.config['configurations']['hiveserver2-site'],
              configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=0644)

  File(format("{hive_config_dir}/hive-env.sh"),
       owner=params.hive_user,
       group=params.user_group,
       content=InlineTemplate(params.hive_env_sh_template)
  )

  # On some OSes this folder may not exist, so create it before placing files in it.
  Directory(params.limits_conf_dir,
            recursive=True,
            owner='root',
            group='root'
  )

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2")
  )

  if name == 'metastore' or name == 'hiveserver2':
    jdbc_connector()

  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content=DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
       mode=0644,
  )

  if name == 'metastore':
    File(params.start_metastore_path,
         mode=0755,
         content=StaticFile('startMetastore.sh')
    )
    if params.init_metastore_schema:
      create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                 "{hive_bin}/schematool -initSchema "
                                 "-dbType {hive_metastore_db_type} "
                                 "-userName {hive_metastore_user_name} "
                                 "-passWord {hive_metastore_user_passwd!p}")

      check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                                "{hive_bin}/schematool -info "
                                                "-dbType {hive_metastore_db_type} "
                                                "-userName {hive_metastore_user_name} "
                                                "-passWord {hive_metastore_user_passwd!p}"), params.hive_user)

      # Initialize the schema only if schematool -info cannot find an existing one.
      Execute(create_schema_cmd,
              not_if=check_schema_created_cmd,
              user=params.hive_user
      )
  elif name == 'hiveserver2':
    File(params.start_hiveserver2_path,
         mode=0755,
         content=Template(format('{start_hiveserver2_script}'))
    )

  if name != "client":
    crt_directory(params.hive_pid_dir)
    crt_directory(params.hive_log_dir)
    crt_directory(params.hive_var_lib)
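

# Populate one Hive conf dir with mapred-site.xml, the hive-default/hive-env
# templates, and log4j properties. Cluster-supplied log4j properties take
# precedence; otherwise an existing .template file in the conf dir is used.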
def fill_conf_dir(component_conf_dir):
  import params

  Directory(component_conf_dir,
            owner=params.hive_user,
            group=params.user_group,
            recursive=True
  )

  XmlConfig("mapred-site.xml",
            conf_dir=component_conf_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']['mapred-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

  crt_file(format("{component_conf_dir}/hive-default.xml.template"))
  crt_file(format("{component_conf_dir}/hive-env.sh.template"))

  log4j_exec_filename = 'hive-exec-log4j.properties'
  if params.log4j_exec_props is not None:
    File(format("{component_conf_dir}/{log4j_exec_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=params.log4j_exec_props
    )
  elif os.path.exists(format("{component_conf_dir}/{log4j_exec_filename}.template")):
    File(format("{component_conf_dir}/{log4j_exec_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=StaticFile(format("{component_conf_dir}/{log4j_exec_filename}.template"))
    )

  log4j_filename = 'hive-log4j.properties'
  if params.log4j_props is not None:
    File(format("{component_conf_dir}/{log4j_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=params.log4j_props
    )
  elif os.path.exists(format("{component_conf_dir}/{log4j_filename}.template")):
    File(format("{component_conf_dir}/{log4j_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=StaticFile(format("{component_conf_dir}/{log4j_filename}.template"))
    )


def crt_directory(name):
  import params

  Directory(name,
            recursive=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)


def crt_file(name):
  import params

  File(name,
       owner=params.hive_user,
       group=params.user_group
  )
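

# Stage the metastore JDBC driver jar at params.target. For an existing
# (custom) database the connector is downloaded from the Ambari server; for
# the default MySQL database the jar packaged under /usr/share/java is copied.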
def jdbc_connector():
  import params

  if params.hive_jdbc_driver in params.hive_jdbc_drivers_list and params.hive_use_existing_db:
    environment = {
      "no_proxy": format("{ambari_server_hostname}")
    }

    # TODO: remove once ranger_hive_plugin no longer provides the jdbc driver
    Execute(('rm', '-f', params.prepackaged_ojdbc_symlink),
            path=["/bin", "/usr/bin/"],
            sudo=True)

    File(params.downloaded_custom_connector,
         content=DownloadSource(params.driver_curl_source),
    )

    Execute(('cp', '--remove-destination', params.downloaded_custom_connector, params.target),
            # creates=params.target, TODO: uncomment once ranger_hive_plugin no longer provides the jdbc driver
            path=["/bin", "/usr/bin/"],
            sudo=True)
  else:
    # For the default Hive DB (MySQL), copy the distro-packaged driver jar.
    Execute(('cp', '--remove-destination', format('/usr/share/java/{jdbc_jar_name}'), params.target),
            # creates=params.target, TODO: uncomment once ranger_hive_plugin no longer provides the jdbc driver
            path=["/bin", "/usr/bin/"],
            sudo=True
    )

  File(params.target,
       mode=0644,
  )