resourceFilesKeeper.py

#!/usr/bin/env python
'''
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import hashlib
import os
import sys
import zipfile
import glob
import pprint


class KeeperException(Exception):
  pass


class ResourceFilesKeeper():
  """
  This class encapsulates all utility methods for resource files maintenance.
  """
  HOOKS_DIR = "hooks"
  PACKAGE_DIR = "package"
  STACKS_DIR = "stacks"
  COMMON_SERVICES_DIR = "common-services"
  CUSTOM_ACTIONS_DIR = "custom_actions"
  HOST_SCRIPTS_DIR = "host_scripts"

  # Archives are created for these directories
  ARCHIVABLE_DIRS = [HOOKS_DIR, PACKAGE_DIR]

  HASH_SUM_FILE = ".hash"
  ARCHIVE_NAME = "archive.zip"

  PYC_EXT = ".pyc"
  METAINFO_XML = "metainfo.xml"

  # Read files in 32 KB chunks when hashing
  BUFFER = 1024 * 32

  # Change this to True to see debug output at stderr
  DEBUG = False

  def __init__(self, resources_dir, stacks_dir, verbose=False, nozip=False):
    """
    nozip = create only hash files and skip creating zip archives
    """
    self.resources_dir = resources_dir
    self.stacks_root = stacks_dir
    self.verbose = verbose
    self.nozip = nozip

  def perform_housekeeping(self):
    """
    Performs housekeeping operations on resource files
    """
    self.update_directory_archives()
    # probably, later we will need some additional operations

  def _iter_update_directory_archive(self, subdirs_list):
    for subdir in subdirs_list:
      for root, dirs, _ in os.walk(subdir):
        for d in dirs:
          if d in self.ARCHIVABLE_DIRS:
            full_path = os.path.abspath(os.path.join(root, d))
            self.update_directory_archive(full_path)
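  # For example (hypothetical layout): walking a stack version directory such
  # as stacks/HDP/2.0.6 archives its hooks directory and every
  # services/<SERVICE>/package directory found beneath it.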

  def _update_resources_subdir_archive(self, subdir):
    archive_root = os.path.join(self.resources_dir, subdir)
    self.dbg_out("Updating archive for {0} dir at {1}...".format(subdir, archive_root))
    # update the directory so that the .hash is generated
    self.update_directory_archive(archive_root)

  def update_directory_archives(self):
    """
    Please see AMBARI-4481 for more details
    """
    # archive stacks
    self.dbg_out("Updating archives for stack dirs at {0}...".format(self.stacks_root))
    valid_stacks = self.list_stacks(self.stacks_root)
    self.dbg_out("Stacks: {0}".format(pprint.pformat(valid_stacks)))
    # Iterate over stack directories
    self._iter_update_directory_archive(valid_stacks)

    # archive common services
    common_services_root = os.path.join(self.resources_dir, self.COMMON_SERVICES_DIR)
    self.dbg_out("Updating archives for common services dirs at {0}...".format(common_services_root))
    valid_common_services = self.list_common_services(common_services_root)
    self.dbg_out("Common Services: {0}".format(pprint.pformat(valid_common_services)))
    # Iterate over common services directories
    self._iter_update_directory_archive(valid_common_services)

    # custom actions
    self._update_resources_subdir_archive(self.CUSTOM_ACTIONS_DIR)

    # agent host scripts
    self._update_resources_subdir_archive(self.HOST_SCRIPTS_DIR)

  def _list_metainfo_dirs(self, root_dir):
    valid_items = []  # directories that contain a metainfo.xml file
    glob_pattern = "{0}/*/*".format(root_dir)
    dirs = glob.glob(glob_pattern)
    for directory in dirs:
      metainfo_file = os.path.join(directory, self.METAINFO_XML)
      if os.path.exists(metainfo_file):
        valid_items.append(directory)
    return valid_items

  def list_stacks(self, root_dir):
    """
    Builds a list of stack directories
    """
    try:
      return self._list_metainfo_dirs(root_dir)
    except Exception as err:
      raise KeeperException("Can not list stacks: {0}".format(str(err)))

  def list_common_services(self, root_dir):
    """
    Builds a list of common services directories
    """
    try:
      return self._list_metainfo_dirs(root_dir)
    except Exception as err:
      raise KeeperException("Can not list common services: {0}".format(str(err)))

  def update_directory_archive(self, directory):
    """
    If the hash sum for the directory is not present or differs from the saved
    value, recalculates the hash sum and recreates the directory archive
    """
    cur_hash = self.count_hash_sum(directory)
    saved_hash = self.read_hash_sum(directory)
    if cur_hash != saved_hash:
      if not self.nozip:
        self.zip_directory(directory)
      self.write_hash_sum(directory, cur_hash)
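  # A sketch of the resulting flow, assuming a hypothetical service directory:
  # if any file under .../services/HBASE/package changes, its SHA-1 no longer
  # matches package/.hash, so package/archive.zip is rebuilt (unless nozip is
  # set) and package/.hash is rewritten with the new digest.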

  def count_hash_sum(self, directory):
    """
    Recursively computes the hash sum of all files in the directory and its
    subdirectories. Files are processed in alphabetical order.
    Ignores previously created directory archives and files containing
    previously calculated hashes. Compiled .pyc files are also ignored.
    """
    try:
      sha1 = hashlib.sha1()
      file_list = []
      for root, dirs, files in os.walk(directory):
        for f in files:
          if not self.is_ignored(f):
            full_path = os.path.abspath(os.path.join(root, f))
            file_list.append(full_path)
      # Sort the paths so the digest does not depend on os.walk ordering
      file_list.sort()
      for path in file_list:
        self.dbg_out("Counting hash of {0}".format(path))
        with open(path, 'rb') as fh:
          while True:
            data = fh.read(self.BUFFER)
            if not data:
              break
            sha1.update(data)
      return sha1.hexdigest()
    except Exception as err:
      raise KeeperException("Can not calculate directory "
                            "hash: {0}".format(str(err)))
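  # The digest is equivalent to feeding the raw bytes of every non-ignored
  # file, concatenated in sorted absolute-path order, into a single SHA-1.
  # Note that file names are not hashed, only file contents.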

  def read_hash_sum(self, directory):
    """
    Tries to read a hash sum from a previously generated file. Returns a string
    containing the hash, or None if no hash file exists
    """
    hash_file = os.path.join(directory, self.HASH_SUM_FILE)
    if os.path.isfile(hash_file):
      try:
        with open(hash_file) as fh:
          return fh.readline().strip()
      except Exception as err:
        raise KeeperException("Can not read file {0} : {1}".format(hash_file,
                                                                   str(err)))
    else:
      return None

  def write_hash_sum(self, directory, new_hash):
    """
    Writes the new hash sum to the hash file in the given directory
    """
    hash_file = os.path.join(directory, self.HASH_SUM_FILE)
    try:
      with open(hash_file, "w") as fh:
        fh.write(new_hash)
      os.chmod(hash_file, 0o666)
    except Exception as err:
      raise KeeperException("Can not write to file {0} : {1}".format(hash_file,
                                                                     str(err)))

  def zip_directory(self, directory):
    """
    Packs the entire directory into a zip file. The previous archive, the
    hash file and compiled .pyc files are excluded
    """
    self.dbg_out("creating archive for directory {0}".format(directory))
    try:
      zf = zipfile.ZipFile(os.path.join(directory, self.ARCHIVE_NAME), "w")
      abs_src = os.path.abspath(directory)
      for root, dirs, files in os.walk(directory):
        for filename in files:
          # Avoid zipping the previous archive, the hash file and binary pyc files
          if not self.is_ignored(filename):
            absname = os.path.abspath(os.path.join(root, filename))
            # Store the path relative to the archived directory
            arcname = absname[len(abs_src) + 1:]
            self.dbg_out('zipping {0} as {1}'.format(os.path.join(root, filename),
                                                     arcname))
            zf.write(absname, arcname)
      zf.close()
    except Exception as err:
      raise KeeperException("Can not create zip archive of "
                            "directory {0} : {1}".format(directory, str(err)))
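  # For example (hypothetical path): a file at
  # .../services/HBASE/package/scripts/hbase.py is stored in archive.zip
  # under the relative name scripts/hbase.py.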

  def is_ignored(self, filename):
    """
    Returns True if the filename is ignored when calculating the hash or archiving
    """
    return filename in [self.HASH_SUM_FILE, self.ARCHIVE_NAME] or \
           filename.endswith(self.PYC_EXT)
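  # e.g. is_ignored(".hash"), is_ignored("archive.zip") and
  # is_ignored("params.pyc") return True; is_ignored("params.py") returns False.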

  def dbg_out(self, text):
    if self.DEBUG:
      sys.stderr.write("{0}\n".format(text))
    if not self.DEBUG and self.verbose:
      print(text)
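
# A minimal sketch of programmatic use (paths are hypothetical):
#
#   keeper = ResourceFilesKeeper("/var/lib/ambari-server/resources",
#                                "/var/lib/ambari-server/resources/stacks",
#                                verbose=True)
#   keeper.perform_housekeeping()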

def main(argv=None):
  """
  This method is called by maven during rpm creation.
  Params:
    1: Path to resources root directory
    2 (optional): Path to stacks root directory; defaults to
       <resources root>/stacks
  """
  if argv is None:
    argv = sys.argv
  res_path = argv[1]
  if len(argv) >= 3:
    stacks_path = argv[2]
  else:
    stacks_path = os.path.join(res_path, ResourceFilesKeeper.STACKS_DIR)
  resource_files_keeper = ResourceFilesKeeper(res_path, stacks_path, nozip=True)
  resource_files_keeper.perform_housekeeping()


if __name__ == '__main__':
  main(sys.argv)
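
# Example invocation (hypothetical paths), as maven would run it during the
# rpm build:
#   python resourceFilesKeeper.py /var/lib/ambari-server/resources \
#       /var/lib/ambari-server/resources/stacks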