checkcompatibility.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. #!/usr/bin/env python3
  2. #
  3. # Licensed to the Apache Software Foundation (ASF) under one
  4. # or more contributor license agreements. See the NOTICE file
  5. # distributed with this work for additional information
  6. # regarding copyright ownership. The ASF licenses this file
  7. # to you under the Apache License, Version 2.0 (the
  8. # "License"); you may not use this file except in compliance
  9. # with the License. You may obtain a copy of the License at
  10. #
  11. # http://www.apache.org/licenses/LICENSE-2.0
  12. #
  13. # Unless required by applicable law or agreed to in writing,
  14. # software distributed under the License is distributed on an
  15. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  16. # KIND, either express or implied. See the License for the
  17. # specific language governing permissions and limitations
  18. # under the License.
  19. # Script which checks Java API compatibility between two revisions of the
  20. # Java client.
  21. #
  22. # Originally sourced from Apache Kudu, which was based on the
  23. # compatibility checker from the Apache HBase project, but ported to
  24. # Python for better readability.
  25. import logging
  26. import os
  27. import re
  28. import shutil
  29. import subprocess
  30. import sys
  31. import urllib.request
  32. import argparse
# Working directory of the process at the moment this module is loaded.
# NOTE(review): nothing else in this file appears to read REPO_DIR; the
# helpers below locate the repository from this script's own path
# instead -- confirm whether this constant is still needed.
REPO_DIR = os.getcwd()
  35. def check_output(*popenargs, **kwargs):
  36. """ Run command with arguments and return its output as a string. """
  37. return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8')
  38. def get_repo_dir():
  39. """ Return the path to the top of the repo. """
  40. dirname, _ = os.path.split(os.path.abspath(__file__))
  41. return os.path.join(dirname, "../..")
  42. def get_scratch_dir():
  43. """ Return the path to the scratch dir that we build within. """
  44. scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
  45. if not os.path.exists(scratch_dir):
  46. os.makedirs(scratch_dir)
  47. return scratch_dir
  48. def get_java_acc_dir():
  49. """ Return the path where we check out the Java API Compliance Checker. """
  50. return os.path.join(get_repo_dir(), "target", "java-acc")
  51. def clean_scratch_dir(scratch_dir):
  52. """ Clean up and re-create the scratch directory. """
  53. if os.path.exists(scratch_dir):
  54. logging.info("Removing scratch dir %s...", scratch_dir)
  55. shutil.rmtree(scratch_dir)
  56. logging.info("Creating empty scratch dir %s...", scratch_dir)
  57. os.makedirs(scratch_dir)
  58. def checkout_java_tree(rev, path):
  59. """ Check out the Java source tree for the given revision into
  60. the given path. """
  61. logging.info("Checking out %s in %s", rev, path)
  62. os.makedirs(path)
  63. # Extract java source
  64. subprocess.check_call(["bash", '-o', 'pipefail', "-c",
  65. ("git archive --format=tar %s | " +
  66. "tar -C \"%s\" -xf -") % (rev, path)],
  67. cwd=get_repo_dir())
  68. def get_git_hash(revname):
  69. """ Convert 'revname' to its SHA-1 hash. """
  70. return check_output(["git", "rev-parse", revname],
  71. cwd=get_repo_dir()).strip()
  72. def get_repo_name():
  73. """Get the name of the repo based on the git remote."""
  74. remotes = check_output(["git", "remote", "-v"],
  75. cwd=get_repo_dir()).strip().split("\n")
  76. # Example output:
  77. # origin https://github.com/apache/hadoop.git (fetch)
  78. # origin https://github.com/apache/hadoop.git (push)
  79. remote_url = remotes[0].split("\t")[1].split(" ")[0]
  80. remote = remote_url.split("/")[-1]
  81. if remote.endswith(".git"):
  82. remote = remote[:-4]
  83. return remote
  84. def build_tree(java_path):
  85. """ Run the Java build within 'path'. """
  86. logging.info("Building in %s...", java_path)
  87. subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true",
  88. "package"],
  89. cwd=java_path)
  90. def checkout_java_acc(force):
  91. """
  92. Check out the Java API Compliance Checker. If 'force' is true, will
  93. re-download even if the directory exists.
  94. """
  95. acc_dir = get_java_acc_dir()
  96. if os.path.exists(acc_dir):
  97. logging.info("Java ACC is already downloaded.")
  98. if not force:
  99. return
  100. logging.info("Forcing re-download.")
  101. shutil.rmtree(acc_dir)
  102. logging.info("Downloading Java ACC...")
  103. url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
  104. scratch_dir = get_scratch_dir()
  105. path = os.path.join(scratch_dir, os.path.basename(url))
  106. jacc = urllib.request.urlopen(url)
  107. with open(path, 'wb') as w:
  108. w.write(jacc.read())
  109. subprocess.check_call(["tar", "xzf", path],
  110. cwd=scratch_dir)
  111. shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"),
  112. os.path.join(acc_dir))
  113. def find_jars(path):
  114. """ Return a list of jars within 'path' to be checked for compatibility. """
  115. all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())
  116. return [j for j in all_jars if (
  117. "-tests" not in j and
  118. "-sources" not in j and
  119. "-with-dependencies" not in j)]
  120. def write_xml_file(path, version, jars):
  121. """Write the XML manifest file for JACC."""
  122. with open(path, "wt") as f:
  123. f.write("<version>" + version + "</version>\n")
  124. f.write("<archives>")
  125. for j in jars:
  126. f.write(j + "\n")
  127. f.write("</archives>")
  128. def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
  129. """ Run the compliance checker to compare 'src' and 'dst'. """
  130. logging.info("Will check compatibility between original jars:\n\t%s\n" +
  131. "and new jars:\n\t%s",
  132. "\n\t".join(src_jars),
  133. "\n\t".join(dst_jars))
  134. java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl")
  135. src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
  136. dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
  137. write_xml_file(src_xml_path, src_name, src_jars)
  138. write_xml_file(dst_xml_path, dst_name, dst_jars)
  139. out_path = os.path.join(get_scratch_dir(), "report.html")
  140. args = ["perl", java_acc_path,
  141. "-l", get_repo_name(),
  142. "-d1", src_xml_path,
  143. "-d2", dst_xml_path,
  144. "-report-path", out_path]
  145. if annotations is not None:
  146. annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
  147. with open(annotations_path, "w") as f:
  148. for ann in annotations:
  149. print(ann, file=f)
  150. args += ["-annotations-list", annotations_path]
  151. subprocess.check_call(args)
  152. def filter_jars(jars, include_filters, exclude_filters):
  153. """Filter the list of JARs based on include and exclude filters."""
  154. filtered = []
  155. # Apply include filters
  156. for j in jars:
  157. found = False
  158. basename = os.path.basename(j)
  159. for f in include_filters:
  160. if f.match(basename):
  161. found = True
  162. break
  163. if found:
  164. filtered += [j]
  165. else:
  166. logging.debug("Ignoring JAR %s", j)
  167. # Apply exclude filters
  168. exclude_filtered = []
  169. for j in filtered:
  170. basename = os.path.basename(j)
  171. found = False
  172. for f in exclude_filters:
  173. if f.match(basename):
  174. found = True
  175. break
  176. if found:
  177. logging.debug("Ignoring JAR %s", j)
  178. else:
  179. exclude_filtered += [j]
  180. return exclude_filtered
  181. def main():
  182. """Main function."""
  183. logging.basicConfig(level=logging.INFO)
  184. parser = argparse.ArgumentParser(
  185. description="Run Java API Compliance Checker.")
  186. parser.add_argument("-f", "--force-download",
  187. action="store_true",
  188. help="Download dependencies (i.e. Java JAVA_ACC) " +
  189. "even if they are already present")
  190. parser.add_argument("-i", "--include-file",
  191. action="append",
  192. dest="include_files",
  193. help="Regex filter for JAR files to be included. " +
  194. "Applied before the exclude filters. " +
  195. "Can be specified multiple times.")
  196. parser.add_argument("-e", "--exclude-file",
  197. action="append",
  198. dest="exclude_files",
  199. help="Regex filter for JAR files to be excluded. " +
  200. "Applied after the include filters. " +
  201. "Can be specified multiple times.")
  202. parser.add_argument("-a", "--annotation",
  203. action="append",
  204. dest="annotations",
  205. help="Fully-qualified Java annotation. " +
  206. "Java ACC will only check compatibility of " +
  207. "annotated classes. Can be specified multiple times.")
  208. parser.add_argument("--skip-clean",
  209. action="store_true",
  210. help="Skip cleaning the scratch directory.")
  211. parser.add_argument("--skip-build",
  212. action="store_true",
  213. help="Skip building the projects.")
  214. parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.")
  215. parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD",
  216. help="Destination revision. " +
  217. "If not specified, will use HEAD.")
  218. if len(sys.argv) == 1:
  219. parser.print_help()
  220. sys.exit(1)
  221. args = parser.parse_args()
  222. src_rev, dst_rev = args.src_rev[0], args.dst_rev
  223. logging.info("Source revision: %s", src_rev)
  224. logging.info("Destination revision: %s", dst_rev)
  225. # Construct the JAR regex patterns for filtering.
  226. include_filters = []
  227. if args.include_files is not None:
  228. for f in args.include_files:
  229. logging.info("Applying JAR filename include filter: %s", f)
  230. include_filters += [re.compile(f)]
  231. else:
  232. include_filters = [re.compile(".*")]
  233. exclude_filters = []
  234. if args.exclude_files is not None:
  235. for f in args.exclude_files:
  236. logging.info("Applying JAR filename exclude filter: %s", f)
  237. exclude_filters += [re.compile(f)]
  238. # Construct the annotation list
  239. annotations = args.annotations
  240. if annotations is not None:
  241. logging.info("Filtering classes using %d annotation(s):", len(annotations))
  242. for a in annotations:
  243. logging.info("\t%s", a)
  244. # Download deps.
  245. checkout_java_acc(args.force_download)
  246. # Set up the build.
  247. scratch_dir = get_scratch_dir()
  248. src_dir = os.path.join(scratch_dir, "src")
  249. dst_dir = os.path.join(scratch_dir, "dst")
  250. if args.skip_clean:
  251. logging.info("Skipping cleaning the scratch directory")
  252. else:
  253. clean_scratch_dir(scratch_dir)
  254. # Check out the src and dst source trees.
  255. checkout_java_tree(get_git_hash(src_rev), src_dir)
  256. checkout_java_tree(get_git_hash(dst_rev), dst_dir)
  257. # Run the build in each.
  258. if args.skip_build:
  259. logging.info("Skipping the build")
  260. else:
  261. build_tree(src_dir)
  262. build_tree(dst_dir)
  263. # Find the JARs.
  264. src_jars = find_jars(src_dir)
  265. dst_jars = find_jars(dst_dir)
  266. # Filter the JARs.
  267. src_jars = filter_jars(src_jars, include_filters, exclude_filters)
  268. dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)
  269. if len(src_jars) == 0 or len(dst_jars) == 0:
  270. logging.error("No JARs found! Are your filters too strong?")
  271. sys.exit(1)
  272. run_java_acc(src_rev, src_jars,
  273. dst_rev, dst_jars, annotations)
# Run the checker only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()