ensure-jars-have-correct-contents.sh 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/env bash
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. # Usage: $0 [/path/to/some/example.jar;/path/to/another/example/created.jar]
  18. #
  19. # accepts a single command line argument with a semicolon separated list of
  20. # paths to jars to check. Iterates through each such passed jar and checks
  21. # all the contained paths to make sure they follow the below constructed
  22. # safe list.
  # This script relies on the += operator, which only exists in bash 3.1 and
  # later. Refuse to run on anything older, or on a shell that does not
  # populate BASH_VERSINFO at all.
  if [[ -z "${BASH_VERSINFO[0]}" ]] \
    || (( BASH_VERSINFO[0] < 3 )) \
    || (( BASH_VERSINFO[0] == 3 && BASH_VERSINFO[1] < 1 )); then
    echo "bash v3.1+ is required. Sorry."
    exit 1
  fi
  # Abort on the first unhandled failure, and let a pipeline fail when any of
  # its stages fails rather than only the last one.
  set -eo pipefail
  # Build allowed_expr: a single extended regular expression (used with
  # grep -E) matching every path that is permitted inside a checked jar.
  # Anything in a jar that does not match this expression is reported.
  # Each fragment is single-quoted and literal dots are escaped so that a
  # '.' in a file name matches only a real dot, not an arbitrary character.

  # we have to allow the directories that lead to the org/apache/hadoop dir
  allowed_expr='(^org/$|^org/apache/$'
  # We allow the following things to exist in our client artifacts:
  # * classes in packages that start with org.apache.hadoop, which by
  #   convention should be in a path that looks like org/apache/hadoop
  allowed_expr+='|^org/apache/hadoop/'
  # * whatever in the "META-INF" directory
  allowed_expr+='|^META-INF/'
  # * whatever under the "webapps" directory; for things shipped by yarn
  allowed_expr+='|^webapps/'
  # * Hadoop's default configuration files, which have the form
  #   "_module_-default.xml"
  allowed_expr+='|^[^-]*-default\.xml$'
  # * Hadoop's versioning properties files, which have the form
  #   "_module_-version-info.properties"
  allowed_expr+='|^[^-]*-version-info\.properties$'
  # * Hadoop's application classloader properties file.
  allowed_expr+='|^org\.apache\.hadoop\.application-classloader\.properties$'
  # Comes from dnsjava, not sure if relocatable.
  allowed_expr+='|^messages\.properties$'
  # public suffix list used by httpcomponents
  allowed_expr+='|^mozilla/$'
  allowed_expr+='|^mozilla/public-suffix-list\.txt$'
  # Comes from commons-configuration, not sure if relocatable.
  allowed_expr+='|^properties\.dtd$'
  allowed_expr+='|^PropertyList-1\.0\.dtd$'
  # Comes from Ehcache, not relocatable at top level due to limitation
  # of shade plugin AFAICT
  allowed_expr+='|^ehcache-core\.xsd$'
  allowed_expr+='|^ehcache-107ext\.xsd$'
  # Comes from kerby's kerb-simplekdc, not relocatable since at top level
  allowed_expr+='|^krb5-template\.conf$'
  allowed_expr+='|^krb5_udp-template\.conf$'
  # Jetty uses this style sheet for directory listings. TODO ensure our
  # internal use of jetty disallows directory listings and remove this.
  allowed_expr+='|^jetty-dir\.css$'
  # Snappy java is a native library. We cannot relocate it to under
  # org/apache/hadoop.
  allowed_expr+='|^org/xerial/'
  allowed_expr+=')'
  # Count of artifacts found to contain disallowed entries. The integer
  # attribute makes assignments to it evaluate arithmetically.
  declare -i bad_artifacts=0
  # bad_contents: jar entries that did not match the allowed expression.
  # artifact_list: the jar paths parsed from the first command line argument.
  declare -a bad_contents artifact_list
  # Split the first argument on ';' into artifact_list. A ';' is appended by
  # printf so that the final path is terminated and read returns it too.
  # Empty fields (a missing/empty argument, a trailing ';', or ';;') are
  # skipped so that they are not treated as jar paths and so that an empty
  # argument really leaves artifact_list empty.
  while IFS='' read -r -d ';' line; do
    if [[ -n "${line}" ]]; then
      artifact_list+=("${line}")
    fi
  done < <(printf '%s;' "$1")
  # Nothing to check: report it and fail rather than silently succeeding.
  if (( ${#artifact_list[@]} == 0 )); then
    echo "[ERROR] No artifacts passed in."
    exit 1
  fi
  # SIGUSR1 handler: reports that listing the jar currently held in the
  # global ${artifact} failed, then terminates the script with status 1.
  jar_list_failed() {
    echo "[ERROR] Listing jar contents for file '${artifact}' failed."
    exit 1
  }

  # The jar listing runs in a subshell that cannot abort this shell directly;
  # it sends SIGUSR1 to the main shell instead, which lands here.
  trap 'jar_list_failed' SIGUSR1
  # Print, one per line, the entries of the jar at $1 that do not match
  # ${allowed_expr}. If listing the jar fails, SIGUSR1 is sent to the main
  # shell ($$) because this runs in a subshell and cannot stop the script
  # itself.
  list_disallowed_entries() {
    # Note: On Windows the output from jar tf may contain \r\n's. Normalize
    # to \n by deleting the carriage returns.
    ( jar tf "$1" | tr -d '\r' || kill -SIGUSR1 $$ ) \
      | grep -v -E "${allowed_expr}"
  }

  # Replace the contents of the global bad_contents array with the
  # disallowed entries of the jar at $1. The array is emptied first so that
  # findings from a previously checked jar do not leak into this one.
  collect_bad_contents() {
    local entry
    bad_contents=()
    while IFS='' read -r entry; do
      bad_contents+=("${entry}")
    done < <(list_disallowed_entries "$1")
  }

  # Check every artifact in turn. The loop variable must stay the global
  # 'artifact' because the jar_list_failed handler reports it by that name.
  for artifact in "${artifact_list[@]}"; do
    collect_bad_contents "${artifact}"
    if [ "${#bad_contents[@]}" -gt 0 ]; then
      echo "[ERROR] Found artifact with unexpected contents: '${artifact}'"
      echo " Please check the following and either correct the build or update"
      echo " the allowed list with reasoning."
      echo ""
      for bad_line in "${bad_contents[@]}"; do
        echo " ${bad_line}"
      done
      bad_artifacts=$(( bad_artifacts + 1 ))
    else
      echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'"
    fi
  done

  # Fail the run if at least one artifact contained disallowed entries.
  if (( bad_artifacts > 0 )); then
    exit 1
  fi