#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Given a Jenkins test job, this script examines all runs of the job within
# a specified period of time (number of days prior to the execution time of
# this script) and reports all failed tests.
#
# The output of this script includes a section for each run that has failed
# tests, listing the name of each failed test.
#
# More importantly, at the end it outputs a summary section listing every
# test that failed across all examined runs, together with the number of
# runs in which each test failed, sorted by that count.
#
# This way, when we see failed tests in a PreCommit build, we can quickly
# tell whether a failure is new or has happened before, and how often, which
# gives an idea of whether it may just be a flaky test.
#
# Of course, to be 100% sure about the reason for a test failure, a closer
# look at the failed test in the specific run is necessary.
#
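# Example invocation (illustrative; the file name and argument values are
# only examples, adjust the Jenkins URL, job name, and window as needed):
#
#   python determine-flaky-tests.py -J https://builds.apache.org \
#       -j Hadoop-Common-trunk -n 7
#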
import sys
import platform

sysversion = sys.hexversion
onward30 = False
if sysversion < 0x020600F0:
  sys.exit("Minimum supported python version is 2.6, the current version is " +
           "Python " + platform.python_version())
if sysversion == 0x030000F0:
  sys.exit("There is a known bug with Python " + platform.python_version() +
           ", please try a different version")
if sysversion < 0x03000000:
  import urllib2
else:
  onward30 = True
  import urllib.request

import datetime
import json as simplejson
import logging
from optparse import OptionParser
import time

# Configuration
DEFAULT_JENKINS_URL = "https://builds.apache.org"
DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
DEFAULT_NUM_PREVIOUS_DAYS = 14
SECONDS_PER_DAY = 86400

# Total number of runs to examine
numRunsToExamine = 0

- """ Parse arguments """
- def parse_args():
- parser = OptionParser()
- parser.add_option("-J", "--jenkins-url", type="string",
- dest="jenkins_url", help="Jenkins URL",
- default=DEFAULT_JENKINS_URL)
- parser.add_option("-j", "--job-name", type="string",
- dest="job_name", help="Job name to look at",
- default=DEFAULT_JOB_NAME)
- parser.add_option("-n", "--num-days", type="int",
- dest="num_prev_days", help="Number of days to examine",
- default=DEFAULT_NUM_PREVIOUS_DAYS)
- (options, args) = parser.parse_args()
- if args:
- parser.error("unexpected arguments: " + repr(args))
- return options
- """ Load data from specified url """
- def load_url_data(url):
- if onward30:
- ourl = urllib.request.urlopen(url)
- codec = ourl.info().get_param('charset')
- content = ourl.read().decode(codec)
- data = simplejson.loads(content, strict=False)
- else:
- ourl = urllib2.urlopen(url)
- data = simplejson.load(ourl, strict=False)
- return data
-
- """ List all builds of the target project. """
- def list_builds(jenkins_url, job_name):
- url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
- jenkins=jenkins_url,
- job_name=job_name)
- try:
- data = load_url_data(url)
- except:
- logging.error("Could not fetch: %s" % url)
- raise
- return data['builds']
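# The builds API (with the tree filter above) returns JSON shaped roughly
# like the following (illustrative, trimmed; timestamps are epoch millis):
#   {"builds": [{"url": "https://.../job/.../123/",
#                "result": "UNSTABLE",
#                "timestamp": 1400000000000}, ...]}
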
- """ Find the names of any tests which failed in the given build output URL. """
- def find_failing_tests(testReportApiJson, jobConsoleOutput):
- ret = set()
- try:
- data = load_url_data(testReportApiJson)
- except:
- logging.error(" Could not open testReport, check " +
- jobConsoleOutput + " for why it was reported failed")
- return ret
- for suite in data['suites']:
- for cs in suite['cases']:
- status = cs['status']
- errDetails = cs['errorDetails']
- if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
- ret.add(cs['className'] + "." + cs['name'])
- if len(ret) == 0:
- logging.info(" No failed tests in testReport, check " +
- jobConsoleOutput + " for why it was reported failed.")
- return ret
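# The testReport API returns JSON shaped roughly like the following
# (illustrative, trimmed to the fields used above):
#   {"suites": [{"cases": [{"className": "org.example.TestFoo",
#                           "name": "testBar",
#                           "status": "REGRESSION",
#                           "errorDetails": "expected:<1> but was:<2>"}, ...]}]}
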
- """ Iterate runs of specfied job within num_prev_days and collect results """
- def find_flaky_tests(jenkins_url, job_name, num_prev_days):
- global numRunsToExamine
- all_failing = dict()
- # First list all builds
- builds = list_builds(jenkins_url, job_name)
- # Select only those in the last N days
- min_time = int(time.time()) - SECONDS_PER_DAY * num_prev_days
- builds = [b for b in builds if (int(b['timestamp']) / 1000) > min_time]
- # Filter out only those that failed
- failing_build_urls = [(b['url'] , b['timestamp']) for b in builds
- if (b['result'] in ('UNSTABLE', 'FAILURE'))]
- tnum = len(builds)
- num = len(failing_build_urls)
- numRunsToExamine = tnum
- logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
- + ") that have failed tests in the past " + str(num_prev_days) + " days"
- + ((".", ", as listed below:\n")[num > 0]))
- for failed_build_with_time in failing_build_urls:
- failed_build = failed_build_with_time[0];
- jobConsoleOutput = failed_build + "Console";
- testReport = failed_build + "testReport";
- testReportApiJson = testReport + "/api/json";
- ts = float(failed_build_with_time[1]) / 1000.
- st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
- logging.info("===>%s" % str(testReport) + " (" + st + ")")
- failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
- if failing:
- for ftest in failing:
- logging.info(" Failed test: %s" % ftest)
- all_failing[ftest] = all_failing.get(ftest,0)+1
- return all_failing
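# find_flaky_tests returns a dict mapping each failed test to the number of
# failing runs it appeared in, e.g. (illustrative names and counts):
#   {"org.example.TestFoo.testBar": 3, "org.example.TestBaz.testQux": 1}
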
def main():
  global numRunsToExamine
  logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

  # Set up the logger to write to stdout instead of the default stderr
  # handler installed by basicConfig above.
  soh = logging.StreamHandler(sys.stdout)
  soh.setLevel(logging.INFO)
  logger = logging.getLogger()
  logger.removeHandler(logger.handlers[0])
  logger.addHandler(soh)

  opts = parse_args()
  logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
               + "/job/" + opts.job_name)

  all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
                                 opts.num_prev_days)
  if len(all_failing) == 0:
    raise SystemExit(0)

  logging.info("\nAmong " + str(numRunsToExamine) + " runs examined, all failed "
               + "tests <#failedRuns: testName>:")

  # Print summary section: all failed tests sorted by how many runs they failed in
  for tn in sorted(all_failing, key=all_failing.get, reverse=True):
    logging.info(" " + str(all_failing[tn]) + ": " + tn)

if __name__ == "__main__":
  main()
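
# Sample summary output (illustrative test names and counts; the "INFO:"
# prefixes added by the logging format are omitted here):
#   Among 30 runs examined, all failed tests <#failedRuns: testName>:
#    5: org.example.TestFoo.testBar
#    2: org.example.TestBaz.testQux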