Browse Source

HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu.

(cherry picked from commit 74351af3b7521b194116258c96270ddaeccd8126)
Yongjun Zhang 10 years ago
parent
commit
8cac3888c1

+ 49 - 8
dev-support/determine-flaky-tests-hadoop.py

@@ -62,12 +62,19 @@ import time
 DEFAULT_JENKINS_URL = "https://builds.apache.org"
 DEFAULT_JENKINS_URL = "https://builds.apache.org"
 DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
 DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
 DEFAULT_NUM_PREVIOUS_DAYS = 14
 DEFAULT_NUM_PREVIOUS_DAYS = 14
+DEFAULT_TOP_NUM_FAILED_TEST = -1
 
 
 SECONDS_PER_DAY = 86400
 SECONDS_PER_DAY = 86400
 
 
 # total number of runs to examine
 # total number of runs to examine
 numRunsToExamine = 0
 numRunsToExamine = 0
 
 
+#summary mode
+summary_mode = False
+
+#total number of errors
+error_count = 0
+
 """ Parse arguments """
 """ Parse arguments """
 def parse_args():
 def parse_args():
   parser = OptionParser()
   parser = OptionParser()
@@ -80,6 +87,10 @@ def parse_args():
   parser.add_option("-n", "--num-days", type="int",
   parser.add_option("-n", "--num-days", type="int",
                     dest="num_prev_days", help="Number of days to examine",
                     dest="num_prev_days", help="Number of days to examine",
                     default=DEFAULT_NUM_PREVIOUS_DAYS)
                     default=DEFAULT_NUM_PREVIOUS_DAYS)
+  parser.add_option("-t", "--top", type="int",
+                    dest="num_failed_tests",
+                    help="Summary Mode, only show top number of failed tests",
+                    default=DEFAULT_TOP_NUM_FAILED_TEST)
 
 
   (options, args) = parser.parse_args()
   (options, args) = parser.parse_args()
   if args:
   if args:
@@ -100,6 +111,7 @@ def load_url_data(url):
  
  
 """ List all builds of the target project. """
 """ List all builds of the target project. """
 def list_builds(jenkins_url, job_name):
 def list_builds(jenkins_url, job_name):
+  global summary_mode
   url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
   url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
       jenkins=jenkins_url,
       jenkins=jenkins_url,
       job_name=job_name)
       job_name=job_name)
@@ -108,19 +120,25 @@ def list_builds(jenkins_url, job_name):
     data = load_url_data(url)
     data = load_url_data(url)
 
 
   except:
   except:
-    logging.error("Could not fetch: %s" % url)
+    if not summary_mode:
+      logging.error("Could not fetch: %s" % url)
+    error_count += 1
     raise
     raise
   return data['builds']
   return data['builds']
 
 
 """ Find the names of any tests which failed in the given build output URL. """
 """ Find the names of any tests which failed in the given build output URL. """
 def find_failing_tests(testReportApiJson, jobConsoleOutput):
 def find_failing_tests(testReportApiJson, jobConsoleOutput):
+  global summary_mode
+  global error_count
   ret = set()
   ret = set()
   try:
   try:
     data = load_url_data(testReportApiJson)
     data = load_url_data(testReportApiJson)
 
 
   except:
   except:
-    logging.error("    Could not open testReport, check " +
+    if not summary_mode:
+      logging.error("    Could not open testReport, check " +
         jobConsoleOutput + " for why it was reported failed")
         jobConsoleOutput + " for why it was reported failed")
+    error_count += 1
     return ret
     return ret
 
 
   for suite in data['suites']:
   for suite in data['suites']:
@@ -130,7 +148,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput):
       if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
       if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
         ret.add(cs['className'] + "." + cs['name'])
         ret.add(cs['className'] + "." + cs['name'])
 
 
-  if len(ret) == 0:
+  if len(ret) == 0 and (not summary_mode):
     logging.info("    No failed tests in testReport, check " +
     logging.info("    No failed tests in testReport, check " +
         jobConsoleOutput + " for why it was reported failed.")
         jobConsoleOutput + " for why it was reported failed.")
   return ret
   return ret
@@ -138,6 +156,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput):
 """ Iterate runs of specified job within num_prev_days and collect results """
 """ Iterate runs of specified job within num_prev_days and collect results """
 def find_flaky_tests(jenkins_url, job_name, num_prev_days):
 def find_flaky_tests(jenkins_url, job_name, num_prev_days):
   global numRunsToExamine
   global numRunsToExamine
+  global summary_mode
   all_failing = dict()
   all_failing = dict()
   # First list all builds
   # First list all builds
   builds = list_builds(jenkins_url, job_name)
   builds = list_builds(jenkins_url, job_name)
@@ -153,7 +172,8 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
   tnum = len(builds)
   tnum = len(builds)
   num = len(failing_build_urls)
   num = len(failing_build_urls)
   numRunsToExamine = tnum
   numRunsToExamine = tnum
-  logging.info("    THERE ARE " + str(num) + " builds (out of " + str(tnum)
+  if not summary_mode:
+    logging.info("    THERE ARE " + str(num) + " builds (out of " + str(tnum)
       + ") that have failed tests in the past " + str(num_prev_days) + " days"
       + ") that have failed tests in the past " + str(num_prev_days) + " days"
       + ((".", ", as listed below:\n")[num > 0]))
       + ((".", ", as listed below:\n")[num > 0]))
 
 
@@ -165,17 +185,20 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
 
 
     ts = float(failed_build_with_time[1]) / 1000.
     ts = float(failed_build_with_time[1]) / 1000.
     st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
     st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
-    logging.info("===>%s" % str(testReport) + " (" + st + ")")
+    if not summary_mode:
+      logging.info("===>%s" % str(testReport) + " (" + st + ")")
     failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
     failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
     if failing:
     if failing:
       for ftest in failing:
       for ftest in failing:
-        logging.info("    Failed test: %s" % ftest)
+        if not summary_mode:
+          logging.info("    Failed test: %s" % ftest)
         all_failing[ftest] = all_failing.get(ftest,0)+1
         all_failing[ftest] = all_failing.get(ftest,0)+1
 
 
   return all_failing
   return all_failing
 
 
 def main():
 def main():
   global numRunsToExamine
   global numRunsToExamine
+  global summary_mode
   logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
   logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
 
 
   # set up logger to write to stdout
   # set up logger to write to stdout
@@ -189,16 +212,34 @@ def main():
   logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
   logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
       + "/job/" + opts.job_name + "")
       + "/job/" + opts.job_name + "")
 
 
+  if opts.num_failed_tests != -1:
+    summary_mode = True
+
   all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
   all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
       opts.num_prev_days)
       opts.num_prev_days)
   if len(all_failing) == 0:
   if len(all_failing) == 0:
     raise SystemExit(0)
     raise SystemExit(0)
-  logging.info("\nAmong " + str(numRunsToExamine) + " runs examined, all failed "
-      + "tests <#failedRuns: testName>:")
+
+  if summary_mode and opts.num_failed_tests < len(all_failing):
+    logging.info("\nAmong " + str(numRunsToExamine) +
+                 " runs examined, top " + str(opts.num_failed_tests) +
+                 " failed tests <#failedRuns: testName>:")
+  else:
+      logging.info("\nAmong " + str(numRunsToExamine) +
+                   " runs examined, all failed tests <#failedRuns: testName>:")
 
 
   # print summary section: all failed tests sorted by how many times they failed
   # print summary section: all failed tests sorted by how many times they failed
+  line_count = 0
   for tn in sorted(all_failing, key=all_failing.get, reverse=True):
   for tn in sorted(all_failing, key=all_failing.get, reverse=True):
     logging.info("    " + str(all_failing[tn])+ ": " + tn)
     logging.info("    " + str(all_failing[tn])+ ": " + tn)
+    if summary_mode:
+      line_count += 1
+      if line_count == opts.num_failed_tests:
+        break
+
+  if summary_mode and error_count > 0:
+    logging.info("\n" + str(error_count) + " errors found, you may "
+                 + "re-run in non summary mode to see error details.");
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
   main()
   main()

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -155,6 +155,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-11971. Move test utilities for tracing from hadoop-hdfs to
     HADOOP-11971. Move test utilities for tracing from hadoop-hdfs to
     hadoop-common. (Masatake Iwasaki via aajisaka)
     hadoop-common. (Masatake Iwasaki via aajisaka)
 
 
+    HADOOP-11965. determine-flaky-tests needs a summary mode.
+    (Yufei Gu via Yongjun Zhang)
+
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
     HADOOP-11785. Reduce the number of listStatus operation in distcp
     HADOOP-11785. Reduce the number of listStatus operation in distcp