xref: /Lucene/dev-tools/scripts/reproduceJenkinsFailures.py (revision 0c61c857c89ba0003890fc8a2b5378801b57e0ee)
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15f7e166e7SSteve Rowe
16a07493d5SSteve Roweimport argparse
17f7e166e7SSteve Roweimport os
18f7e166e7SSteve Roweimport re
19f7e166e7SSteve Roweimport subprocess
20f7e166e7SSteve Roweimport sys
21*0c61c857SSteve Roweimport time
22a07493d5SSteve Roweimport traceback
23f7e166e7SSteve Roweimport urllib.error
24f7e166e7SSteve Roweimport urllib.request
25f7e166e7SSteve Rowefrom textwrap import dedent
26f7e166e7SSteve Rowe
# Matches the Jenkins console line announcing the checked-out revision; group 1 is the SHA, group 2 the branch.
# Example: Checking out Revision e441a99009a557f82ea17ee9f9c3e9b89c75cee6 (refs/remotes/origin/master)
reGitRev = re.compile(r'Checking out Revision (\S+)\s+\(refs/remotes/origin/([^)]+)')

# reAntInvocation recognizes the top-level Ant invocation line in the Jenkins log;
# reAntSysprops then extracts each -D system property (quoted or bare) from that line.
#         Policeman Jenkins example:           [Lucene-Solr-7.x-Linux] $ /var/lib/jenkins/tools/hudson.tasks.Ant_AntInstallation/ANT_1.8.2/bin/ant "-Dargs=-XX:-UseCompressedOops -XX:+UseConcMarkSweepGC" jenkins-hourly
# Policeman Jenkins Windows example:      [Lucene-Solr-master-Windows] $ cmd.exe /C "C:\Users\jenkins\tools\hudson.tasks.Ant_AntInstallation\ANT_1.8.2\bin\ant.bat '"-Dargs=-client -XX:+UseConcMarkSweepGC"' jenkins-hourly && exit %%ERRORLEVEL%%"
#               ASF Jenkins example:        [Lucene-Solr-Tests-master] $ /home/jenkins/tools/ant/apache-ant-1.8.4/bin/ant jenkins-hourly
#       ASF Jenkins nightly example:                        [checkout] $ /home/jenkins/tools/ant/apache-ant-1.8.4/bin/ant -file build.xml -Dtests.multiplier=2 -Dtests.linedocsfile=/home/jenkins/jenkins-slave/workspace/Lucene-Solr-NightlyTests-master/test-data/enwiki.random.lines.txt jenkins-nightly
#        ASF Jenkins smoker example: [Lucene-Solr-SmokeRelease-master] $ /home/jenkins/tools/ant/apache-ant-1.8.4/bin/ant nightly-smoke
reAntInvocation = re.compile(r'\bant(?:\.bat)?\s+.*(?:jenkins-(?:hourly|nightly)|nightly-smoke)')
reAntSysprops = re.compile(r'"-D[^"]+"|-D[^=]+="[^"]*"|-D\S+')

# reReproLine groups: 1 = the full "ant test ..." repro command, 2 = the test case,
# 3 = the remaining parameters with any -Dtests.method=... removed.
# The dots in the property names are escaped so they only match a literal '.'.
# Method example: NOTE: reproduce with: ant test  -Dtestcase=ZkSolrClientTest -Dtests.method=testMultipleWatchesAsync -Dtests.seed=6EF5AB70F0032849 -Dtests.slow=true -Dtests.locale=he-IL -Dtests.timezone=NST -Dtests.asserts=true -Dtests.file.encoding=UTF-8
# Suite example:  NOTE: reproduce with: ant test  -Dtestcase=CloudSolrClientTest -Dtests.seed=DB2DF2D8228BAF27 -Dtests.multiplier=3 -Dtests.slow=true -Dtests.locale=es-AR -Dtests.timezone=America/Argentina/Cordoba -Dtests.asserts=true -Dtests.file.encoding=US-ASCII
reReproLine = re.compile(r'NOTE:\s+reproduce\s+with:(\s+ant\s+test\s+-Dtestcase=(\S+)\s+(?:-Dtests\.method=\S+\s+)?(.*))')
reTestsSeed = re.compile(r'-Dtests\.seed=\S+\s*')

# A Jenkins build URL that ends in the build number, i.e. without the "/consoleText" suffix.
# Example: https://jenkins.thetaphi.de/job/Lucene-Solr-master-Linux/21108/
reJenkinsURLWithoutConsoleText = re.compile(r'https?://.*/\d+/?\Z', re.IGNORECASE)

reJavaFile = re.compile(r'(.*)\.java\Z')                                # group 1: file name without ".java"
reModule = re.compile(r'\.[\\/](.*)[\\/]src[\\/]')                      # group 1: module path between "./" and "/src/"
reTestOutputFile = re.compile(r'TEST-(.*\.([^-.]+))(?:-\d+)?\.xml\Z')   # group 1: fully qualified class, group 2: simple class name
reErrorFailure = re.compile(r'(?:errors|failures)="[^0]')               # an errors/failures attribute whose value does not start with 0
reGitMainBranch = re.compile(r'^(?:master|branch_[x_\d]+)$')

# consoleText from Policeman Jenkins's Windows jobs fails to decode as UTF-8
encoding = 'iso-8859-1'

# Mutable script state: last non-zero exit code remembered by run(), and whether
# prepareWorkspace() managed to check out the requested git ref.
lastFailureCode = 0
gitCheckoutSucceeded = False

# Help text shown by argparse; '-Dtests.dups' matches the property actually passed by runTests().
description = dedent('''\
                     Must be run from a Lucene/Solr git workspace. Downloads the Jenkins
                     log pointed to by the given URL, parses it for Git revision and failed
                     Lucene/Solr tests, checks out the Git revision in the local workspace,
                     groups the failed tests by module, then runs
                     'ant test -Dtests.dups=%d -Dtests.class="*.test1[|*.test2[...]]" ...'
                     in each module of interest, failing at the end if any of the runs fails.
                     To control the maximum number of concurrent JVMs used for each module's
                     test run, set 'tests.jvms', e.g. in ~/lucene.build.properties
                     ''')
defaultIters = 5
69a07493d5SSteve Rowe
def readConfig():
  """Parse the command line and return the resulting argparse namespace.

  The namespace carries 'url' (positional), 'useGit' (True unless --no-git is
  given) and 'testIters' (int, --iters, defaulting to defaultIters).
  """
  argParser = argparse.ArgumentParser(description=description,
                                      formatter_class=argparse.RawDescriptionHelpFormatter)
  argParser.add_argument('url', help='Points to the Jenkins log to parse', metavar='URL')
  argParser.add_argument('--no-git', action='store_false', dest='useGit', default=True,
                         help='Do not run "git" at all')
  itersHelp = 'Number of iterations per test suite (default: %d)' % defaultIters
  argParser.add_argument('--iters', type=int, dest='testIters', metavar='N',
                         default=defaultIters, help=itersHelp)
  config = argParser.parse_args()
  return config
80a07493d5SSteve Rowe
def runOutput(cmd):
  """Run cmd and return its standard output with surrounding whitespace stripped.

  cmd is a single string that is split on single spaces to build the argument
  list, so individual arguments must not contain spaces.

  Raises RuntimeError (chained to the original CalledProcessError) when the
  command exits with a non-zero code; the message includes the exit code and
  the captured output.
  """
  print('[repro] %s' % cmd)
  try:
    return subprocess.check_output(cmd.split(' '), universal_newlines=True).strip()
  except subprocess.CalledProcessError as e:  # must be qualified: only the subprocess module is imported
    raise RuntimeError("ERROR: Cmd '%s' failed with exit code %d and the following output:\n%s"
                       % (cmd, e.returncode, e.output)) from e
88f7e166e7SSteve Rowe
# Remembers non-zero exit code in lastFailureCode unless rememberFailure==False
def run(cmd, rememberFailure=True):
  """Run cmd through the system shell and return its exit code.

  On POSIX, os.system() returns a wait()-encoded status (exit code 1 is
  reported as 256).  That raw value must not reach sys.exit(): a status of 256
  is truncated to exit status 0, which would make a failed run look successful.
  The status is therefore decoded into the real exit code (or 128 + signal
  number when the command was killed by a signal) before it is remembered or
  returned.  On other platforms the value from os.system() is used as-is.
  """
  global lastFailureCode
  print('[repro] %s' % cmd)
  status = os.system(cmd)
  code = status
  if os.name == 'posix':
    if os.WIFEXITED(status):
      code = os.WEXITSTATUS(status)
    elif os.WIFSIGNALED(status):
      code = 128 + os.WTERMSIG(status)  # same convention shells use for signal deaths
  if 0 != code and rememberFailure:
    print('\n[repro] Setting last failure code to %d\n' % code)
    lastFailureCode = code
  return code
98f7e166e7SSteve Rowe
def fetchAndParseJenkinsLog(url, numRetries):
  """Download the Jenkins console log at url and extract what is needed to reproduce failures.

  Sets the module globals revisionFromLog and branchFromLog (from the
  "Checking out Revision" line) and antOptions (the -D system properties of the
  Ant invocation line, joined by spaces).

  Returns a dict mapping each failed test case name to its "reproduce with"
  parameters, with any -Dtests.method=... removed.

  If no revision line is found and url looks like a bare Jenkins build URL,
  "/consoleText" is appended and the fetch is retried.  An IncompleteRead while
  downloading is retried up to numRetries times after a 2 second pause.

  Raises RuntimeError when the URL cannot be fetched, when retries are
  exhausted, or when the content does not look like a Jenkins log.  Exits the
  process with code 0 when the log contains no "reproduce with" lines.
  """
  global revisionFromLog
  global branchFromLog
  global antOptions
  # Only urllib is imported at file level; IncompleteRead lives in http.client.
  import http.client
  revisionFromLog = None
  branchFromLog = None
  antOptions = ''
  tests = {}
  print('[repro] Jenkins log URL: %s\n' % url)
  try:
    with urllib.request.urlopen(url) as consoleText:
      for rawLine in consoleText:
        line = rawLine.decode(encoding)
        match = reGitRev.match(line)
        if match is not None:
          revisionFromLog = match.group(1)
          branchFromLog = match.group(2)
          print('[repro] Revision: %s\n' % revisionFromLog)
          continue
        match = reReproLine.search(line)
        if match is not None:
          print('[repro] Repro line: %s\n' % match.group(1))
          testcase = match.group(2)
          reproLineWithoutMethod = match.group(3).strip()
          tests[testcase] = reproLineWithoutMethod
          continue
        if reAntInvocation.search(line) is not None:
          antOptions = ' '.join(reAntSysprops.findall(line))
          if len(antOptions) > 0:
            print('[repro] Ant options: %s' % antOptions)
  except urllib.error.URLError as e:
    raise RuntimeError('ERROR: fetching %s : %s' % (url, e)) from e
  except http.client.IncompleteRead as e:
    if numRetries > 0:
      print('[repro] Encountered IncompleteRead exception, pausing and then retrying...')
      time.sleep(2) # pause for 2 seconds
      return fetchAndParseJenkinsLog(url, numRetries - 1)
    else:
      print('[repro] Encountered IncompleteRead exception, aborting after too many retries.')
      raise RuntimeError('ERROR: fetching %s : %s' % (url, e)) from e

  if revisionFromLog is None:
    if reJenkinsURLWithoutConsoleText.match(url):
      print('[repro] Not a Jenkins log. Appending "/consoleText" and retrying ...\n')
      # Drop a trailing '/' first so the retried URL does not contain '//consoleText'.
      return fetchAndParseJenkinsLog(url.rstrip('/') + '/consoleText', numRetries)
    else:
      raise RuntimeError('ERROR: %s does not appear to be a Jenkins log.' % url)
  if 0 == len(tests):
    print('[repro] No "reproduce with" lines found; exiting.')
    sys.exit(0)
  return tests
150f7e166e7SSteve Rowe
def prepareWorkspace(useGit, gitRef):
  """Bring the workspace to gitRef (when useGit is true) and run 'ant clean'.

  With useGit set, runs 'git fetch' and 'git checkout <gitRef>', records the
  successful checkout in the global gitCheckoutSucceeded, then attempts a
  fast-forward merge whose failure is ignored.  'ant clean' always runs.

  Raises RuntimeError when the fetch, the checkout or 'ant clean' fails.
  """
  global gitCheckoutSucceeded
  if useGit:
    if run('git fetch') != 0:
      raise RuntimeError('ERROR: "git fetch" failed.  See above.')
    checkoutCmd = 'git checkout %s' % gitRef
    if run(checkoutCmd) != 0:
      raise RuntimeError('ERROR: "%s" failed.  See above.' % checkoutCmd)
    gitCheckoutSucceeded = True
    # Ignore failure on non-branch ref
    run('git merge --ff-only', rememberFailure=False)

  if run('ant clean') != 0:
    raise RuntimeError('ERROR: "ant clean" failed.  See above.')
167f7e166e7SSteve Rowe
168a07493d5SSteve Rowedef groupTestsByModule(tests):
169a07493d5SSteve Rowe  modules = {}
170f7e166e7SSteve Rowe  for (dir, _, files) in os.walk('.'):
171f7e166e7SSteve Rowe    for file in files:
172f7e166e7SSteve Rowe      match = reJavaFile.search(file)
173f7e166e7SSteve Rowe      if match is not None:
174f7e166e7SSteve Rowe        test = match.group(1)
175f7e166e7SSteve Rowe        if test in tests:
176f7e166e7SSteve Rowe          match = reModule.match(dir)
177f7e166e7SSteve Rowe          module = match.group(1)
178f7e166e7SSteve Rowe          if module not in modules:
179f7e166e7SSteve Rowe            modules[module] = set()
180f7e166e7SSteve Rowe          modules[module].add(test)
181f7e166e7SSteve Rowe  print('[repro] Test suites by module:')
182f7e166e7SSteve Rowe  for module in modules:
183f7e166e7SSteve Rowe    print('[repro]    %s' % module)
184f7e166e7SSteve Rowe    for test in modules[module]:
185f7e166e7SSteve Rowe      print('[repro]       %s' % test)
186a07493d5SSteve Rowe  return modules
187f7e166e7SSteve Rowe
def runTests(testIters, modules, tests):
  """Compile and run the failed test suites of every module, testIters times each.

  modules maps a module directory to its test names; tests maps a test name to
  its extra Ant parameters.  For each module the function changes into the
  module directory, runs 'ant compile-test' and then one 'ant test-nocompile'
  covering all of the module's suites, and always changes back afterwards.

  Raises RuntimeError when compilation fails in a module.
  """
  startDir = os.getcwd()
  cmdTemplate = 'ant test-nocompile -Dtests.dups=%d -Dtests.maxfailures=%d -Dtests.class="%s" -Dtests.showOutput=onerror %s %s'
  for module, suiteSet in modules.items():
    suites = list(suiteSet)
    classPattern = '|'.join('*.%s' % suite for suite in suites)
    # Assumption: all tests in this module have the same cmdline params
    params = tests[suites[0]]
    maxFailures = testIters * len(suites)
    os.chdir(module)
    compileCode = run('ant compile-test')
    try:
      if compileCode != 0:
        raise RuntimeError("ERROR: Compile failed in %s/ with code %d.  See above." % (module, compileCode))
      run(cmdTemplate % (testIters, maxFailures, classPattern, antOptions, params))
    finally:
      os.chdir(startDir)
204f7e166e7SSteve Rowe
def printReport(testIters, location):
  """Count failed runs per test class from the TEST-*.xml files and print a summary.

  Scans 'lucene/build' and 'solr/build'.  A result file counts as one failure
  when any of its lines matches reErrorFailure.  location is appended to the
  report heading.

  Returns a dict mapping fully qualified test class name to its failure count.
  """
  failures = {}
  for buildRoot in ('lucene/build', 'solr/build'):
    for (dirPath, _, fileNames) in os.walk(buildRoot):
      for fileName in fileNames:
        outputMatch = reTestOutputFile.search(fileName)
        if outputMatch is None:
          continue
        testcase = outputMatch.group(1)
        failures.setdefault(testcase, 0)
        with open(os.path.join(dirPath, fileName), encoding='UTF-8') as testOutputFile:
          # any() stops reading at the first line that reports errors or failures
          if any(reErrorFailure.search(line) is not None for line in testOutputFile):
            failures[testcase] += 1
  print("[repro] Failures%s:" % location)
  # sort by failure count, then by testcase
  for count, testcase in sorted((failures[name], name) for name in failures):
    print("[repro]   %d/%d failed: %s" % (count, testIters, testcase))
  return failures
225f7e166e7SSteve Rowe
def getLocalGitBranch():
  """Return the currently checked-out branch name, or the commit SHA when HEAD is detached."""
  currentRef = runOutput('git rev-parse --abbrev-ref HEAD')
  if 'HEAD' == currentRef:
    # In detached HEAD state: use the SHA when not on a branch
    currentRef = runOutput('git rev-parse HEAD')
  print('[repro] Initial local git branch/revision: %s' % currentRef)
  return currentRef
232f7e166e7SSteve Rowe
def main():
  """Reproduce the failures from a Jenkins log, then narrow down consistent failures.

  Steps:
    1. Fetch and parse the log, check out the logged revision (unless --no-git)
       and run every failed suite testIters times.
    2. With git enabled, re-run the suites that failed on every iteration at
       the tip of the logged branch.
    3. Re-run the suites that still failed on every iteration, this time
       without the -Dtests.seed=... parameter.

  The original branch/revision is restored at the end if a checkout succeeded.
  Exits with code 1 on an unexpected exception, otherwise with lastFailureCode.
  """
  config = readConfig()
  tests = fetchAndParseJenkinsLog(config.url, numRetries = 2)
  if config.useGit:
    localGitBranch = getLocalGitBranch()

  try:
    prepareWorkspace(config.useGit, revisionFromLog)
    modules = groupTestsByModule(tests)
    runTests(config.testIters, modules, tests)
    failures = printReport(config.testIters, '')

    if config.useGit:
      # Retest 100% failures at the tip of the branch
      oldTests = tests
      tests = {}
      for fullClass in failures:
        testcase = fullClass[(fullClass.rindex('.') + 1):]
        if failures[fullClass] == config.testIters:
          tests[testcase] = oldTests[testcase]
      if len(tests) > 0:
        print('\n[repro] Re-testing 100%% failures at the tip of %s' % branchFromLog)
        # The first argument is useGit: it must be true here, otherwise the branch
        # is never checked out and the retest runs at the original revision.
        prepareWorkspace(True, branchFromLog)
        modules = groupTestsByModule(tests)
        runTests(config.testIters, modules, tests)
        failures = printReport(config.testIters, ' at the tip of %s' % branchFromLog)

        # Retest 100% tip-of-branch failures without a seed
        oldTests = tests
        tests = {}
        for fullClass in failures:
          testcase = fullClass[(fullClass.rindex('.') + 1):]
          if failures[fullClass] == config.testIters:
            tests[testcase] = re.sub(reTestsSeed, '', oldTests[testcase])
        if len(tests) > 0:
          print('\n[repro] Re-testing 100%% failures at the tip of %s without a seed' % branchFromLog)
          prepareWorkspace(True, branchFromLog)
          modules = groupTestsByModule(tests)
          runTests(config.testIters, modules, tests)
          printReport(config.testIters, ' at the tip of %s without a seed' % branchFromLog)
  except Exception:
    print('[repro] %s' % traceback.format_exc())
    sys.exit(1)
  finally:
    if config.useGit and gitCheckoutSucceeded:
      run('git checkout %s' % localGitBranch, rememberFailure=False) # Restore original git branch/sha

  print('[repro] Exiting with code %d' % lastFailureCode)
  sys.exit(lastFailureCode)
282f7e166e7SSteve Rowe
# Script entry point: run main() and turn Ctrl-C into a short message instead of a traceback.
if '__main__' == __name__:
  try:
    main()
  except KeyboardInterrupt:
    print('[repro] Keyboard interrupt...exiting')
288