1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
content / test / gpu / trim_culprit_cls.py [blame]
#!/usr/bin/env vpython3
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Script for determining which CLs in a blamelist ran on a certain trybot.
There are cases where CLs can be absolved of a CI failure if they ran on a
similar trybot before being submitted. This script goes through each CL in a
given blamelist and determines whether it ran on a specified trybot or not.
This script depends on the `bq` tool, which is available as part of the Google
Cloud SDK https://cloud.google.com/sdk/docs/quickstarts.
Example usage:
trim_culprit_cls.py \
--start-revision <first/oldest revision in the blamelist> \
--end-revision <last/newest revision in the blamelist> \
--trybot <trybot name> \
--project <billing project>
Concrete example:
trim_culprit_cls.py \
--start-revision 1cdf916d194215f1e4139f295e494fc1c1863c3c \
--end-revision 9aa31419100be8d0f02708a500aaed7c33a53a10 \
--trybot win_optional_gpu_tests_rel \
--project chromium-swarm
The --project argument can be any project you are associated with in the
Google Cloud console https://console.cloud.google.com/ (see drop-down menu in
the top left corner).
"""
from __future__ import print_function
import argparse
import json
import re
import subprocess
# pylint: disable=line-too-long
# Schemas:
# - go/buildbucket-bq and go/buildbucket-proto/build.proto
# - go/luci/cq/bq and
# https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/cv/api/bigquery/v1/attempt.proto
#
# Original author: maruel@
# BigQuery SQL used to list the CQ builds recorded for a single Gerrit change.
# The {cl_number} placeholder is substituted via str.format() in
# QueryTrybotsForCl(). Each result row carries the columns patchset, builder
# (formatted as project/bucket/builder), url, critical, status, start_time and
# duration. Only builds created within the last 30 days are returned.
QUERY_TEMPLATE = """\
WITH cq_builds AS (
SELECT
build.id,
build.critical,
start_time,
TIMESTAMP_DIFF(end_time, start_time, SECOND) AS duration,
cl.change,
cl.patchset
FROM `commit-queue.chromium.attempts` CROSS JOIN UNNEST(builds) AS build CROSS JOIN UNNEST(gerrit_changes) AS cl
WHERE
cl.host = 'chromium-review.googlesource.com'
AND cl.project = 'chromium/src'
AND cl.change = {cl_number}
),
builds AS (
SELECT
patchset,
bb.builder.project||'/'||bb.builder.bucket||'/'||bb.builder.builder AS builder,
'ci.chromium.org/b/'||bb.id AS url,
cq.critical,
bb.status,
cq.start_time,
duration
FROM cq_builds AS cq INNER JOIN `cr-buildbucket.chromium.builds` AS bb ON cq.id = bb.id
WHERE
# Performance optimization.
bb.create_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
)
SELECT * FROM builds ORDER BY patchset DESC, critical, builder, start_time
"""
# pylint: enable=line-too-long
# Matches a "Reviewed-on: <url>" line (optionally preceded by whitespace)
# anywhere in a multi-line string and captures the rest of that line as the
# named group 'gerrit_url'. Used by FillGerritUrls() on `git show` output.
GERRIT_URL_REGEX = re.compile(r'^\s*Reviewed-on: (?P<gerrit_url>.*)$',
re.MULTILINE)
class ChangeList:
  """Holds the data gathered for one CL in a blamelist.

  Every attribute starts out as None and is expected to be filled in by the
  helper functions in this file before the object is converted to a string.
  """

  def __init__(self):
    # Git revision of the submitted CL.
    self.revision = None
    # Gerrit review URL taken from the commit's "Reviewed-on" line.
    self.gerrit_url = None
    # Cached value backing the cl_number property.
    self._cl_number = None
    # Highest patchset number seen among the CL's tryjobs.
    self.largest_patchset = None
    # Whether the trybot of interest ran on the largest patchset.
    self.ran_trybot = None

  @property
  def cl_number(self):
    """The last '/'-separated component of |gerrit_url|, computed lazily."""
    assert self.gerrit_url
    if self._cl_number:
      return self._cl_number
    self._cl_number = self.gerrit_url.rsplit('/', 1)[-1]
    return self._cl_number

  def __str__(self):
    # All user-visible fields must have been populated before printing.
    assert self.revision is not None
    assert self.gerrit_url is not None
    assert self.largest_patchset is not None
    assert self.ran_trybot is not None
    marker = '' if self.ran_trybot else ' <<<< Did not run trybot'
    return '%s (%s)' % (self.revision, self.gerrit_url) + marker
def QueryTrybotsForCl(cl_number, project):
  """Queries BigQuery for the tryjobs run for a CL.

  Runs the `bq` command line tool with QUERY_TEMPLATE filled in for the given
  CL and parses its JSON output.

  Args:
    cl_number: An int or string containing the CL number to query.
    project: A string containing the billing project to use for queries.

  Returns:
    A list of dicts, each entry containing data for one trybot run.

  Raises:
    ValueError: If |cl_number| cannot be interpreted as an integer.
    subprocess.CalledProcessError: If `bq` exits with a non-zero status.
  """
  # Coerce to int so that only a plain number can ever be interpolated into
  # the SQL text.
  query = QUERY_TEMPLATE.format(cl_number=int(cl_number))
  cmd = [
      'bq',
      'query',
      '--format=json',
      '--project_id=%s' % project,
      '--max_rows=500',
      '--use_legacy_sql=false',
      query,
  ]
  # Discard stderr so only the JSON on stdout is parsed. subprocess.DEVNULL is
  # used instead of opening '/dev/null' so this also works on Windows.
  stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
  return json.loads(stdout)
def FillTrybotRuns(blamelist, trybot, project):
  """Records, for each CL in |blamelist|, whether |trybot| ran on it.

  For every entry this sets |largest_patchset| to the highest patchset number
  found among the CL's tryjobs and |ran_trybot| to whether a tryjob on that
  patchset was run by a builder named |trybot|.

  Args:
    blamelist: A list of ChangeList objects with their gerrit_url fields filled.
    trybot: A string containing the name of the trybot to check for.
    project: A string containing the billing project to use for queries.
  """
  cl_count = len(blamelist)
  for position, cl in enumerate(blamelist, start=1):
    print('Getting data for CL %s/%s' % (position, cl_count))
    tryjobs = QueryTrybotsForCl(cl.cl_number, project)
    assert tryjobs

    # Scan every row for the largest patchset instead of relying on the order
    # in which the rows were returned.
    newest_patchset = max([0] + [int(job['patchset']) for job in tryjobs])
    cl.largest_patchset = newest_patchset

    # The 'builder' field is in the form project/bucket/builder, e.g.
    # chromium/try/android-marshmallow-arm64-rel, so compare only the last
    # component against the requested trybot name.
    ran_on_newest = any(
        int(job['patchset']) == newest_patchset
        and job['builder'].split('/')[-1] == trybot for job in tryjobs)
    if ran_on_newest:
      cl.ran_trybot = True
    elif cl.ran_trybot is None:
      cl.ran_trybot = False
def FillGerritUrls(blamelist):
  """Fills the Gerrit URLs for the entries in |blamelist|

  Runs `git show --name-only <revision>` for each entry and extracts the URL
  from the first "Reviewed-on:" line in the output.

  Args:
    blamelist: A list of ChangeList objects with their revision fields filled.

  Raises:
    AssertionError: If an entry has no revision or no "Reviewed-on:" line with
        a non-empty URL can be found for it.
    subprocess.CalledProcessError: If `git show` exits with a non-zero status.
  """
  cmd_template = [
      'git',
      'show',
      '--name-only',
  ]
  for entry in blamelist:
    assert entry.revision, 'Blamelist entry is missing its revision'
    stdout = subprocess.check_output(cmd_template + [entry.revision],
                                     stderr=subprocess.STDOUT)
    # check_output() returns bytes on Python 3, while GERRIT_URL_REGEX is a
    # str pattern; searching bytes with it raises TypeError, so decode first.
    # Undecodable bytes in a commit message are replaced rather than fatal.
    text = stdout.decode('utf-8', errors='replace')
    match = GERRIT_URL_REGEX.search(text)
    assert match, 'No Reviewed-on line found for %s' % entry.revision
    entry.gerrit_url = match.group('gerrit_url')
    assert entry.gerrit_url, 'Empty Reviewed-on URL for %s' % entry.revision
def GetBlamelist(start_revision, end_revision):
  """Gets a revision blamelist between the two given revisions.

  Runs `git log --pretty=oneline <start>~1..<end>` and takes the first
  whitespace-separated token of every output line as a revision.

  Args:
    start_revision: A string containing the earliest revision in the blamelist.
    end_revision: A string containing the latest revision in the blamelist.

  Returns:
    A list of ChangeList objects with their revision fields filled in, each
    corresponding to a revision in the blamelist. The first entry is the
    latest in the blamelist.

  Raises:
    subprocess.CalledProcessError: If `git log` exits with a non-zero status.
  """
  cmd = [
      'git',
      'log',
      '--pretty=oneline',
      '%s~1..%s' % (start_revision, end_revision),
  ]
  stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
  blamelist = []
  for line in stdout.splitlines():
    fields = line.split()
    if not fields:
      # Ignore blank lines instead of failing on the index below.
      continue
    cl = ChangeList()
    # check_output() returns bytes on Python 3. Decode the hash so the
    # revision is a real string when it is printed or passed back to git;
    # only the hash is decoded, so odd bytes in commit subjects cannot fail.
    cl.revision = fields[0].decode('ascii')
    blamelist.append(cl)
  return blamelist
def ParseArgs():
  """Parses the command line arguments of the script.

  Returns:
    An argparse.Namespace with the attributes start_revision, end_revision,
    project and trybot. All four flags are required.
  """
  parser = argparse.ArgumentParser(description=(
      'Script to determine which CLs in a blamelist did not run a '
      'particular trybot.'))
  # (flag, help text) pairs; every flag is mandatory.
  required_flags = (
      ('--start-revision', 'The earliest revision in the blamelist.'),
      ('--end-revision', 'The latest revision in the blamelist.'),
      ('--project', 'A billing project to use for queries.'),
      ('--trybot', 'The name of the trybot to look for.'),
  )
  for flag, help_text in required_flags:
    parser.add_argument(flag, required=True, help=help_text)
  return parser.parse_args()
def main():
  """Builds the blamelist, annotates it with trybot data and prints it."""
  options = ParseArgs()
  culprits = GetBlamelist(options.start_revision, options.end_revision)
  # The Gerrit URLs must be filled in first: FillTrybotRuns() derives each CL
  # number from them.
  FillGerritUrls(culprits)
  FillTrybotRuns(culprits, options.trybot, options.project)
  print('\n\nBlamelist (latest first):\n')
  for culprit in culprits:
    print(culprit)
if __name__ == '__main__':
main()