1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257

content / test / gpu / trim_culprit_cls.py [blame]

#!/usr/bin/env vpython3
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Script for determining which CLs in a blamelist ran on a certain trybot.

There are cases where CLs can be absolved of a CI failure if they ran on a
similar trybot before being submitted. This script will go through each CL in a
given blamelist and determine whether it ran on a specified trybot or not.

This script depends on the `bq` tool, which is available as part of the Google
Cloud SDK https://cloud.google.com/sdk/docs/quickstarts.

Example usage:

trim_culprit_cls.py \
  --start-revision <first/oldest revision in the blamelist> \
  --end-revision <last/newest revision in the blamelist> \
  --trybot <trybot name> \
  --project <billing project>

Concrete example:

trim_culprit_cls.py \
  --start-revision 1cdf916d194215f1e4139f295e494fc1c1863c3c \
  --end-revision 9aa31419100be8d0f02708a500aaed7c33a53a10 \
  --trybot win_optional_gpu_tests_rel \
  --project chromium-swarm

The --project argument can be any project you are associated with in the
Google Cloud console https://console.cloud.google.com/ (see drop-down menu in
the top left corner).
"""

from __future__ import print_function

import argparse
import json
import re
import subprocess

# pylint: disable=line-too-long
# Schemas:
# - go/buildbucket-bq and go/buildbucket-proto/build.proto
# - go/luci/cq/bq and
#   https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/cv/api/bigquery/v1/attempt.proto
#
# Original author: maruel@
#
# BigQuery (standard SQL) query listing the builds attached to CQ attempts for
# a single chromium/src CL. The `{cl_number}` placeholder is filled in with
# str.format() before the query is run.
#
# - `cq_builds` flattens each CQ attempt into one row per (build, Gerrit
#   change) pair and keeps only rows for the requested CL.
# - `builds` joins those rows against the Buildbucket table on the build ID to
#   pick up the builder name and status. Only builds created within the last
#   30 days are considered.
# - Rows are returned newest patchset first.
QUERY_TEMPLATE = """\
WITH cq_builds AS (
  SELECT
    build.id,
    build.critical,
    start_time,
    TIMESTAMP_DIFF(end_time, start_time, SECOND) AS duration,
    cl.change,
    cl.patchset
  FROM `commit-queue.chromium.attempts` CROSS JOIN UNNEST(builds) AS build CROSS JOIN UNNEST(gerrit_changes) AS cl
  WHERE
    cl.host = 'chromium-review.googlesource.com'
    AND cl.project = 'chromium/src'
    AND cl.change = {cl_number}
),

builds AS (
  SELECT
    patchset,
    bb.builder.project||'/'||bb.builder.bucket||'/'||bb.builder.builder AS builder,
    'ci.chromium.org/b/'||bb.id AS url,
    cq.critical,
    bb.status,
    cq.start_time,
    duration
  FROM cq_builds AS cq INNER JOIN `cr-buildbucket.chromium.builds` AS bb ON cq.id = bb.id
  WHERE
    # Performance optimization.
    bb.create_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
)

SELECT * FROM builds ORDER BY patchset DESC, critical, builder, start_time
"""
# pylint: enable=line-too-long

# Matches a "Reviewed-on: <url>" line, optionally preceded by whitespace, and
# captures the remainder of that line as the `gerrit_url` group. MULTILINE
# makes ^ and $ anchor at line boundaries so the footer can be found anywhere
# inside a multi-line block of text.
GERRIT_URL_REGEX = re.compile(r'^\s*Reviewed-on: (?P<gerrit_url>.*)$',
                              re.MULTILINE)


class ChangeList():
  """Record of everything this script learns about one blamelist entry.

  All fields start out as None and are expected to be filled in by the caller
  before the object is converted to a string.
  """

  def __init__(self):
    # Git revision of the commit.
    self.revision = None
    # Gerrit review URL; its last path component is used as the CL number.
    self.gerrit_url = None
    # Lazily computed cache backing the cl_number property.
    self._cl_number = None
    # Highest patchset number seen for the CL.
    self.largest_patchset = None
    # Whether the trybot of interest ran; None until determined.
    self.ran_trybot = None

  @property
  def cl_number(self):
    """The CL number, i.e. the last '/'-separated piece of gerrit_url.

    The value is computed on first access and cached afterwards, so
    gerrit_url must be set before this is read.
    """
    assert self.gerrit_url
    if self._cl_number:
      return self._cl_number
    self._cl_number = self.gerrit_url.rsplit('/', 1)[-1]
    return self._cl_number

  def __str__(self):
    # Every public field must have been populated before printing.
    required_fields = (self.revision, self.gerrit_url, self.largest_patchset,
                       self.ran_trybot)
    for value in required_fields:
      assert value is not None
    description = '%s (%s)' % (self.revision, self.gerrit_url)
    if self.ran_trybot:
      return description
    return description + ' <<<< Did not run trybot'


def QueryTrybotsForCl(cl_number, project):
  """Queries BigQuery for the tryjobs run for a CL.

  Args:
    cl_number: An int or string containing the CL number to query.
    project: A string containing the billing project to use for queries.

  Returns:
    A list of dicts, each entry containing data for one trybot run. An empty
    list is returned if the `bq` tool produced no output at all.

  Raises:
    ValueError: If |cl_number| cannot be interpreted as an integer.
    subprocess.CalledProcessError: If the `bq` tool exits with a non-zero code.
  """
  # The CL number is substituted straight into the SQL text, so force it to be
  # an integer instead of trusting a value parsed out of a commit message.
  query = QUERY_TEMPLATE.format(cl_number=int(cl_number))

  cmd = [
      'bq',
      'query',
      '--format=json',
      '--project_id=%s' % project,
      '--max_rows=500',
      '--use_legacy_sql=false',
      query,
  ]
  # subprocess.DEVNULL discards anything bq writes to stderr without relying
  # on /dev/null existing, which it does not on Windows.
  stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
  # NOTE(review): guards against bq printing nothing for an empty result set,
  # which would otherwise surface as an opaque JSON decode error - confirm
  # against the installed bq version.
  if not stdout.strip():
    return []
  return json.loads(stdout)


def FillTrybotRuns(blamelist, trybot, project):
  """Fills the trybot data for the entries in |blamelist|

  For every entry, the tryjobs for its CL are queried and the entry's
  largest_patchset and ran_trybot fields are set. Only tryjobs from the
  largest patchset count towards ran_trybot.

  Args:
    blamelist: A list of ChangeList objects with their gerrit_url fields filled.
    trybot: A string containing the name of the trybot to check for.
    project: A string containing the billing project to use for queries.
  """
  total_cls = len(blamelist)
  for i, entry in enumerate(blamelist):
    print('Getting data for CL %s/%s' % (i + 1, total_cls))
    all_trybots = QueryTrybotsForCl(entry.cl_number, project)
    if not all_trybots:
      # A CL with no tryjob rows cannot be absolved, so report it as not having
      # run the trybot rather than aborting the whole run (and throwing away
      # the queries already made) with a bare assertion failure.
      print('No tryjob data found for CL %s; treating it as not having run %s' %
            (entry.cl_number, trybot))
      entry.largest_patchset = 0
      entry.ran_trybot = False
      continue

    # The row limit can truncate the results, so work out the newest patchset
    # from the rows that were actually returned instead of assuming that a
    # particular row holds it.
    largest_patchset = max(int(tryjob['patchset']) for tryjob in all_trybots)
    entry.largest_patchset = largest_patchset

    # 'builder' field is in the form project/bucket/builder, e.g.
    # chromium/try/android-marshmallow-arm64-rel
    # The result is always assigned, so a value left over from an earlier call
    # never leaks through.
    entry.ran_trybot = any(
        int(tryjob['patchset']) == largest_patchset
        and tryjob['builder'].split('/')[-1] == trybot
        for tryjob in all_trybots)


def FillGerritUrls(blamelist):
  """Fills the Gerrit URLs for the entries in |blamelist|

  Runs `git show` on each revision and extracts the URL from the commit's
  "Reviewed-on:" line.

  Args:
    blamelist: A list of ChangeList objects with their revision fields filled.

  Raises:
    AssertionError: If an entry has no revision, or no non-empty "Reviewed-on:"
        URL can be found for a revision.
    subprocess.CalledProcessError: If `git show` exits with a non-zero code.
  """
  cmd_template = [
      'git',
      'show',
      '--name-only',
  ]
  for entry in blamelist:
    assert entry.revision
    stdout = subprocess.check_output(cmd_template + [entry.revision],
                                     stderr=subprocess.STDOUT)
    # check_output() returns bytes on Python 3, which a str regex refuses to
    # search. Decode leniently so an odd byte in a commit message cannot abort
    # the run; the URL itself is plain ASCII.
    output = stdout.decode('utf-8', errors='replace')
    match = GERRIT_URL_REGEX.search(output)
    assert match, 'No Reviewed-on line found for %s' % entry.revision
    # Drop stray trailing whitespace (e.g. a carriage return) so the CL number
    # derived from the URL stays clean.
    entry.gerrit_url = match.group('gerrit_url').strip()
    assert entry.gerrit_url, 'Empty Reviewed-on URL for %s' % entry.revision


def GetBlamelist(start_revision, end_revision):
  """Gets a revision blamelist between the two given revisions.

  Args:
    start_revision: A string containing the earliest revision in the blamelist.
    end_revision: A string containing the latest revision in the blamelist.

  Returns:
    A list of ChangeList objects with their revision fields filled in, each
    corresponding to a revision in the blamelist. The first entry is the
    latest in the blamelist.

  Raises:
    subprocess.CalledProcessError: If `git log` exits with a non-zero code.
  """
  cmd = [
      'git',
      'log',
      '--pretty=oneline',
      # start~1 makes the range inclusive of |start_revision| itself.
      '%s~1..%s' % (start_revision, end_revision),
  ]
  stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT)

  blamelist = []
  # Split while the output is still bytes so that unusual characters in a
  # commit subject cannot introduce extra line breaks; only the leading hash
  # token is decoded.
  for line in stdout.splitlines():
    fields = line.split()
    if not fields:
      continue
    cl = ChangeList()
    # check_output() returns bytes on Python 3. Store the hash as text so it
    # prints as "abc123" rather than "b'abc123'" and can be passed to later
    # git invocations as a normal string argument.
    cl.revision = fields[0].decode('ascii')
    blamelist.append(cl)
  return blamelist


def ParseArgs():
  """Parses the script's command line flags.

  Returns:
    An argparse.Namespace with start_revision, end_revision, project and
    trybot attributes. All four flags are required.
  """
  description = ('Script to determine which CLs in a blamelist did not run a '
                 'particular trybot.')
  # (flag, help text) pairs, registered in this order.
  required_flags = (
      ('--start-revision', 'The earliest revision in the blamelist.'),
      ('--end-revision', 'The latest revision in the blamelist.'),
      ('--project', 'A billing project to use for queries.'),
      ('--trybot', 'The name of the trybot to look for.'),
  )
  arg_parser = argparse.ArgumentParser(description=description)
  for flag, help_text in required_flags:
    arg_parser.add_argument(flag, required=True, help=help_text)
  return arg_parser.parse_args()


def main():
  """Builds the blamelist, annotates each CL and prints the result.

  The blamelist is read from git, each entry gets its Gerrit URL and trybot
  data filled in, and the entries are then printed one per line, latest first.
  """
  options = ParseArgs()
  cls = GetBlamelist(options.start_revision, options.end_revision)
  FillGerritUrls(cls)
  FillTrybotRuns(cls, options.trybot, options.project)
  print('\n\nBlamelist (latest first):\n')
  for cl in cls:
    print(cl)

# Only run the tool when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()