content / test / gpu / trim_culprit_cls.py [blame]

#!/usr/bin/env vpython3
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Script for determining which CLs in a blamelist ran on a certain trybot.

There are cases where CLs can be absolved of a CI failure if they ran on a
similar trybot before being submitted. This script will go through each CL in
a given blamelist and determine whether they ran on a specified trybot or not.

This script depends on the `bq` tool, which is available as part of the Google
Cloud SDK https://cloud.google.com/sdk/docs/quickstarts.

Example usage:

trim_culprit_cls.py \
  --start-revision <first/oldest revision in the blamelist> \
  --end-revision <last/newest revision in the blamelist> \
  --trybot <trybot name> \
  --project <billing project>

Concrete example:

trim_culprit_cls.py \
  --start-revision 1cdf916d194215f1e4139f295e494fc1c1863c3c \
  --end-revision 9aa31419100be8d0f02708a500aaed7c33a53a10 \
  --trybot win_optional_gpu_tests_rel \
  --project chromium-swarm

The --project argument can be any project you are associated with in the
Google Cloud console https://console.cloud.google.com/ (see drop-down menu in
the top left corner).
"""

from __future__ import print_function

import argparse
import json
import re
import subprocess

# pylint: disable=line-too-long
# Schemas:
# - go/buildbucket-bq and go/buildbucket-proto/build.proto
# - go/luci/cq/bq and
#   https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/cv/api/bigquery/v1/attempt.proto
#
# Original author: maruel@
#
# BigQuery query text listing the builds that CQ attempts triggered for a
# single chromium/src CL on chromium-review.googlesource.com.
#
# - `{cl_number}` is the only placeholder; it is filled in with str.format()
#   by QueryTrybotsForCl(). Because str.format() is used, any literal braces
#   added to the SQL in the future would need to be doubled.
# - `cq_builds` flattens each CQ attempt into one row per (build, CL) pair.
# - `builds` joins those rows with Buildbucket data to obtain the builder name
#   (as project/bucket/builder), a build URL and the build status. Only
#   Buildbucket builds created in the last 30 days are considered.
# - Result rows are ordered by patchset, newest first.
QUERY_TEMPLATE = """\
WITH cq_builds AS (
  SELECT
    build.id,
    build.critical,
    start_time,
    TIMESTAMP_DIFF(end_time, start_time, SECOND) AS duration,
    cl.change,
    cl.patchset
  FROM `commit-queue.chromium.attempts` CROSS JOIN UNNEST(builds) AS build CROSS JOIN UNNEST(gerrit_changes) AS cl
  WHERE
    cl.host = 'chromium-review.googlesource.com'
    AND cl.project = 'chromium/src'
    AND cl.change = {cl_number}
),

builds AS (
  SELECT
    patchset,
    bb.builder.project||'/'||bb.builder.bucket||'/'||bb.builder.builder AS builder,
    'ci.chromium.org/b/'||bb.id AS url,
    cq.critical,
    bb.status,
    cq.start_time,
    duration
  FROM cq_builds AS cq INNER JOIN `cr-buildbucket.chromium.builds` AS bb ON cq.id = bb.id
  WHERE
    # Performance optimization.
    bb.create_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
)

SELECT * FROM builds ORDER BY patchset DESC, critical, builder, start_time
"""
# pylint: enable=line-too-long

# Matches a "Reviewed-on: <url>" line, allowing leading whitespace, and
# captures the remainder of that line in the |gerrit_url| group. MULTILINE
# makes ^ and $ anchor at every line so the pattern can be searched for inside
# a whole commit description (see FillGerritUrls).
GERRIT_URL_REGEX = re.compile(r'^\s*Reviewed-on: (?P<gerrit_url>.*)$',
                              re.MULTILINE)


class ChangeList:
  """Plain record holding what the script knows about one CL in a blamelist.

  Every field starts out as None and is filled in step by step by the helper
  functions in this file.
  """

  def __init__(self):
    # Git revision of the landed commit.
    self.revision = None
    # Gerrit review URL taken from the commit description.
    self.gerrit_url = None
    # Cached result of the |cl_number| property.
    self._cl_number = None
    # Highest patchset number seen in the tryjob data.
    self.largest_patchset = None
    # Whether the requested trybot ran on |largest_patchset|.
    self.ran_trybot = None

  @property
  def cl_number(self):
    """The last '/'-separated component of |gerrit_url|, computed lazily.

    The value is cached after the first successful lookup, so later changes to
    |gerrit_url| are not reflected.
    """
    assert self.gerrit_url
    if self._cl_number:
      return self._cl_number
    self._cl_number = self.gerrit_url.rsplit('/', 1)[-1]
    return self._cl_number

  def __str__(self):
    """Returns 'revision (url)', flagged if the trybot did not run.

    All public fields must have been filled in before this is called.
    """
    for field in (self.revision, self.gerrit_url, self.largest_patchset,
                  self.ran_trybot):
      assert field is not None
    description = '%s (%s)' % (self.revision, self.gerrit_url)
    if self.ran_trybot:
      return description
    return description + ' <<<< Did not run trybot'


def QueryTrybotsForCl(cl_number, project):
  """Queries BigQuery for the tryjobs run for a CL.

  Shells out to the `bq` command line tool, so it must be installed and
  authenticated.

  Args:
    cl_number: An int or string containing the CL number to query.
    project: A string containing the billing project to use for queries.

  Returns:
    A list of dicts, each entry containing data for one trybot run.

  Raises:
    ValueError: If |cl_number| cannot be interpreted as an integer.
    subprocess.CalledProcessError: If `bq` exits with a non-zero status.
  """
  # The CL number is spliced directly into the SQL text, so force it to be an
  # integer rather than trusting whatever was parsed out of a commit message.
  query = QUERY_TEMPLATE.format(cl_number=int(cl_number))

  cmd = [
      'bq',
      'query',
      '--format=json',
      '--project_id=%s' % project,
      '--max_rows=500',
      '--use_legacy_sql=false',
      query,
  ]
  # stderr is discarded so that only the JSON on stdout is parsed.
  # subprocess.DEVNULL is used instead of opening '/dev/null' by hand, which
  # does not exist on Windows.
  stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
  return json.loads(stdout)


def FillTrybotRuns(blamelist, trybot, project):
  """Fills the trybot data for the entries in |blamelist|

  For every entry this sets |largest_patchset| to the highest patchset number
  found in the query results and |ran_trybot| to whether a build of |trybot|
  exists for that patchset. Entries for which the query returns no rows get
  |largest_patchset| 0 and |ran_trybot| False.

  Args:
    blamelist: A list of ChangeList objects with their gerrit_url fields filled.
    trybot: A string containing the name of the trybot to check for.
    project: A string containing the billing project to use for queries.
  """
  total_cls = len(blamelist)
  for i, entry in enumerate(blamelist, start=1):
    print('Getting data for CL %s/%s' % (i, total_cls))
    all_trybots = QueryTrybotsForCl(entry.cl_number, project)
    if not all_trybots:
      # No tryjob rows were returned for this CL. Aborting here would throw
      # away the (billed) queries already made for earlier CLs, and a CL
      # without any recorded tryjobs cannot have run the requested trybot.
      print('No tryjob data found for CL %s' % entry.cl_number)
      entry.largest_patchset = 0
      entry.ran_trybot = False
      continue

    # Do not rely on the order of the returned rows; find the newest patchset
    # explicitly.
    largest_patchset = max(int(tryjob['patchset']) for tryjob in all_trybots)
    entry.largest_patchset = largest_patchset

    # 'builder' field is in the form project/bucket/builder, e.g.
    # chromium/try/android-marshmallow-arm64-rel
    # Only builds for the newest patchset count. The value is always
    # recomputed so that a previously filled entry cannot keep a stale result.
    entry.ran_trybot = any(
        int(tryjob['patchset']) == largest_patchset
        and tryjob['builder'].split('/')[-1] == trybot
        for tryjob in all_trybots)


def FillGerritUrls(blamelist):
  """Fills the Gerrit URLs for the entries in |blamelist|

  Runs `git show` for every revision and extracts the URL from the
  "Reviewed-on:" line of the commit description, so this must be run from
  within a git checkout that contains the revisions.

  Args:
    blamelist: A list of ChangeList objects with their revision fields filled.

  Raises:
    subprocess.CalledProcessError: If `git show` fails for a revision.
    AssertionError: If a revision is unset or no "Reviewed-on:" URL is found.
  """
  cmd_template = [
      'git',
      'show',
      '--name-only',
  ]
  for entry in blamelist:
    assert entry.revision
    output = subprocess.check_output(cmd_template + [entry.revision],
                                     stderr=subprocess.STDOUT)
    # check_output() returns bytes under Python 3, while GERRIT_URL_REGEX is a
    # str pattern; searching bytes with it raises TypeError. Decode leniently
    # since commit descriptions may contain arbitrary bytes.
    commit_text = output.decode('utf-8', errors='replace')
    match = GERRIT_URL_REGEX.search(commit_text)
    assert match, ('No "Reviewed-on:" line found for revision %s' %
                   entry.revision)
    # Strip stray whitespace (e.g. a trailing carriage return) that the
    # greedy capture may have picked up.
    entry.gerrit_url = match.group('gerrit_url').strip()
    assert entry.gerrit_url, ('Empty "Reviewed-on:" URL for revision %s' %
                              entry.revision)


def GetBlamelist(start_revision, end_revision):
  """Gets a revision blamelist between the two given revisions.

  Runs `git log` over the inclusive range, so this must be run from within a
  git checkout that contains both revisions.

  Args:
    start_revision: A string containing the earliest revision in the blamelist.
    end_revision: A string containing the latest revision in the blamelist.

  Returns:
    A list of ChangeList objects with their revision fields filled in, each
    corresponding to a revision in the blamelist. The first entry is the
    latest in the blamelist.

  Raises:
    subprocess.CalledProcessError: If `git log` fails.
  """
  cmd = [
      'git',
      'log',
      '--pretty=oneline',
      # "~1" makes the range include |start_revision| itself.
      '%s~1..%s' % (start_revision, end_revision),
  ]
  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
  # check_output() returns bytes under Python 3. Decode so that revisions are
  # stored as text and do not show up as b'...' when printed later.
  log_text = output.decode('utf-8', errors='replace')

  blamelist = []
  for line in log_text.splitlines():
    fields = line.split()
    if not fields:
      # Skip blank lines rather than failing on them.
      continue
    cl = ChangeList()
    # Each line is "<hash> <subject>"; only the hash is needed.
    cl.revision = fields[0]
    blamelist.append(cl)
  return blamelist


def ParseArgs():
  """Parses the command line arguments.

  All four flags are mandatory.

  Returns:
    The argparse namespace with start_revision, end_revision, project and
    trybot attributes.
  """
  # (flag, help text) pairs, in the order they are registered.
  required_flags = (
      ('--start-revision', 'The earliest revision in the blamelist.'),
      ('--end-revision', 'The latest revision in the blamelist.'),
      ('--project', 'A billing project to use for queries.'),
      ('--trybot', 'The name of the trybot to look for.'),
  )
  arg_parser = argparse.ArgumentParser(
      description='Script to determine which CLs in a blamelist did not run a '
      'particular trybot.')
  for flag, help_text in required_flags:
    arg_parser.add_argument(flag, required=True, help=help_text)
  return arg_parser.parse_args()


def main():
  """Builds the blamelist, annotates it and prints one line per CL."""
  options = ParseArgs()
  cls = GetBlamelist(options.start_revision, options.end_revision)
  # Both helpers fill in fields on the ChangeList objects in place.
  FillGerritUrls(cls)
  FillTrybotRuns(cls, options.trybot, options.project)
  print('\n\nBlamelist (latest first):\n')
  for cl in cls:
    print(cl)


if __name__ == '__main__':
  main()