1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170

tools / sample_clang_tidy_results.py [blame]

#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Samples clang-tidy results from a JSON file.

Provides information about number of checks triggered and a summary of some of
the checks with links back to code search.

Usage:
tools/sample_clang_tidy_results.py out/all_findings.json
"""

import argparse
import collections
import functools
import json
import logging
import os
import random
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, List


@functools.lru_cache(maxsize=None)
def get_src_path() -> Path:
  """Returns the resolved grandparent directory of this script.

  The usage text of this script invokes it as
  tools/sample_clang_tidy_results.py, so the grandparent of this file is
  treated as the root of the checkout. The result is cached, so the filesystem
  is only consulted on the first call.

  Returns:
    The absolute, symlink-resolved root directory as a Path.

  Raises:
    FileNotFoundError: If the computed directory does not exist.
  """
  src_path = Path(__file__).parent.parent.resolve()
  # A Path object is always truthy, so `not src_path` can never detect a
  # missing checkout; check for the directory on disk instead.
  if not src_path.is_dir():
    raise FileNotFoundError(
        'Could not find checkout in any parent of the current path.')
  return src_path


@functools.lru_cache(maxsize=None)
def git_rev_parse_head(path: Path) -> str:
  """Returns the HEAD commit hash of the git checkout containing |path|.

  Walks from |path| towards the filesystem root until a directory containing
  a `.git` entry is found, then runs `git rev-parse HEAD` in that directory.
  Results are cached per visited path, so files sharing a directory only
  trigger one git invocation.

  Args:
    path: A file or directory path to start the search from.

  Returns:
    The output of `git rev-parse HEAD` with surrounding whitespace stripped.

  Raises:
    FileNotFoundError: If no parent of |path| contains a `.git` entry.
    subprocess.CalledProcessError: If the git command exits with an error.
  """
  if (path / '.git').exists():
    return subprocess.check_output(['git', 'rev-parse', 'HEAD'],
                                   encoding='utf-8',
                                   cwd=path).strip()
  parent = path.parent
  # The top-most path is its own parent; stop there instead of recursing
  # until the interpreter hits its recursion limit.
  if parent == path:
    raise FileNotFoundError(
        f'Could not find a git checkout in any parent of {path}.')
  return git_rev_parse_head(parent)


def convert_diag_to_cs(diag: Dict[str, Any]) -> Dict[str, Any]:
  """Converts a diagnostic into a summary with a code search link.

  Args:
    diag: A diagnostic dict. The keys 'file_path', 'line_number', 'diag_name'
      and 'replacements' are read; each entry of 'replacements' must have a
      'new_text' key.

  Returns:
    A dict with three keys:
      'name': the diagnostic name,
      'path': a source.chromium.org URL for the file and line, pinned to the
        HEAD commit of the git checkout that contains the file,
      'replacement': the 'new_text' of every replacement, joined by newlines.
  """
  path = diag['file_path']
  line = diag['line_number']
  name = diag['diag_name']
  replacement = '\n'.join(x['new_text'] for x in diag['replacements'])

  sha = git_rev_parse_head(get_src_path() / path)

  # Links have the same shape as
  # https://source.chromium.org/chromium/chromium/src/+/main:apps/app_restore_service.cc
  # but use the commit hash in place of the branch name.
  sha_and_path = f'{sha}:{path}'
  return {
      'name':
      name,
      'path': ('https://source.chromium.org/chromium/chromium/src/+/'
               f'{sha_and_path};l={line}'),
      'replacement':
      replacement
  }


@functools.lru_cache(maxsize=None)
def is_first_party_path(path: Path) -> bool:
  """Returns whether |path| belongs directly to the top-level checkout.

  Walks from |path| towards the filesystem root. Reaching get_src_path()
  means the path is first-party. Encountering a directory that contains a
  `.git` or `.gclient` entry before that means the path lives in a nested
  checkout and is not first-party. Results are cached per visited path.

  Args:
    path: The path to classify.

  Returns:
    True if the walk reaches get_src_path() without crossing a nested
    checkout, False otherwise (including when the root is reached).
  """
  if path == get_src_path():
    return True

  # A Path never compares equal to the string '/', so detect the top of the
  # filesystem by the fact that it is its own parent. Without this, a path
  # outside of the checkout would recurse forever.
  if path.parent == path:
    return False

  if (path / '.git').exists() or (path / '.gclient').exists():
    return False

  return is_first_party_path(path.parent)


def is_first_party_diag(diag: Dict[str, Any]) -> bool:
  """Returns whether the diagnostic's file is first-party.

  Files whose 'file_path' starts with 'out/' or '/' are never first-party.
  Every other path is resolved against get_src_path() and classified by
  is_first_party_path().
  """
  relative_path = diag['file_path']
  excluded_prefixes = ('out/', '/')
  if relative_path.startswith(excluded_prefixes):
    return False
  candidate = get_src_path() / relative_path
  return is_first_party_path(candidate)


def select_random_diags(diags: List[Dict[str, Any]], number: int) -> List[Any]:
  """Picks up to |number| first-party diagnostics at random.

  Args:
    diags: The diagnostics to choose from.
    number: The maximum number of diagnostics to return.

  Returns:
    All first-party diagnostics, in their original order, when there are at
    most |number| of them; otherwise a random sample of size |number|.
  """
  candidates = list(filter(is_first_party_diag, diags))
  if number < len(candidates):
    return random.sample(candidates, number)
  return candidates


def is_diag_in_test_file(diag: Dict[str, Any]) -> bool:
  """Returns whether the diagnostic's file name looks like a test file.

  The check is applied to 'file_path' with its extension removed: the result
  is True when that ends in 'test' or 'tests', or contains '_test_' or
  '_unittest_' anywhere.
  """
  stem, _ = os.path.splitext(diag['file_path'])
  if stem.endswith(('test', 'tests')):
    return True
  return any(marker in stem for marker in ('_test_', '_unittest_'))


def is_diag_in_third_party(diag: Dict[str, Any]) -> bool:
  """Returns whether 'third_party' appears anywhere in the diag's file path."""
  file_path = diag['file_path']
  found = 'third_party' in file_path
  return found


def main(argv: List[str]):
  """Prints statistics and a random sample of the diagnostics in a JSON file.

  Reads the JSON file named on the command line, prints how many files had
  tidy errors or timed out, optionally filters out third_party and test
  diagnostics, prints a per-check count, and finally prints a random sample
  of first-party diagnostics with code search links.

  Args:
    argv: The command-line arguments, without the program name.
  """
  logging.basicConfig(
      format='>> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: '
      '%(message)s',
      level=logging.INFO,
  )

  parser = argparse.ArgumentParser(
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter,
  )
  parser.add_argument('-n',
                      '--number',
                      type=int,
                      default=30,
                      help='How many checks to sample')
  parser.add_argument('--ignore-tests',
                      action='store_true',
                      help='Filters lints in test/unittest files if specified.')
  parser.add_argument('--include-third-party',
                      action='store_true',
                      help='Includes lints in third_party if specified.')
  parser.add_argument('file', help='JSON file to parse')
  opts = parser.parse_args(argv)

  # JSON text is UTF-8; decode it explicitly rather than relying on the
  # platform's locale encoding.
  with open(opts.file, encoding='utf-8') as f:
    data = json.load(f)

  print(f'Files with tidy errors: {len(data["failed_tidy_files"])}')
  print(f'Timed out files: {len(data["timed_out_src_files"])}')
  diags = data['diagnostics']

  # third_party diagnostics are dropped unless explicitly requested.
  if not opts.include_third_party:
    new_diags = [x for x in diags if not is_diag_in_third_party(x)]
    print(f'Dropped {len(diags) - len(new_diags)} diags from third_party')
    diags = new_diags

  if opts.ignore_tests:
    new_diags = [x for x in diags if not is_diag_in_test_file(x)]
    print(f'Dropped {len(diags) - len(new_diags)} diags from test files')
    diags = new_diags

  # Tally how often each check fired among the remaining diagnostics.
  counts = collections.Counter(x['diag_name'] for x in diags)

  print(f'Total number of diagnostics: {len(diags)}')
  for name in sorted(counts):
    print(f'\t{name}: {counts[name]}')
  print()

  sampled = select_random_diags(diags, opts.number)
  summaries = [convert_diag_to_cs(x) for x in sampled]
  print('** Sample of first-party lints: **')
  for summary in summaries:
    print(summary['path'])
    print(f'\tDiagnostic: {summary["name"]}')
    print(f'\tReplacement: {summary["replacement"]}')
    print()

  print('** Link summary **')
  for summary in summaries:
    print(summary['path'])


# When executed as a script, pass the command-line arguments (without the
# program name) to main().
if __name__ == '__main__':
  main(sys.argv[1:])