1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166

base / containers / analyze_containers_memory_benchmark.py [blame]

#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Processes the raw output from containers_memory_usage into CSV files. Each CSV
# file contains the results for all tested container types for a given key and
# value type.
#
# Usage:
# $ out/release/containers_memory_benchmark &> output.txt
# $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results

import argparse
from collections.abc import Sequence
import csv
import os.path
import re
import sys
from typing import Optional


# Patterns for the line types recognized in the benchmark output. main() applies
# each one with match() to a whitespace-stripped line, so a pattern only matches
# when the line begins with it.
#
# Container section header, e.g. '===== <container name> ====='.
_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
# Iteration marker: 'iteration <decimal number>'.
_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
# Allocation record: 'alloc address <address> size <decimal size>'.
_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
# Free record: 'freed address <address>'. No size is reported for frees.
_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')


class ContainerStatsProcessor:
  """Accumulates allocation records for one container type.

  Allocation and free events adjust a running total of live bytes. Each
  iteration marker snapshots that total under the previous iteration number, so
  `data` ends up mapping iteration number to the live bytes recorded for it.
  """

  def __init__(self, name: str):
    # e.g. base::flat_map
    self._name = name
    # current number of elements in the container; None until the first
    # iteration marker has been seen.
    self._n: Optional[int] = None
    # map of address to size for currently active allocations. Needed because
    # the free handler only records an address, and not a size.
    self._addr_to_size: dict[str, int] = {}
    # running count of the number of bytes needed at the current iteration
    self._running_size = 0
    # map of container size to number of bytes used to store a container of that
    # size. Keys are expected to be contiguous from 0 to the total iteration
    # count.
    self._data: dict[int, int] = {}

  @property
  def name(self) -> str:
    """The container type name this processor was created with."""
    return self._name

  @property
  def data(self) -> dict[int, int]:
    """Map of iteration number to the byte total flushed for it."""
    return self._data

  def did_alloc(self, addr: str, size: int) -> None:
    """Records a live allocation of `size` bytes at `addr`."""
    self._addr_to_size[addr] = size
    self._running_size += size

  def did_free(self, addr: str) -> None:
    """Releases the allocation at `addr`.

    Raises:
      KeyError: if `addr` is not a currently tracked allocation.
    """
    size = self._addr_to_size.pop(addr)
    self._running_size -= size

  def did_iterate(self, n: int) -> None:
    """Closes out the previous iteration (if any) and starts iteration `n`."""
    self.flush_current_iteration_if_needed()
    self._n = n

  def flush_current_iteration_if_needed(self) -> None:
    """Stores the running byte total under the current iteration number.

    Does nothing if no iteration marker has been seen yet; otherwise a bogus
    entry keyed by None would be recorded when a container section contains no
    iterations.
    """
    if self._n is None:
      return
    self._data[self._n] = self._running_size


class TestCaseProcessor:
  """Collects stats for every container in one test case and writes a CSV.

  A test case is identified by its name (e.g. 'int -> std::string'), which is
  also used as the stem of the output file name.
  """

  def __init__(self, name: str):
    # e.g. int -> std::string
    self._name = name
    # containers for which all allocation data has been processed and finalized.
    self._finalized_stats: list[ContainerStatsProcessor] = []
    # the current container being processed.
    self._current_container_stats: Optional[ContainerStatsProcessor] = None

  @property
  def current_container_stats(self):
    """The container currently being processed, or None if there is none."""
    return self._current_container_stats

  def did_begin_container_stats(self, container_type: str):
    """Finalizes the container in progress and starts `container_type`."""
    self._finalize_current_container_stats_if_needed()
    self._current_container_stats = ContainerStatsProcessor(container_type)

  def did_finish_container_stats(self, output_dir: str):
    """Writes `<output_dir>/<test case name>.csv` for all processed containers.

    The first row holds the column headers ('size' followed by each container
    name); every following row holds one key and, per container, the value
    recorded for that key.

    Raises:
      ValueError: if the containers did not all record the same set of keys.
        Nothing is written in that case.
    """
    self._finalize_current_container_stats_if_needed()

    # In theory, all processed containers should have the same keys. Check this
    # before opening the file so a mismatch does not leave a partial CSV behind,
    # and raise explicitly rather than assert so the check survives `-O`.
    # `keys` starts as None (not []) so that an empty first container is still
    # compared against the rest.
    keys = None
    for stats in self._finalized_stats:
      stats_keys = sorted(stats.data)
      if keys is None:
        keys = stats_keys
      elif keys != stats_keys:
        raise ValueError(
            f'{self._name}: container {stats.name} recorded a different set '
            f'of sizes than {self._finalized_stats[0].name}'
        )

    with open(
        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
    ) as f:
      writer = csv.writer(f)
      # First the column headers...
      writer.writerow(
          ['size'] + [stats.name for stats in self._finalized_stats]
      )
      # ...then one row per key.
      for key in keys or []:
        writer.writerow(
            [key] + [stats.data[key] for stats in self._finalized_stats]
        )

  def _finalize_current_container_stats_if_needed(self):
    """Flushes and archives the container in progress, if there is one."""
    if self._current_container_stats:
      self._current_container_stats.flush_current_iteration_if_needed()
      self._finalized_stats.append(self._current_container_stats)
      self._current_container_stats = None


def main(argv: Sequence[str]) -> None:
  """Reads benchmark output from stdin and writes one CSV per test case.

  Args:
    argv: the full argument vector, including the program name at index 0.
      Must contain `-o <dir>`, the directory the CSV files are written to.
  """
  parser = argparse.ArgumentParser(
      description='Processes raw output from containers_memory_usage into CSVs.'
  )
  parser.add_argument(
      '-o', help='directory to write CSV files to', required=True
  )
  # Parse the arguments that were actually passed in (minus the program name)
  # instead of implicitly falling back to sys.argv.
  args = parser.parse_args(argv[1:])

  # It would be nicer to use a ContextManager, but that complicates splitting up
  # the input and iterating through it. This is "good enough".
  processor: Optional[TestCaseProcessor] = None

  for line in sys.stdin:
    line = line.strip()
    # A line containing '->' names a new test case (e.g. 'int -> std::string'):
    # write out the previous test case, if any, and start a fresh one.
    if '->' in line:
      if processor:
        processor.did_finish_container_stats(args.o)
      processor = TestCaseProcessor(line)
      continue

    # NOTE(review): the branches below assume a test-case line (and, for
    # iteration/alloc/free records, a container header) has already been seen;
    # otherwise they fail with AttributeError on None.
    match = _HEADER_RE.match(line)
    if match:
      processor.did_begin_container_stats(match.group('name'))
      continue

    match = _ITER_RE.match(line)
    if match:
      processor.current_container_stats.did_iterate(int(match.group('iter')))
      continue

    match = _ALLOC_RE.match(line)
    if match:
      processor.current_container_stats.did_alloc(
          match.group('alloc_addr'), int(match.group('size'))
      )
      continue

    match = _FREED_RE.match(line)
    if match:
      processor.current_container_stats.did_free(match.group('freed_addr'))
      continue

    # Any other line is ignored.

  # Write out the last test case; no further '->' line will trigger it.
  if processor:
    processor.did_finish_container_stats(args.o)


# Script entry point: hand the process's full argument vector to main().
if __name__ == '__main__':
  main(sys.argv)