#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Processes the raw output from containers_memory_usage into CSV files. Each CSV
# file contains the results for all tested container types for a given key and
# value type.
#
# Usage:
# $ out/release/containers_memory_benchmark &> output.txt
# $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results
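#
# Each CSV is named after its key/value types (e.g. "int -> std::string.csv")
# and contains a "size" column followed by one column per container type,
# giving the number of heap bytes the container had allocated while holding
# that many elements.
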
import argparse
from collections.abc import Sequence
import csv
import os.path
import re
import sys
from typing import Optional
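
# The raw benchmark output is expected to look roughly like the illustrative
# excerpt below (addresses and sizes are made up): a "key -> value" line
# starts a test case, a "=====" header starts the stats for one container
# type, and each "iteration" line is followed by the allocations and frees
# that iteration performed.
#
#   int -> std::string
#   ===== base::flat_map =====
#   iteration 0
#   alloc address 0x7f0000001000 size 32
#   freed address 0x7f0000001000
#   iteration 1
#   ...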
_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')


class ContainerStatsProcessor:
  def __init__(self, name: str):
    # e.g. base::flat_map
    self._name = name
    # current number of elements in the container
    self._n = None
    # map of address to size for currently active allocations. Needed because
    # the free handler only records an address, and not a size.
    self._addr_to_size = {}
    # running count of the number of bytes needed at the current iteration
    self._running_size = 0
    # map of container size to number of bytes used to store a container of
    # that size. Keys are expected to be contiguous from 0 to the total
    # iteration count.
    self._data = {}

  @property
  def name(self):
    return self._name

  @property
  def data(self):
    return self._data

  def did_alloc(self, addr: str, size: int):
    self._addr_to_size[addr] = size
    self._running_size += size

  def did_free(self, addr: str):
    size = self._addr_to_size.pop(addr)
    self._running_size -= size

  def did_iterate(self, n: int):
    if self._n is not None:
      self.flush_current_iteration_if_needed()
    self._n = n

  def flush_current_iteration_if_needed(self):
    self._data[self._n] = self._running_size


class TestCaseProcessor:
  def __init__(self, name: str):
    # e.g. int -> std::string
    self._name = name
    # containers for which all allocation data has been processed and
    # finalized.
    self._finalized_stats: list[ContainerStatsProcessor] = []
    # the current container being processed.
    self._current_container_stats: Optional[ContainerStatsProcessor] = None

  @property
  def current_container_stats(self):
    return self._current_container_stats

  def did_begin_container_stats(self, container_type: str):
    self._finalize_current_container_stats_if_needed()
    self._current_container_stats = ContainerStatsProcessor(container_type)

  def did_finish_container_stats(self, output_dir: str):
    self._finalize_current_container_stats_if_needed()
    with open(
        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
    ) as f:
      writer = csv.writer(f)
      # First the column headers...
      writer.writerow(
          ['size'] + [stats.name for stats in self._finalized_stats]
      )
      # In theory, all processed containers should have the same number of
      # keys, but assert just to be sure.
      keys = []
      for stats in self._finalized_stats:
        if not keys:
          keys = sorted(stats.data.keys())
        else:
          assert keys == sorted(stats.data.keys())
      for key in keys:
        writer.writerow(
            [key] + [stats.data[key] for stats in self._finalized_stats]
        )

  def _finalize_current_container_stats_if_needed(self):
    if self._current_container_stats:
      self._current_container_stats.flush_current_iteration_if_needed()
      self._finalized_stats.append(self._current_container_stats)
      self._current_container_stats = None


def main(argv: Sequence[str]) -> None:
  parser = argparse.ArgumentParser(
      description='Processes raw output from containers_memory_usage into CSVs.'
  )
  parser.add_argument(
      '-o', help='directory to write CSV files to', required=True
  )
  args = parser.parse_args()

  # It would be nicer to use a ContextManager, but that complicates splitting
  # up the input and iterating through it. This is "good enough".
  processor: Optional[TestCaseProcessor] = None
  for line in sys.stdin:
    line = line.strip()

    # A line containing '->' (e.g. "int -> std::string") starts a new test
    # case: finish the previous one, if any, and start a new processor for it.
    if '->' in line:
      if processor:
        processor.did_finish_container_stats(args.o)
      processor = TestCaseProcessor(line)
      continue

    match = _HEADER_RE.match(line)
    if match:
      processor.did_begin_container_stats(match.group('name'))
      continue

    match = _ITER_RE.match(line)
    if match:
      processor.current_container_stats.did_iterate(int(match.group('iter')))
      continue

    match = _ALLOC_RE.match(line)
    if match:
      processor.current_container_stats.did_alloc(
          match.group('alloc_addr'), int(match.group('size'))
      )
      continue

    match = _FREED_RE.match(line)
    if match:
      processor.current_container_stats.did_free(match.group('freed_addr'))
      continue

  # Flush the last test case once stdin is exhausted.
  if processor:
    processor.did_finish_container_stats(args.o)


if __name__ == '__main__':
  main(sys.argv)