1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
content / test / gpu / gold_inexact_matching / base_parameter_optimizer.py [blame]
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import collections
import glob
import hashlib
import itertools
import io
import json
import logging
import multiprocessing
import os
import shutil
import subprocess
import tempfile
from typing import Dict, List, Optional, Set, Tuple
from PIL import Image # pylint: disable=import-error
import requests # pylint: disable=import-error
from gold_inexact_matching import common_typing as ct
from gold_inexact_matching import parameter_set
CHROMIUM_SRC_DIR = os.path.realpath(
os.path.join(os.path.dirname(__file__), '..', '..', '..', '..'))
GOLDCTL_PATHS = [
os.path.join(CHROMIUM_SRC_DIR, 'tools', 'skia_goldctl', 'linux', 'goldctl'),
os.path.join(CHROMIUM_SRC_DIR, 'tools', 'skia_goldctl', 'mac', 'goldctl'),
os.path.join(CHROMIUM_SRC_DIR, 'tools', 'skia_goldctl', 'win',
'goldctl.exe'),
]
# The downloaded expectations are in the following format:
# {
# branch (str): {
# test_name (str): {
# digest1 (str): status (str),
# digest2 (str): status (str),
# ...
# }
# }
# }
ExpectationJson = Dict[str, Dict[str, Dict[str, str]]]
class BaseParameterOptimizer():
"""Abstract base class for running a parameter optimization for a test."""
MIN_EDGE_THRESHOLD = 0
MAX_EDGE_THRESHOLD = 255
MIN_MAX_DIFF = 0
MIN_DELTA_THRESHOLD = 0
# 4 channels, ranging from 0-255 each.
MAX_DELTA_THRESHOLD = 255 * 4
def __init__(self, args: ct.ParsedCmdArgs, test_name: str):
"""
Args:
args: The parse arguments from an argparse.ArgumentParser.
test_name: The name of the test to optimize.
"""
self._args = args
self._test_name = test_name
self._goldctl_binary: Optional[str] = None
self._working_dir: Optional[str] = None
self._expectations: Optional[ExpectationJson] = None
# TODO(skbug.com/10610): Switch away from the public instance once
# authentication is fixed for the non-public instance.
self._gold_url = 'https://%s-public-gold.skia.org' % args.gold_instance
self._pool = multiprocessing.Pool()
# A map of strings, denoting a resolution or trace, to a set of strings,
# denoting images that are that dimension or belong to that trace.
self._images: Dict[str, Set[str]] = collections.defaultdict(set)
self._VerifyArgs()
parameter_set.ParameterSet.ignored_border_thickness = \
self._args.ignored_border_thickness
@classmethod
def AddArguments(cls, parser: ct.CmdArgParser) -> ct.ArgumentGroupTuple:
"""Add optimizer-specific arguments to the parser.
Args:
parser: An argparse.ArgumentParser instance.
Returns:
A 3-tuple (common_group, sobel_group, fuzzy_group). All three are
argument groups of |parser| corresponding to arguments for any sort of
inexact matching algorithm, arguments specific to Sobel filter matching,
and arguments specific to fuzzy matching.
"""
common_group = parser.add_argument_group('Common Arguments')
common_group.add_argument(
'--test',
required=True,
action='append',
dest='test_names',
help='The name of a test to find parameter values for, as reported in '
'the Skia Gold UI. Can be passed multiple times to run optimizations '
'for multiple tests.')
common_group.add_argument('--gold-instance',
default='chrome',
help='The Skia Gold instance to interact with.')
common_group.add_argument(
'--corpus',
default='chrome-gpu',
help='The corpus within the instance to interact with.')
common_group.add_argument(
'--target-success-percent',
default=100,
type=float,
help='The percentage of comparisons that need to succeed in order for '
'a set of parameters to be considered good.')
common_group.add_argument(
'--no-cleanup',
action='store_true',
default=False,
help="Don't clean up the temporary files left behind by the "
'optimization process.')
common_group.add_argument(
'--group-images-by-resolution',
action='store_true',
default=False,
help='Group images for comparison based on resolution instead of by '
'Gold trace. This will likely add some noise, as some comparisons will '
'be made that Gold would not consider, but this has the benefit of '
'optimizing over all historical data instead of only over data in '
'the past several hundred commits. Note that this will likely '
'result in a significantly longer runtime.')
sobel_group = parser.add_argument_group(
'Sobel Arguments',
'To disable Sobel functionality, set both min and max edge thresholds '
'to 255.')
sobel_group.add_argument(
'--min-edge-threshold',
default=10,
type=int,
help='The minimum value to consider for the Sobel edge threshold. '
'Lower values result in more of the image being blacked out before '
'comparison.')
sobel_group.add_argument(
'--max-edge-threshold',
default=255,
type=int,
help='The maximum value to consider for the Sobel edge threshold. '
'Higher values result in less of the image being blacked out before '
'comparison.')
fuzzy_group = parser.add_argument_group(
'Fuzzy Arguments',
'To disable Fuzzy functionality, set min/max for both parameters to 0')
fuzzy_group.add_argument(
'--min-max-different-pixels',
dest='min_max_diff',
default=0,
type=int,
help='The minimum value to consider for the maximum number of '
'different pixels. Lower values result in less fuzzy comparisons being '
'allowed.')
fuzzy_group.add_argument(
'--max-max-different-pixels',
dest='max_max_diff',
default=50,
type=int,
help='The maximum value to consider for the maximum number of '
'different pixels. Higher values result in more fuzzy comparisons '
'being allowed.')
fuzzy_group.add_argument(
'--min-delta-threshold',
default=0,
type=int,
help='The minimum value to consider for the per-channel delta sum '
'threshold. Lower values result in less fuzzy comparisons being '
'allowed.')
fuzzy_group.add_argument(
'--max-delta-threshold',
default=30,
type=int,
help='The maximum value to consider for the per-channel delta sum '
'threshold. Higher values result in more fuzzy comparisons being '
'allowed.')
fuzzy_group.add_argument(
'--ignored-border-thickness',
default=0,
type=int,
help='How many pixels along the border of the image to ignore. 0 is '
'typical for most tests, 1 is useful for tests that have edges going '
'all the way to the border of the image and are using a Sobel filter.')
return common_group, sobel_group, fuzzy_group
def _VerifyArgs(self) -> None:
"""Verifies that the provided arguments are valid for an optimizer."""
assert self._args.target_success_percent > 0
assert self._args.target_success_percent <= 100
assert self._args.min_edge_threshold >= self.MIN_EDGE_THRESHOLD
assert self._args.max_edge_threshold <= self.MAX_EDGE_THRESHOLD
assert self._args.min_edge_threshold <= self._args.max_edge_threshold
assert self._args.min_max_diff >= self.MIN_MAX_DIFF
assert self._args.min_max_diff <= self._args.max_max_diff
assert self._args.min_delta_threshold >= self.MIN_DELTA_THRESHOLD
assert self._args.max_delta_threshold <= self.MAX_DELTA_THRESHOLD
assert self._args.min_delta_threshold <= self._args.max_delta_threshold
assert self._args.ignored_border_thickness >= 0
def RunOptimization(self) -> None:
"""Runs an optimization for whatever test and parameters were supplied.
The results should be printed to stdout when they are available.
"""
self._working_dir = tempfile.mkdtemp()
try:
self._DownloadData()
# Do a preliminary test to make sure that the most permissive
# parameters can succeed.
logging.info('Verifying initial parameters')
success, num_pixels, max_delta = self._RunComparisonForParameters(
self._GetMostPermissiveParameters())
if not success:
raise RuntimeError(
'Most permissive parameters did not result in a comparison '
'success. Try loosening parameters or lowering target success '
'percent. Max differing pixels: %d, max delta: %s' % (num_pixels,
max_delta))
self._RunOptimizationImpl()
finally:
if not self._args.no_cleanup:
shutil.rmtree(self._working_dir)
# Cleanup files left behind by "goldctl match"
for f in glob.iglob(os.path.join(tempfile.gettempdir(), 'goldctl-*')):
shutil.rmtree(f)
def _RunOptimizationImpl(self) -> None:
"""Runs the algorithm-specific optimization code for an optimizer."""
raise NotImplementedError()
def _GetMostPermissiveParameters(self) -> parameter_set.ParameterSet:
return parameter_set.ParameterSet(self._args.max_max_diff,
self._args.max_delta_threshold,
self._args.min_edge_threshold)
def _DownloadData(self) -> None:
"""Downloads all the necessary data for a test."""
assert self._working_dir
logging.info('Downloading images')
if self._args.group_images_by_resolution:
self._DownloadExpectations('%s/json/v2/expectations' % self._gold_url)
self._DownloadImagesForResolutionGrouping()
else:
# A grouping ID is an MD5 hash of a JSON object containing the corpus and
# name of a test with its keys sorted alphabetically.
grouping_dict = {
'name': self._test_name,
'source_type': self._args.corpus,
}
# Specify separators to avoid the automatic whitespace, which Go/Gold
# does.
json_str = json.dumps(grouping_dict,
sort_keys=True,
separators=(',', ':'))
md5 = hashlib.md5()
md5.update(json_str.encode('utf-8'))
self._DownloadExpectations('%s/json/v1/positivedigestsbygrouping/%s' %
(self._gold_url, md5.hexdigest()))
self._DownloadImagesForTraceGrouping()
for grouping, digests in self._images.items():
logging.info('Found %d images for group %s', len(digests), grouping)
logging.debug('Digests: %r', digests)
def _DownloadExpectations(self, url: str) -> None:
"""Downloads the expectation JSON from Gold into memory."""
logging.info('Downloading expectations JSON')
r = requests.get(url)
assert r.status_code == 200
self._expectations = r.json()
def _DownloadImagesForResolutionGrouping(self) -> None:
"""Downloads all the positive images for a test to disk.
Images are grouped by resolution.
"""
assert self._expectations
test_expectations = self._expectations.get('primary',
{}).get(self._test_name, {})
positive_digests = [
digest for digest, status in test_expectations.items()
if status == 'positive'
]
if not positive_digests:
raise RuntimeError('Failed to find any positive digests for test %s' %
self._test_name)
for digest in positive_digests:
content = self._DownloadImageWithDigest(digest)
image = Image.open(io.BytesIO(content))
self._images['%dx%d' % (image.size[0], image.size[1])].add(digest)
def _DownloadImagesForTraceGrouping(self) -> None:
"""Download all recent positive images for a test to disk.
Images are grouped by Skia Gold trace ID, i.e. each hardware/software
combination is a separate group.
"""
assert self._expectations
# The downloaded trace data contains a list of traces, each with a list of
# digests. The digests should be unique within each trace, but convert to
# sets just to be sure.
for trace in self._expectations['traces']:
trace_id = trace['trace_id']
digests = set(trace['digests'])
if not digests:
logging.warning(
'Failed to find any positive digests for test %s and trace %s. '
'This is likely due to the trace being old.', self._test_name,
trace_id)
self._images[trace_id] = digests
for d in digests:
self._DownloadImageWithDigest(d)
def _DownloadImageWithDigest(self, digest: str) -> bytes:
"""Downloads an image with the given digest and saves it to disk.
Args:
digest: The md5 digest of the image to download.
Returns:
A copy of the image content that was written to disk as bytes.
"""
logging.debug('Downloading image %s.png', digest)
r = requests.get('%s/img/images/%s.png' % (self._gold_url, digest))
assert r.status_code == 200
with open(self._GetImagePath(digest), 'wb') as outfile:
outfile.write(r.content)
return r.content
def _GetImagePath(self, digest: str) -> str:
"""Gets a filepath to an image based on digest.
Args:
digest: The md5 digest of the image, as provided by Gold.
Returns:
A string containing a filepath to where the image should be on disk.
"""
return os.path.join(self._working_dir, '%s.png' % digest)
def _GetGoldctlBinary(self) -> str:
"""Gets the filepath to the goldctl binary to use.
Returns:
A string containing a filepath to the goldctl binary to use.
"""
if not self._goldctl_binary:
for path in GOLDCTL_PATHS:
if os.path.isfile(path):
self._goldctl_binary = path
break
if not self._goldctl_binary:
raise RuntimeError(
'Could not find goldctl binary. Checked %s' % GOLDCTL_PATHS)
return self._goldctl_binary
def _RunComparisonForParameters(
self, parameters: parameter_set.ParameterSet) -> Tuple[bool, int, int]:
"""Runs a comparison for all image combinations using some parameters.
Args:
parameters: A parameter_set.ParameterSet instance containing parameters to
use.
Returns:
A 3-tuple (success, num_pixels, max_diff). |success| is a boolean
denoting whether enough comparisons succeeded to meet the desired success
percentage. |num_pixels| is an int denoting the maximum number of pixels
that did not match across all comparisons. |max_delta| is the maximum
per-channel delta sum across all comparisons.
"""
logging.debug('Running comparison for parameters: %s', parameters)
num_attempts = 0
num_successes = 0
max_num_pixels = -1
max_max_delta = -1
for resolution, digest_list in self._images.items():
logging.debug('Resolution/trace: %s, digests: %s', resolution,
digest_list)
cmds = [
self._GenerateComparisonCmd(l, r, parameters)
for (l, r) in itertools.combinations(digest_list, 2)
]
results = self._pool.map(RunCommandAndExtractData, cmds)
for (success, num_pixels, max_delta) in results:
num_attempts += 1
if success:
num_successes += 1
max_num_pixels = max(num_pixels, max_num_pixels)
max_max_delta = max(max_delta, max_max_delta)
# This could potentially happen if run on a test where there's only one
# positive image per resolution/trace.
if num_attempts == 0:
num_attempts = 1
num_successes = 1
success_percent = float(num_successes) * 100 / num_attempts
logging.debug('Success percent: %s', success_percent)
logging.debug('target success percent: %s',
self._args.target_success_percent)
successful = success_percent >= self._args.target_success_percent
logging.debug(
'Successful: %s, Max different pixels: %d, Max per-channel delta sum: '
'%d', successful, max_num_pixels, max_max_delta)
return successful, max_num_pixels, max_max_delta
def _GenerateComparisonCmd(
self, left_digest: str, right_digest: str,
parameters: parameter_set.ParameterSet) -> List[str]:
"""Generates a comparison command for the given arguments.
The returned command can be passed directly to a subprocess call.
Args:
left_digest: The first/left image digest to compare.
right_digest: The second/right image digest to compare.
parameters: A parameter_set.ParameterSet instance containing the
parameters to use for image comparison.
Returns:
A list of strings specifying a goldctl command to compare |left_digest|
to |right_digest| using the parameters in |parameters|.
"""
cmd = [
self._GetGoldctlBinary(),
'match',
self._GetImagePath(left_digest),
self._GetImagePath(right_digest),
'--algorithm',
'sobel',
] + parameters.AsList()
return cmd
def RunCommandAndExtractData(cmd: List[str]) -> Tuple[bool, int, int]:
"""Runs a comparison command and extracts data from it.
This is outside of the parameter optimizers because it is meant to be run via
multiprocessing.Pool.map(), which does not play nice with class methods since
they can't be easily pickled.
Args:
cmd: A list of strings containing the command to run.
Returns:
A 3-tuple (success, num_pixels, max_delta). |success| is a boolean denoting
whether the comparison succeeded or not. |num_pixels| is an int denoting
the number of pixels that did not match. |max_delta| is the maximum
per-channel delta sum in the comparison.
"""
output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
if not isinstance(output, str):
output = output.decode('utf-8')
success = False
num_pixels = 0
max_delta = 0
for line in output.splitlines():
if 'Images match.' in line:
success = True
if 'Number of different pixels' in line:
num_pixels = int(line.split(':')[1])
if 'Maximum per-channel delta sum' in line:
max_delta = int(line.split(':')[1])
logging.debug('Result for %r: success: %s, num_pixels: %d, max_delta: %d',
cmd, success, num_pixels, max_delta)
return success, num_pixels, max_delta