1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
# build/toolchain/win/midl.py
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import division
import array
import difflib
import filecmp
import io
import operator
import os
import posixpath
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import uuid
from functools import reduce
def ZapTimestamp(filename):
  """Rewrites a midl.exe output file in place so its bytes are reproducible.

  Files whose name ends in '.tlb' are treated as binary type libraries: the
  "Created by MIDL version ..." custom-data string and the MIDL compiler
  version stored after it are overwritten with fixed values. Every other file
  is treated as generated C/C++ text: the creation banner, the compiler
  version in the "Oicf" line, a malformed '#endif' comment and the 8.1.620
  version constant are normalized.

  Args:
    filename: Path of the file to normalize; it is read and then overwritten.

  Raises:
    AssertionError: A .tlb file does not have the expected layout.
  """
  with open(filename, 'rb') as f:
    contents = f.read()

  # midl.exe writes timestamp 2147483647 (2^31 - 1) as creation date into its
  # outputs, but using the local timezone. To make the output timezone-
  # independent, replace that date with a fixed string of the same length.
  # Also blank out the minor version number.
  if filename.endswith('.tlb'):
    # See https://chromium-review.googlesource.com/c/chromium/src/+/693223 for
    # a fairly complete description of the .tlb binary format.
    # TLB files start with a 0x54 byte header. Offset 0x20 stores how many
    # types are defined in the file, and the header is followed by that many
    # uint32s. After that, 15 section headers appear. Each section header is
    # 16 bytes, starting with offset and length uint32s.
    # Section 12 in the file contains custom() data. custom() data has a type
    # (int, string, etc). Each custom data chunk starts with a uint16_t
    # describing its type. Type 8 is string data, consisting of a uint32_t
    # len, followed by that many data bytes, followed by 'W' bytes to pad to a
    # 4 byte boundary. Type 0x13 is uint32 data, followed by 4 data bytes,
    # followed by two 'W' to pad to a 4 byte boundary.
    # The custom block always starts with one string containing "Created by
    # MIDL version 8...", followed by one uint32 containing 0x7fffffff,
    # followed by another uint32 containing the MIDL compiler version (e.g.
    # 0x0801026e for v8.1.622 -- 0x26e == 622). These 3 fields take 0x54 bytes.
    # There might be more custom data after that, but these 3 blocks are always
    # there for file-level metadata.
    # All data is little-endian in the file.
    assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
    ntypes, = struct.unpack_from('<I', contents, 0x20)
    custom_off, custom_len = struct.unpack_from(
        '<II', contents, 0x54 + 4*ntypes + 11*16)
    assert custom_len >= 0x54
    # First: Type string (0x8), followed by 0x3e characters.
    assert contents[custom_off:custom_off + 6] == b'\x08\x00\x3e\x00\x00\x00'
    assert re.match(
        br'Created by MIDL version 8\.\d\d\.\d{4} '
        br'at ... Jan 1. ..:..:.. 2038\n',
        contents[custom_off + 6:custom_off + 6 + 0x3e])
    # Second: Type uint32 (0x13) storing 0x7fffffff (followed by WW / 0x57 pad)
    assert contents[custom_off+6+0x3e:custom_off+6+0x3e+8] == \
        b'\x13\x00\xff\xff\xff\x7f\x57\x57'
    # Third: Type uint32 (0x13) storing MIDL compiler version.
    assert contents[custom_off + 6 + 0x3e + 8:custom_off + 6 + 0x3e + 8 +
                    2] == b'\x13\x00'
    # Replace "Created by" string with fixed string, and fixed MIDL version with
    # 8.1.622 always.
    contents = (
        contents[0:custom_off + 6] +
        b'Created by MIDL version 8.xx.xxxx at a redacted point in time\n' +
        # uint32 (0x13) val 0x7fffffff, WW, uint32 (0x13), val 0x0801026e, WW
        b'\x13\x00\xff\xff\xff\x7f\x57\x57\x13\x00\x6e\x02\x01\x08\x57\x57' +
        contents[custom_off + 0x54:])
  else:
    contents = re.sub(
        br'File created by MIDL compiler version 8\.\d\d\.\d{4} \*/\r\n'
        br'/\* at ... Jan 1. ..:..:.. 2038',
        br'File created by MIDL compiler version 8.xx.xxxx */\r\n'
        br'/* at a redacted point in time', contents)
    contents = re.sub(
        br' Oicf, W1, Zp8, env=(.....) \(32b run\), '
        br'target_arch=(AMD64|X86) 8\.\d\d\.\d{4}',
        br' Oicf, W1, Zp8, env=\1 (32b run), target_arch=\2 8.xx.xxxx',
        contents)
    # TODO(thakis): If we need more hacks than these, try to verify checked-in
    # outputs when we're using the hermetic toolchain.
    # midl.exe older than 8.1.622 omit '//' after #endif, fix that:
    contents = contents.replace(b'#endif !_MIDL_USE_GUIDDEF_',
                                b'#endif // !_MIDL_USE_GUIDDEF_')
    # midl.exe puts the midl version into code in one place. To have
    # predictable output, lie about the midl version if it's not 8.1.622.
    # This is unfortunate, but remember that there's beauty too in imperfection.
    contents = contents.replace(b'0x801026c, /* MIDL Version 8.1.620 */',
                                b'0x801026e, /* MIDL Version 8.1.622 */')

  # The file is only reopened for writing once all checks above have passed, so
  # a failed assertion leaves it untouched.
  with open(filename, 'wb') as f:
    f.write(contents)
def get_tlb_contents(tlb_file):
  """Reads a .tlb file and locates its type and GUID sections.

  See ZapTimestamp() for a short overview of the .tlb format.

  Args:
    tlb_file: Path of the type library to read.

  Returns:
    A tuple (contents, ntypes, type_off, guid_off, guid_len) where |contents|
    is the whole file as a mutable array.array('B'), |ntypes| is the uint32 at
    offset 0x20, |type_off| is the offset stored in the first section header,
    and |guid_off| / |guid_len| are the offset and length stored in the sixth
    section header.

  Raises:
    AssertionError: The magic bytes are wrong or the GUID section length is not
      a multiple of 24.
  """
  with open(tlb_file, 'rb') as f:
    contents = f.read()
  assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
  ntypes, = struct.unpack_from('<I', contents, 0x20)
  # The 16-byte section headers start right after the header and the per-type
  # uint32 table; each begins with (offset, length).
  sections_off = 0x54 + 4*ntypes
  type_off, _ = struct.unpack_from('<II', contents, sections_off)
  guid_off, guid_len = struct.unpack_from('<II', contents,
                                          sections_off + 5*16)
  assert guid_len % 24 == 0
  # Hand back a mutable buffer so callers can patch it with struct.pack_into().
  return array.array('B', contents), ntypes, type_off, guid_off, guid_len
def recreate_guid_hashtable(contents, ntypes, guid_off, guid_len):
  """Rebuilds the GUID hash table of a .tlb image in place.

  This function is called after changing guids in section 6 (the "guid"
  section). It recreates the GUID hashtable in section 5. Since the hash table
  uses chaining, it's easiest to recompute it from scratch rather than trying
  to patch it up.

  Args:
    contents: Writable buffer holding the whole .tlb file.
    ntypes: Number of types in the file (used to find the section headers).
    guid_off: Offset of the GUID section inside |contents|.
    guid_len: Length in bytes of the GUID section; entries are 24 bytes each.
  """
  nbuckets = 0x80 // 4
  hashtab = [0xffffffff] * nbuckets  # 0xffffffff marks an empty bucket.
  for guidind in range(guid_off, guid_off + guid_len, 24):
    # The first 16 bytes of each entry are the GUID itself, read here as eight
    # little-endian uint16 words.
    words = struct.unpack_from('<8H', contents, guidind)
    # midl seems to use the following simple hash function for GUIDs:
    guidhash = reduce(operator.xor, words) % nbuckets
    # Chain: this entry's "next" field (at +0x14) points at the previous head
    # of the bucket, and the entry becomes the new head. Links are offsets
    # relative to the start of the GUID section.
    struct.pack_into('<I', contents, guidind + 0x14, hashtab[guidhash])
    hashtab[guidhash] = guidind - guid_off
  hash_off, _ = struct.unpack_from('<II', contents, 0x54 + 4*ntypes + 4*16)
  struct.pack_into('<%dI' % nbuckets, contents, hash_off, *hashtab)
def overwrite_guids_h(h_file, dynamic_guids):
  """Replaces GUIDs in a MIDL-generated header, in place.

  Args:
    h_file: Path of the header to rewrite.
    dynamic_guids: Dict mapping the GUID to look for to its replacement, both
      as bytes. Keys are applied as case-insensitive regular expressions.
  """
  with open(h_file, 'rb') as f:
    contents = f.read()
  for key, value in dynamic_guids.items():
    contents = re.sub(key, value, contents, flags=re.I)
  with open(h_file, 'wb') as f:
    f.write(contents)
def get_uuid_format(guid, prefix):
  """Formats a textual GUID as a comma-separated list of hex literals.

  Args:
    guid: GUID as bytes, e.g. b"D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
    prefix: Bytes inserted immediately before the fourth field (b'' or b'{').

  Returns:
    Bytes such as
    b"0xD0E1CACC,0xC63C,0x4192,<prefix>0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83".
    No closing brace is emitted, whatever |prefix| is.
  """
  head = b'0x%s,0x%s,0x%s,' % (guid[0:8], guid[9:13], guid[14:18])
  clock_seq = b'%s0x%s,0x%s' % (prefix, guid[19:21], guid[21:23])
  # Position 23 is the last '-'; the remaining hex digits are emitted one byte
  # (two characters) at a time.
  node = b''.join(b',0x' + guid[i:i + 2] for i in range(24, len(guid), 2))
  return head + clock_seq + node
def get_uuid_format_iid_file(guid):
  """Returns |guid| in the flat hex-list form, with no brace before field four.

  Converts from "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to
  0xD0E1CACC,0xC63C,0x4192,0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83.
  """
  no_brace = b''
  return get_uuid_format(guid, no_brace)
def overwrite_guids_iid(iid_file, dynamic_guids):
  """Replaces GUIDs in a MIDL-generated IID file, in place.

  Args:
    iid_file: Path of the file to rewrite.
    dynamic_guids: Dict mapping the GUID to look for to its replacement, both
      as bytes in "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" form. Each is
      converted with get_uuid_format_iid_file() before the case-insensitive
      regular-expression substitution.
  """
  with open(iid_file, 'rb') as f:
    contents = f.read()
  for key, value in dynamic_guids.items():
    contents = re.sub(get_uuid_format_iid_file(key),
                      get_uuid_format_iid_file(value),
                      contents,
                      flags=re.I)
  with open(iid_file, 'wb') as f:
    f.write(contents)
def get_uuid_format_proxy_file(guid):
  """Returns |guid| as a hex list with '{' in front of the fourth field.

  Converts from "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to
  0xD0E1CACC,0xC63C,0x4192,{0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83, i.e. the
  interior of a
  {0xD0E1CACC,0xC63C,0x4192,{0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83}}
  initializer without its outer '{' and closing '}}'.
  """
  inner_brace = b'{'
  return get_uuid_format(guid, inner_brace)
def overwrite_guids_proxy(proxy_file, dynamic_guids):
  """Replaces GUIDs in a MIDL-generated proxy file, in place.

  Args:
    proxy_file: Path of the file to rewrite.
    dynamic_guids: Dict mapping the GUID to look for to its replacement, both
      as bytes in "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" form. Each is
      converted with get_uuid_format_proxy_file() before the case-insensitive
      regular-expression substitution.
  """
  with open(proxy_file, 'rb') as f:
    contents = f.read()
  for key, value in dynamic_guids.items():
    contents = re.sub(get_uuid_format_proxy_file(key),
                      get_uuid_format_proxy_file(value),
                      contents,
                      flags=re.I)
  with open(proxy_file, 'wb') as f:
    f.write(contents)
def getguid(contents, offset):
  """Reads the 16-byte GUID stored at |offset| in |contents|.

  The GUID is decoded as a little-endian uint32, two little-endian uint16s and
  eight raw bytes, and returned as upper-case bytes of the form
  b"D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
  """
  data1, data2, data3, data4 = struct.unpack_from('<IHH8s', contents, offset)
  # bytearray() yields integers on both Python 2 and Python 3.
  tail_hex = b''.join(b'%02X' % octet for octet in bytearray(data4))
  first_two_bytes, last_six_bytes = tail_hex[:4], tail_hex[4:]
  return b'%08X-%04X-%04X-%s-%s' % (data1, data2, data3, first_two_bytes,
                                    last_six_bytes)
def setguid(contents, offset, guid):
  """Writes |guid| into the 16 bytes of |contents| starting at |offset|.

  |guid| is bytes in a form uuid.UUID() accepts once decoded as UTF-8, e.g.
  b"D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83". The first three fields are stored
  little-endian and the last eight bytes verbatim, mirroring getguid().
  """
  parsed = uuid.UUID(guid.decode('utf-8'))
  time_low, time_mid, time_hi_version = parsed.fields[:3]
  trailing_bytes = parsed.bytes[8:]
  struct.pack_into('<IHH8s', contents, offset, time_low, time_mid,
                   time_hi_version, trailing_bytes)
def overwrite_guids_tlb(tlb_file, dynamic_guids):
  """Replaces GUIDs in a .tlb type library, in place.

  Every 24-byte entry of the GUID section whose GUID matches a key of
  |dynamic_guids| (ignoring case) is overwritten with the mapped GUID. The GUID
  hash table is then rebuilt and the file rewritten.

  Args:
    tlb_file: Path of the type library to rewrite.
    dynamic_guids: Dict mapping the GUID to look for to its replacement, both
      as bytes in "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" form.
  """
  contents, ntypes, _, guid_off, guid_len = get_tlb_contents(tlb_file)
  # Lower-case the keys once instead of once per GUID entry.
  replacements = {key.lower(): value for key, value in dynamic_guids.items()}
  for guid_pos in range(guid_off, guid_off + guid_len, 24):
    new_guid = replacements.get(getguid(contents, guid_pos).lower())
    if new_guid is not None:
      setguid(contents, guid_pos, new_guid)
  recreate_guid_hashtable(contents, ntypes, guid_off, guid_len)
  with open(tlb_file, 'wb') as f:
    f.write(contents)
# Handle multiple guid substitutions, where |dynamic_guids| is of the form
# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791="
# "3d852661-c795-4d20-9b95-5561e9a1d2d9,"
# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B="
# "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
#
# Before specifying |dynamic_guids| in the build, the IDL file is first compiled
# with "158428a4-6014-4978-83ba-9fad0dabe791" and
# "63B8FFB1-5314-48C9-9C57-93EC8BC6184B". These are the "replaceable" guids,
# i.e., guids that can be replaced in future builds. The resulting MIDL outputs
# are copied over to src\third_party\win_build_output\.
#
# Then, in the future, any changes to these guids can be accomplished by
# providing |dynamic_guids| of the format above in the build file. These
# "dynamic" guid changes by themselves will not require the MIDL compiler and
# therefore will not require copying output over to
# src\third_party\win_build_output\.
#
# The pre-generated src\third_party\win_build_output\ files are used for
# cross-compiling on other platforms, since the MIDL compiler is Windows-only.
def overwrite_guids(h_file, iid_file, proxy_file, tlb_file, dynamic_guids):
  """Fixes up GUIDs in the .h, _i.c, _p.c, and .tlb outputs, in place.

  Args:
    h_file: Path of the generated header.
    iid_file: Path of the generated IID file.
    proxy_file: Path of the generated proxy file, or None if the project does
      not use one.
    tlb_file: Path of the generated type library, or None if the project does
      not use one.
    dynamic_guids: Dict mapping the GUID to look for to its replacement, both
      as bytes.
  """
  overwrite_guids_h(h_file, dynamic_guids)
  overwrite_guids_iid(iid_file, dynamic_guids)
  # main() passes None for outputs the project does not produce; skip those
  # instead of trying to open None.
  if proxy_file:
    overwrite_guids_proxy(proxy_file, dynamic_guids)
  if tlb_file:
    overwrite_guids_tlb(tlb_file, dynamic_guids)
# This function removes all occurrences of 'PLACEHOLDER-GUID-' from the
# template, and if |dynamic_guids| is specified, also replaces the guids within
# the file. Finally, it writes the resultant output to the |idl| file.
def generate_idl_from_template(idl_template, dynamic_guids, idl):
  """Writes |idl| from |idl_template| with placeholders resolved.

  Args:
    idl_template: Path of the template IDL file to read.
    dynamic_guids: Dict mapping the GUID to look for to its replacement, both
      as bytes, or None/empty to only strip the 'PLACEHOLDER-GUID-' prefixes.
      Keys are applied as case-insensitive regular expressions.
    idl: Path of the IDL file to write.
  """
  with open(idl_template, 'rb') as f:
    contents = f.read()
  contents = re.sub(b'PLACEHOLDER-GUID-', b'', contents, flags=re.I)
  for key, value in (dynamic_guids or {}).items():
    contents = re.sub(key, value, contents, flags=re.I)
  with open(idl, 'wb') as f:
    f.write(contents)
# This function runs the MIDL compiler with the provided arguments. It creates
# and returns a tuple of |0,midl_output_dir| on success.
def run_midl(args, env_dict):
  """Runs |args| with '/out <fresh temp dir>' appended and filters its output.

  Args:
    args: Command line as a list; it is run through the shell.
    env_dict: Environment passed to the child process.

  Returns:
    (0, output_dir) on success, after every file in output_dir went through
    ZapTimestamp(). On failure, (returncode, output_dir), where output_dir has
    already been removed again.
  """
  out_dir = tempfile.mkdtemp()
  keep_out_dir = False
  try:
    proc = subprocess.Popen(args + ['/out', out_dir],
                            shell=True,
                            universal_newlines=True,
                            env=env_dict,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    output = proc.communicate()[0]

    # Filter junk out of stdout, and write filtered versions. Output we want
    # to filter is pairs of lines that look like this:
    # Processing C:\Program Files (x86)\Microsoft SDKs\...\include\objidl.idl
    # objidl.idl
    noise_prefixes = ('Processing ', '64 bit Processing ')
    output_lines = output.splitlines()
    echoed_names = {
        os.path.basename(line)
        for line in output_lines
        if line.startswith(noise_prefixes)
    }
    for line in output_lines:
      if line.startswith(noise_prefixes) or line in echoed_names:
        continue
      print(line)

    if proc.returncode != 0:
      return proc.returncode, out_dir

    for name in os.listdir(out_dir):
      ZapTimestamp(os.path.join(out_dir, name))
    keep_out_dir = True
  finally:
    # Anything short of full success (including an exception) discards the
    # temporary directory.
    if not keep_out_dir and os.path.exists(out_dir):
      shutil.rmtree(out_dir)

  return 0, out_dir
# This function adds support for dynamic generation of guids: when values are
# specified as 'uuid5:name', this function will substitute the values with
# generated dynamic guids using the uuid5 function. The uuid5 function generates
# a guid based on the SHA-1 hash of a namespace identifier (which is the guid
# that comes after 'PLACEHOLDER-GUID-') and a name (which is a string, such as a
# version string "87.1.2.3").
#
# For instance, when |dynamic_guid| is of the form:
# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791=uuid5:88.0.4307.0
# ,"
# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B=uuid5:88.0.4307.0
# "
#
# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791" would be substituted
# with uuid5("158428a4-6014-4978-83ba-9fad0dabe791", "88.0.4307.0"), which is
# "64700170-AD80-5DE3-924E-2F39D862CFD5". And
# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B" would be
# substituted with uuid5("63B8FFB1-5314-48C9-9C57-93EC8BC6184B", "88.0.4307.0"),
# which is "7B6E7538-3C38-5565-BC92-42BCEE268D76".
def uuid5_substitutions(dynamic_guids):
  """Resolves 'uuid5:<name>' values of |dynamic_guids| in place.

  Each such value is replaced by the upper-cased uuid5 of its key (used as the
  namespace GUID) and <name>. Other values are left untouched. Nothing is
  returned; the dict itself is modified.
  """
  marker = 'uuid5:'
  for namespace, spec in dynamic_guids.items():
    if not spec.startswith(marker):
      continue
    _, name = spec.split(marker, 1)
    assert name
    derived = uuid.uuid5(uuid.UUID(namespace), name)
    # Only values of existing keys are reassigned, so the dict does not change
    # size during iteration.
    dynamic_guids[namespace] = str(derived).upper()
def main(arch, gendir, outdir, dynamic_guids, tlb, h, dlldata, iid, proxy,
         clang, idl, *flags):
  """Copies checked-in MIDL outputs to |outdir| and, on Windows, verifies them.

  Args:
    arch: Path of the environment-block file. The second '.'-separated
      component of this string names the architecture subdirectory.
    gendir: Directory holding the checked-in MIDL outputs.
    outdir: Directory the outputs are copied to.
    dynamic_guids: 'none', or a comma-separated list of
      'PLACEHOLDER-GUID-<guid>=<replacement>' pairs, optionally prefixed with
      'ignore_proxy_stub,'.
    tlb: Name of the .tlb output, or 'none'.
    h: Name of the header output.
    dlldata: Name of the dlldata output, or 'none'.
    iid: Name of the IID output.
    proxy: Name of the proxy output, or 'none'.
    clang: Preprocessor command handed to midl via /cpp_cmd.
    idl: Path of the IDL file (a template when |dynamic_guids| is given).
    *flags: Extra midl flags; those starting with '/D' are also forwarded to
      the preprocessor.

  Returns:
    0 on success. 1 if checked-in outputs are missing on a non-Windows host or
    if midl.exe produced different outputs. Otherwise midl's non-zero return
    code.
  """
  # Copy checked-in outputs to final location.
  source = gendir
  if os.path.isdir(os.path.join(source, os.path.basename(idl))):
    source = os.path.join(source, os.path.basename(idl))
  source = os.path.join(source, arch.split('.')[1])  # Append 'x86' or 'x64'.
  source = os.path.normpath(source)

  source_exists = True
  if not os.path.isdir(source):
    source_exists = False
    if sys.platform != 'win32':
      print('Directory %s needs to be populated from Windows first' % source)
      return 1

    # This is a brand new IDL file that does not have outputs under
    # third_party\win_build_output\midl. We create an empty directory for now.
    os.makedirs(source)

  common_files = [h, iid]
  if tlb != 'none':
    # Not all projects use tlb files.
    common_files += [tlb]
  else:
    tlb = None

  if dlldata != 'none':
    # Not all projects use dlldata files.
    common_files += [dlldata]
  else:
    dlldata = None

  if proxy != 'none':
    # Not all projects use proxy files.
    common_files += [proxy]
  else:
    proxy = None

  for source_file in common_files:
    file_path = os.path.join(source, source_file)
    if not os.path.isfile(file_path):
      source_exists = False
      if sys.platform != 'win32':
        print('File %s needs to be generated from Windows first' % file_path)
        return 1

      # Either this is a brand new IDL file that does not have outputs under
      # third_party\win_build_output\midl or the file is (unexpectedly) missing.
      # We create an empty file for now. The rest of the machinery below will
      # then generate the correctly populated file using the MIDL compiler and
      # instruct the developer to copy that file under
      # third_party\win_build_output\midl.
      open(file_path, 'wb').close()
    shutil.copy(file_path, outdir)

  if dynamic_guids != 'none':
    assert '=' in dynamic_guids
    if dynamic_guids.startswith("ignore_proxy_stub,"):
      # TODO(ganesh): The custom proxy/stub file ("_p.c") is not generated
      # correctly for dynamic IIDs (but correctly if there are only dynamic
      # CLSIDs). The proxy/stub lookup functions generated by MIDL.exe within
      # "_p.c" rely on a sorted set of vtable lists, which we are not currently
      # regenerating. At the moment, no project in Chromium that uses dynamic
      # IIDs is relying on the custom proxy/stub file. So for now, if
      # |dynamic_guids| is prefixed with "ignore_proxy_stub,", we exclude the
      # custom proxy/stub file from the directory comparisons.
      # |proxy| is None (and absent from the list) when the project has no
      # proxy file, so only remove it when it is actually listed.
      if proxy in common_files:
        common_files.remove(proxy)
      dynamic_guids = dynamic_guids.split("ignore_proxy_stub,", 1)[1]
    dynamic_guids = re.sub('PLACEHOLDER-GUID-', '', dynamic_guids, flags=re.I)
    dynamic_guids = dynamic_guids.split(',')
    dynamic_guids = dict(s.split('=') for s in dynamic_guids)
    uuid5_substitutions(dynamic_guids)
    dynamic_guids_bytes = {
        k.encode('utf-8'): v.encode('utf-8')
        for k, v in dynamic_guids.items()
    }
    if source_exists:
      overwrite_guids(*(os.path.join(outdir, name) if name else None
                        for name in [h, iid, proxy, tlb]),
                      dynamic_guids=dynamic_guids_bytes)
  else:
    dynamic_guids = None

  # On non-Windows, that's all we can do.
  if sys.platform != 'win32':
    return 0

  idl_template = None
  if dynamic_guids:
    idl_template = idl

    # posixpath is used here to keep the MIDL-generated files with a uniform
    # separator of '/' instead of mixed '/' and '\\'.
    idl = posixpath.join(
        outdir,
        os.path.splitext(os.path.basename(idl_template))[0] + '.idl')

    # |idl_template| can contain one or more occurrences of guids that are
    # substituted with |dynamic_guids|, and then MIDL is run on the substituted
    # IDL file.
    generate_idl_from_template(idl_template, dynamic_guids_bytes, idl)

  # On Windows, run midl.exe on the input and check that its outputs are
  # identical to the checked-in outputs (after replacing guids if
  # |dynamic_guids| is specified).

  # Read the environment block from the file. This is stored in the format used
  # by CreateProcess. Drop last 2 NULs, one for list terminator, one for
  # trailing vs. separator.
  with open(arch) as env_file:
    env_pairs = env_file.read()[:-2].split('\0')
  env_dict = dict(item.split('=', 1) for item in env_pairs)

  # Extract the /D options and send them to the preprocessor.
  preprocessor_options = '-E -nologo -Wno-nonportable-include-path'
  preprocessor_options += ''.join(
      ' ' + flag for flag in flags if flag.startswith('/D'))
  args = ['midl', '/nologo'] + list(flags) + (['/tlb', tlb] if tlb else []) + [
      '/h', h
  ] + (['/dlldata', dlldata] if dlldata else []) + ['/iid', iid] + (
      ['/proxy', proxy] if proxy else
      []) + ['/cpp_cmd', clang, '/cpp_opt', preprocessor_options, idl]

  returncode, midl_output_dir = run_midl(args, env_dict)
  if returncode != 0:
    return returncode

  # Now compare the output in midl_output_dir to the copied-over outputs.
  _, mismatch, errors = filecmp.cmpfiles(midl_output_dir, outdir, common_files)
  assert not errors

  if mismatch:
    print('midl.exe output different from files in %s, see %s' %
          (outdir, midl_output_dir))
    for f in mismatch:
      # .tlb files are binary; a textual diff would not be useful.
      if f.endswith('.tlb'):
        continue
      fromfile = os.path.join(outdir, f)
      tofile = os.path.join(midl_output_dir, f)
      with io.open(fromfile) as old_file:
        old_lines = old_file.readlines()
      with io.open(tofile) as new_file:
        new_lines = new_file.readlines()
      print(''.join(
          difflib.unified_diff(old_lines, new_lines, fromfile, tofile)))

    if dynamic_guids:
      # |idl_template| can contain one or more occurrences of guids prefixed
      # with 'PLACEHOLDER-GUID-'. We first remove the extraneous
      # 'PLACEHOLDER-GUID-' prefix and then run MIDL on the substituted IDL
      # file.
      # No guid substitutions are done at this point, because we want to compile
      # with the placeholder guids and then instruct the user to copy the output
      # over to |source| which is typically src\third_party\win_build_output\.
      # In future runs, the placeholder guids in |source| are replaced with the
      # guids specified in |dynamic_guids|.
      generate_idl_from_template(idl_template, None, idl)
      returncode, midl_output_dir = run_midl(args, env_dict)
      if returncode != 0:
        return returncode

    print('To rebaseline:')
    print(r' copy /y %s\* %s' % (midl_output_dir, source))
    return 1

  return 0
if __name__ == '__main__':
  # Forward the command-line arguments positionally and use main()'s result
  # as the process exit status.
  exit_code = main(*sys.argv[1:])
  sys.exit(exit_code)