1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
base / allocator / dispatcher / tls.h [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_
#include <string_view>
#include "build/build_config.h"
#if BUILDFLAG(IS_POSIX) // the current allocation mechanism (mmap) and TLS
// support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif
#if USE_LOCAL_TLS_EMULATION()
#include <pthread.h>
#include <algorithm>
#include <atomic>
#include <functional>
#include <memory>
#include <mutex>
#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#if PA_BUILDFLAG(USE_PARTITION_ALLOC)
#include "partition_alloc/partition_alloc_constants.h" // nogncheck
#endif
#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif
#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)
// Verify that a condition holds and terminate the process in case it doesn't.
// The functionality is similar to RAW_CHECK but includes more information in
// the logged messages. It is non-allocating to prevent recursions.
//
// |error_message| must be a string literal: the final message is assembled at
// compile time by concatenating it with the stringified |condition| and the
// file and line of the call site, and is then handed to
// ::logging::RawCheckFailure.
#define TLS_RAW_CHECK(error_message, condition) \
TLS_RAW_CHECK_IMPL(error_message, condition, __FILE__, __LINE__)
// Implementation detail of TLS_RAW_CHECK. Taking |line| as a macro argument
// lets __LINE__ expand to its number before STR() turns it into a string.
#define TLS_RAW_CHECK_IMPL(error_message, condition, file, line) \
do { \
if (!(condition)) { \
constexpr const char* message = \
"TLS System: " error_message " Failed condition '" #condition \
"' in (" file "@" STR(line) ").\n"; \
::logging::RawCheckFailure(message); \
} \
} while (0)
// Forward declaration only: this header merely stores a pointer to
// CrashKeyString (see PThreadTLSSystem::crash_key_), so the full definition is
// not required here.
namespace base::debug {
struct CrashKeyString;
}
namespace base::allocator::dispatcher {
namespace internal {
// Allocate memory using POSIX' mmap and unmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for chunks with
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap, this
// is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is not
// constexpr. Hence, we can not use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
constexpr static size_t AllocationChunkSize =
partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_ANDROID) && defined(ARCH_CPU_64_BITS)
constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
constexpr static size_t AllocationChunkSize = 16384;
#else
constexpr static size_t AllocationChunkSize = 4096;
#endif
// Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
// fails.
void* AllocateMemory(size_t size_in_bytes);
// Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
// value indicating if the free was successful.
bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;
// Signature of the callback a TLS system invokes upon termination of a thread
// (see the |thread_termination_function| parameter of
// PThreadTLSSystem::Setup). ThreadLocalStorage registers MarkSlotAsFree here,
// which treats the argument as the data pointer stored for that thread.
using OnThreadTerminationFunction = void (*)(void*);
// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers.
//
// Instances cannot be copied, but they can be moved.
class BASE_EXPORT PThreadTLSSystem {
public:
PThreadTLSSystem();
PThreadTLSSystem(const PThreadTLSSystem&) = delete;
PThreadTLSSystem(PThreadTLSSystem&&);
PThreadTLSSystem& operator=(const PThreadTLSSystem&) = delete;
PThreadTLSSystem& operator=(PThreadTLSSystem&&);
// Initialize the TLS system to store a data set for different threads.
// @param thread_termination_function An optional function which will be
// invoked upon termination of a thread.
// @param instance_id An identifier for this instance. NOTE(review): its use
// is not visible in this header - presumably it labels the crash key kept in
// |crash_key_|; confirm against the implementation.
// @return NOTE(review): presumably true if the setup succeeded - confirm
// against the implementation.
bool Setup(OnThreadTerminationFunction thread_termination_function,
std::string_view instance_id);
// Tear down the TLS system. After completing tear down, the thread
// termination function passed to Setup will not be invoked anymore.
// @return NOTE(review): presumably true if the tear down succeeded - confirm
// against the implementation.
bool TearDownForTesting();
// Get the pointer to the data associated to the current thread. Returns
// nullptr if the TLS system is not initialized or no data was set before.
void* GetThreadSpecificData();
// Set the pointer to the data associated to the current thread. Return true
// if stored successfully, false otherwise.
bool SetThreadSpecificData(void* data);
private:
// Crash key associated with this instance; nullptr until one is assigned.
base::debug::CrashKeyString* crash_key_ = nullptr;
// The pthread key under which the per-thread data pointer is stored.
pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
// From POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
// The effect of calling pthread_getspecific() or pthread_setspecific() with a
// key value not obtained from pthread_key_create() or after key has been
// deleted with pthread_key_delete() is undefined.
//
// Unfortunately, POSIX doesn't define a special value of pthread_key_t
// indicating an invalid key which would allow us to detect accesses outside
// of initialized state. Hence, to prevent us from drifting into the evil
// realm of undefined behaviour we store whether we're somewhere between Setup
// and TearDownForTesting. The flag only exists in builds with DCHECKs
// enabled.
std::atomic_bool initialized_{false};
#endif
};
// The TLS system the user-facing ThreadLocalStorage alias uses unless another
// one is specified.
using DefaultTLSSystem = PThreadTLSSystem;
// |ThreadLocalStorage| accesses its allocator and TLS system through
// |dereference|. In some scenarios, most notably when testing, these objects
// are not copyable and are therefore handed over wrapped, i.e. in a
// std::reference_wrapper. |dereference| gives uniform access to the underlying
// value in both cases.
//
// This overload handles a plain object and returns a reference to that very
// object.
template <class T>
auto dereference(T& ref) -> T& {
  return ref;
}
// Overload of |dereference| for wrapped values: returns a reference to the
// object the std::reference_wrapper refers to.
template <class T>
auto dereference(std::reference_wrapper<T>& ref) -> T& {
  // A std::reference_wrapper can only be constructed from a valid reference,
  // so there is nothing to validate before unwrapping.
  T& wrapped_value = ref.get();
  return wrapped_value;
}
// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests by
// this thread will then always return the same item. When a thread terminates,
// the item will be reset and return to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
// void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes bytes
// of raw memory. Returns nullptr if the allocation fails.
// bool FreeMemoryForTesting(void* pointer_to_allocated, size_t
// size_in_bytes); // Free the raw memory pointed to by pointer_to_allocated.
// Returns true if the free was successful.
// ThreadLocalStorage terminates the process if an allocation or a free fails.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
// bool Setup(OnThreadTerminationFunction thread_termination_function,
// std::string_view instance_id);
// bool TearDownForTesting();
// void* GetThreadSpecificData();
// bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk uses this
// size to the maximum.
// @tparam IsDestructibleForTesting For testing purposes we allow the destructor
// to perform clean up upon destruction. Otherwise, using the destructor will
// result in a compilation failure.
template <typename PayloadType,
typename AllocatorType,
typename TLSSystemType,
size_t AllocationChunkSize,
bool IsDestructibleForTesting>
struct ThreadLocalStorage {
// Create a new instance of |ThreadLocalStorage| using a default-initialized
// allocator and TLS system. The first chunk of data is allocated while
// initializing |root_|; afterwards the TLS system is set up via Initialize().
// @param instance_id Forwarded to the TLS system's Setup().
explicit ThreadLocalStorage(std::string_view instance_id)
: root_(AllocateAndInitializeChunk()) {
Initialize(instance_id);
}
// Create a new instance of |ThreadLocalStorage| using the passed allocator
// and TLS system. This initializes the underlying TLS system and creates the
// first chunk of data.
// @param instance_id Forwarded to the TLS system's Setup().
// @param allocator The allocator to take over (moved into the instance).
// @param tls_system The TLS system to take over (moved into the instance).
ThreadLocalStorage(std::string_view instance_id,
AllocatorType allocator,
TLSSystemType tls_system)
// Members are initialized in declaration order, so |allocator_| is already in
// place when AllocateAndInitializeChunk() allocates through it.
: allocator_(std::move(allocator)),
tls_system_(std::move(tls_system)),
root_(AllocateAndInitializeChunk()) {
Initialize(instance_id);
}
// Destroy an instance of |ThreadLocalStorage|. This is only permitted for
// instances declared as destructible for testing, in which case the TLS system
// is torn down and all data chunks are deleted. Instantiating the destructor
// for any other instance fails to compile.
~ThreadLocalStorage() {
  if constexpr (!IsDestructibleForTesting) {
    // The asserted value depends on a template parameter, so the assertion is
    // only evaluated when this branch is actually instantiated.
    static_assert(
        IsDestructibleForTesting,
        "ThreadLocalStorage cannot be destructed outside of test code.");
  } else {
    TearDownForTesting();
  }
}
// Explicitly prevent all forms of Copy/Move construction/assignment. For an
// exact copy of ThreadLocalStorage we would need to copy the mapping of
// thread to item, which we can't do at the moment. On the other side, our
// atomic members do not support moving out of the box.
// Copy construction.
ThreadLocalStorage(const ThreadLocalStorage&) = delete;
// Move construction.
ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
// Copy assignment.
ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
// Move assignment.
ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;
// Return the data item belonging to the current thread. A thread asking for
// the first time gets a free item from the chunks registered for it; that item
// is reset before it is handed out. Later calls from the same thread return
// the same item.
PayloadType* GetThreadLocalData() {
  auto& tls_system = dereference(tls_system_);

  // Common case: the thread already owns a slot.
  auto* const known_slot =
      static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());
  if (known_slot != nullptr) [[likely]] {
    return &known_slot->item;
  }

  // First request from this thread: claim a slot, starting at the root chunk.
  SingleSlot* const slot =
      FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));
  // This may run while a memory allocation is being handled. CHECK might
  // allocate and thereby cause a recursion, hence the non-allocating check.
  TLS_RAW_CHECK("Failed to set thread specific data.",
                tls_system.SetThreadSpecificData(slot));
  // The slot may have served another thread before; wipe out its old content.
  Reset(slot->item);
  return &slot->item;
}
private:
// A single entry of a chunk: the payload item together with the
// administrative data needed to hand it out to a thread.
struct SingleSlot {
  // The data made available to the thread owning this slot.
  PayloadType item;
  // Set while the slot is assigned to a thread, clear while it is available.
#if defined(__cpp_lib_atomic_value_initialization) && \
    __cpp_lib_atomic_value_initialization >= 201911L
  // The library value-initializes atomics, so no explicit initializer is
  // needed.
  std::atomic_flag is_used;
#else
  // Without that library feature the flag has to be cleared explicitly.
  std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#endif
};
// A block of |NumberOfItems| slots. Chunks form a singly linked chain that
// grows whenever all slots of the existing chunks are taken.
template <size_t NumberOfItems>
struct ChunkT {
  // The slots handed out to threads.
  SingleSlot slots[NumberOfItems];
  // The successor in the chain; nullptr as long as none has been created.
  std::atomic<ChunkT*> next_chunk{nullptr};
  // Guards the creation of the successor so that it happens only once, even
  // if several threads run out of slots at the same time.
  std::once_flag create_next_chunk_flag;
};
// Binary search for the largest number of items N within
// [LowerNumberOfItems, UpperNumberOfItems] for which sizeof(ChunkT<N>) does
// not exceed NumberOfBytes. The search is expressed as a recursion over
// template instantiations so it can be evaluated at compile time.
//
// The caller is expected to pass a lower bound that fits into NumberOfBytes
// and an upper bound that does not.
//
// @tparam LowerNumberOfItems Lower bound of the search range (inclusive).
// @tparam UpperNumberOfItems Upper bound of the search range (inclusive).
// @tparam NumberOfBytes The size the chunk must not exceed.
// @return The number of items found.
template <size_t LowerNumberOfItems,
          size_t UpperNumberOfItems,
          size_t NumberOfBytes>
static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
  static_assert(LowerNumberOfItems <= UpperNumberOfItems,
                "The search range must not be empty.");
  // Every alternative lives in its own branch of an if-constexpr chain. This
  // way only the branch actually taken is instantiated; in particular, a
  // terminal step does not instantiate further ChunkT types or search steps.
  if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
    return LowerNumberOfItems;
  } else {
    // Rounds down, hence CurrentNumberOfItems < UpperNumberOfItems and both
    // sub-ranges below are strictly smaller than the current one.
    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;
    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      // Too large: continue in the lower half.
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    } else if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) <=
                         NumberOfBytes) {
      // One more item still fits (an exact fit counts as fitting): continue
      // in the upper half.
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    } else {
      // CurrentNumberOfItems fits and CurrentNumberOfItems + 1 does not.
      return CurrentNumberOfItems;
    }
  }
}
// Determine how many items to put into one chunk so that the chunk makes the
// best possible use of NumberOfBytes without exceeding it, while never going
// below MinimumNumberOfItems.
//
// Alignment and packing influence the size of a chunk. Instead of trying to
// predict them, the size is probed with the sizeof-operator against ChunkT.
// Since the compiler limits the number of recursions, the probing is done by
// a binary search rather than a simple linear search.
template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
static constexpr size_t CalculateEffectiveNumberOfItems() {
  if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) >= NumberOfBytes) {
    // The minimum already occupies NumberOfBytes or more; nothing to search.
    return MinimumNumberOfItems;
  } else {
    // A chunk holding this many items is larger than NumberOfBytes for sure,
    // which makes it a valid upper bound for the search.
    constexpr size_t SearchCeiling = NumberOfBytes / sizeof(PayloadType) + 1;
    return CalculateEffectiveNumberOfItemsBinSearch<
        MinimumNumberOfItems, SearchCeiling, NumberOfBytes>();
  }
}
public:
// The minimum number of items per chunk. It should be high enough to
// accommodate most items in the root chunk whilst not wasting too much space
// on unnecessary items.
static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
// The effective number of items per chunk. We use the AllocationChunkSize as
// a hint to calculate the effective number of items so we occupy one of these
// memory chunks to the maximum extent possible.
static constexpr size_t ItemsPerChunk =
CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
AllocationChunkSize>();
private:
// The chunk type used by this storage, sized to hold ItemsPerChunk items.
using Chunk = ChunkT<ItemsPerChunk>;
// Guard against a calculation that drops below the requested minimum.
static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);
// Mark an item's slot ready for reuse. This function is used as thread
// termination function in the TLS system. We do not destroy anything at this
// point but simply mark the slot as unused.
// @param data The pointer stored in the TLS system for the terminating
// thread; it must point to a SingleSlot which is currently marked as used.
static void MarkSlotAsFree(void* data) {
// We always store SingleSlots in the TLS system. Therefore, we cast to
// SingleSlot and reset the is_used flag.
auto* const slot = static_cast<SingleSlot*>(data);
// We might be called in the course of handling a memory allocation.
// Therefore, do not use CHECK since it might allocate and cause a
// recursion.
// test_and_set() returns the previous state of the flag, so the check fails
// for a null pointer as well as for a slot which was not marked as used.
TLS_RAW_CHECK("Received an invalid slot.",
slot && slot->is_used.test_and_set());
// Hand the slot back to the pool of free slots.
slot->is_used.clear(std::memory_order_relaxed);
}
// Perform common initialization during construction of an instance: set up
// the TLS system and register MarkSlotAsFree as its thread termination
// function. The process is terminated via CHECK if the setup fails.
// @param instance_id Passed on to the TLS system's Setup().
void Initialize(std::string_view instance_id) {
// The constructor must be called outside of the allocation path. Therefore,
// it is secure to verify with CHECK.
// Passing MarkSlotAsFree as thread_termination_function we ensure the
// slot/item assigned to the finished thread will be returned to the pool of
// unused items.
CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree, instance_id));
}
// Obtain raw memory for one chunk from the allocator and construct an empty
// Chunk in it. The process is terminated if the allocator returns nullptr.
// @return A pointer to the newly constructed chunk.
Chunk* AllocateAndInitializeChunk() {
  auto& allocator = dereference(allocator_);
  void* const uninitialized_memory = allocator.AllocateMemory(sizeof(Chunk));
  // This may run while a memory allocation is being handled. CHECK might
  // allocate and thereby cause a recursion, hence the non-allocating check.
  TLS_RAW_CHECK("Failed to allocate memory for new chunk.",
                uninitialized_memory != nullptr);
  // Construct the chunk directly in the memory obtained above.
  Chunk* const new_chunk = new (uninitialized_memory) Chunk{};
  return new_chunk;
}
// Destroy the chunk pointed to by |chunk_to_erase| and return its memory to
// the allocator. The process is terminated via CHECK if the allocator reports
// a failure.
void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
  // End the lifetime of the chunk first; its memory is released below.
  std::destroy_at(chunk_to_erase);
  // This function must be called outside of the allocation path. Therefore,
  // it is secure to verify with CHECK.
  CHECK(dereference(allocator_)
            .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
}
// Find a free slot, reserve it and return it to the caller. The search starts
// in the passed chunk and follows the chain of chunks from there. If a chunk
// has no free slot and no successor yet, the successor is created.
//
// The chain is walked in a loop, so the stack usage does not depend on the
// number of chunks. The slots are visited with a plain loop because claiming
// a slot modifies it, which the predicates of the standard search algorithms
// are not allowed to do.
//
// @param chunk The chunk to start the search in. Must not be nullptr.
// @return The slot reserved for the caller. Never nullptr.
SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
  Chunk* current_chunk = chunk;
  while (true) {
    for (SingleSlot& candidate_slot : current_chunk->slots) {
      // test_and_set() returns the previous state of the flag. If it was
      // clear, the slot was free and now belongs to the caller.
      if (!candidate_slot.is_used.test_and_set(std::memory_order_relaxed)) {
        return &candidate_slot;
      }
    }

    // There are no more free slots in this chunk. Ensure the next chunk is
    // valid and create one if necessary.
    std::call_once(current_chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, we do only a relaxed store here, call_once synchronizes
      // with other threads.
      current_chunk->next_chunk.store(AllocateAndInitializeChunk(),
                                      std::memory_order_relaxed);
    });

    // Continue the search in the next chunk.
    current_chunk = current_chunk->next_chunk.load();
  }
}
template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
typename std::enable_if<IsDestructibleForTestingP>::type
TearDownForTesting() {
// The destructor must be called outside of the allocation path. Therefore,
// it is secure to verify with CHECK.
// All accessing threads must be terminated by now. For additional security
// we tear down the TLS system first. This way we ensure that
// MarkSlotAsFree is not called anymore and we have no accesses from the
// TLS system's side.
CHECK(dereference(tls_system_).TearDownForTesting());
// Delete all data chunks.
for (auto* chunk = root_.load(); chunk != nullptr;) {
auto* next_chunk = chunk->next_chunk.load();
FreeAndDeallocateChunkForTesting(chunk);
chunk = next_chunk;
}
}
// Reset a single item to its default value.
// Since items are re-used, they may be accessed from different threads,
// causing TSan to trigger. Therefore, the reset is exempt from TSan
// instrumentation.
// @param item The item to reset; it is overwritten by assigning `{}` to it.
DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }
// The allocator providing the raw memory for the chunks. It is declared
// before |root_| because |root_| is initialized by
// AllocateAndInitializeChunk(), which allocates through |allocator_|.
AllocatorType allocator_;
// The TLS system storing, per thread, the pointer to the slot assigned to it.
TLSSystemType tls_system_;
// The first chunk of the chain. It is created during construction and the
// pointer is never changed afterwards.
std::atomic<Chunk*> const root_;
};
} // namespace internal
// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
//
// @tparam StorageType The item type to be stored per thread.
// @tparam AllocatorType The allocator being used; defaults to
// internal::DefaultAllocator.
// @tparam TLSSystemType The TLS system being used; defaults to
// internal::DefaultTLSSystem.
// @tparam AllocationChunkSize The chunk size hint; defaults to the
// AllocationChunkSize defined by the allocator type.
// @tparam IsDestructibleForTesting Whether instances may be destructed;
// defaults to false.
template <typename StorageType,
typename AllocatorType = internal::DefaultAllocator,
typename TLSSystemType = internal::DefaultTLSSystem,
size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
internal::ThreadLocalStorage<StorageType,
AllocatorType,
TLSSystemType,
AllocationChunkSize,
IsDestructibleForTesting>;
} // namespace base::allocator::dispatcher
#undef TLS_RAW_CHECK_IMPL
#undef TLS_RAW_CHECK
#undef STR
#undef STR_HELPER
#endif // USE_LOCAL_TLS_EMULATION()
#endif // BASE_ALLOCATOR_DISPATCHER_TLS_H_