1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
base / hash / hash.cc [blame]
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/hash/hash.h"
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <string_view>
#include "base/containers/span.h"
#include "base/dcheck_is_on.h"
#include "base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/third_party/cityhash/city.h"
// Definition in base/third_party/superfasthash/superfasthash.c. (Third-party
// code did not come with its own header file, so declaring the function here.)
// Note: This algorithm is also in Blink under Source/wtf/StringHasher.h.
extern "C" uint32_t SuperFastHash(const char* data, int len);
namespace base {
namespace {
size_t FastHashImpl(base::span<const uint8_t> data) {
auto chars = as_chars(data);
// We use the updated CityHash within our namespace (not the deprecated
// version from third_party/smhasher).
if constexpr (sizeof(size_t) > 4) {
return base::internal::cityhash_v111::CityHash64(chars.data(),
chars.size());
} else {
return base::internal::cityhash_v111::CityHash32(chars.data(),
chars.size());
}
}
// Implement hashing for pairs of at-most 32 bit integer values.
// When size_t is 32 bits, we turn the 64-bit hash code into 32 bits by using
// multiply-add hashing. This algorithm, as described in
// Theorem 4.3.3 of the thesis "Über die Komplexität der Multiplikation in
// eingeschränkten Branchingprogrammmodellen" by Woelfel, is:
//
// h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
//
// Contact danakj@chromium.org for any questions.
size_t HashInts32Impl(uint32_t value1, uint32_t value2) {
uint64_t value1_64 = value1;
uint64_t hash64 = (value1_64 << 32) | value2;
if (sizeof(size_t) >= sizeof(uint64_t))
return static_cast<size_t>(hash64);
uint64_t odd_random = 481046412LL << 32 | 1025306955LL;
uint32_t shift_random = 10121U << 16;
hash64 = hash64 * odd_random + shift_random;
size_t high_bits =
static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t))));
return high_bits;
}
// Implement hashing for pairs of up-to 64-bit integer values.
// We use the compound integer hash method to produce a 64-bit hash code, by
// breaking the two 64-bit inputs into 4 32-bit values:
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
// Then we reduce our result to 32 bits if required, similar to above.
size_t HashInts64Impl(uint64_t value1, uint64_t value2) {
uint32_t short_random1 = 842304669U;
uint32_t short_random2 = 619063811U;
uint32_t short_random3 = 937041849U;
uint32_t short_random4 = 3309708029U;
uint32_t value1a = static_cast<uint32_t>(value1 & 0xffffffff);
uint32_t value1b = static_cast<uint32_t>((value1 >> 32) & 0xffffffff);
uint32_t value2a = static_cast<uint32_t>(value2 & 0xffffffff);
uint32_t value2b = static_cast<uint32_t>((value2 >> 32) & 0xffffffff);
uint64_t product1 = static_cast<uint64_t>(value1a) * short_random1;
uint64_t product2 = static_cast<uint64_t>(value1b) * short_random2;
uint64_t product3 = static_cast<uint64_t>(value2a) * short_random3;
uint64_t product4 = static_cast<uint64_t>(value2b) * short_random4;
uint64_t hash64 = product1 + product2 + product3 + product4;
if (sizeof(size_t) >= sizeof(uint64_t))
return static_cast<size_t>(hash64);
uint64_t odd_random = 1578233944LL << 32 | 194370989LL;
uint32_t shift_random = 20591U << 16;
hash64 = hash64 * odd_random + shift_random;
size_t high_bits =
static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t))));
return high_bits;
}
// The random seed is used to perturb the output of base::FastHash() and
// base::HashInts() so that it is only deterministic within the lifetime of a
// process. This prevents inadvertent dependencies on the underlying
// implementation, e.g. anything that persists the hash value and expects it to
// be unchanging will break.
//
// Note: this is the same trick absl uses to generate a random seed. This is
// more robust than using base::RandBytes(), which can fail inside a sandboxed
// environment. Note that without ASLR, the seed won't be quite as random...
#if DCHECK_IS_ON()
constexpr const void* kSeed = &kSeed;
#endif
template <typename T>
T Scramble(T input) {
#if DCHECK_IS_ON()
return HashInts64Impl(input, reinterpret_cast<uintptr_t>(kSeed));
#else
return input;
#endif
}
} // namespace
size_t FastHash(base::span<const uint8_t> data) {
return Scramble(FastHashImpl(data));
}
uint32_t Hash(base::span<const uint8_t> data) {
// Currently our in-memory hash is the same as the persistent hash. The
// split between in-memory and persistent hash functions is maintained to
// allow the in-memory hash function to be updated in the future.
return PersistentHash(data);
}
uint32_t Hash(const std::string& str) {
return PersistentHash(as_byte_span(str));
}
uint32_t PersistentHash(span<const uint8_t> data) {
// This hash function must not change, since it is designed to be persistable
// to disk.
if (data.size() > size_t{std::numeric_limits<int>::max()}) {
NOTREACHED();
}
auto chars = as_chars(data);
return ::SuperFastHash(chars.data(), checked_cast<int>(chars.size()));
}
uint32_t PersistentHash(std::string_view str) {
return PersistentHash(as_byte_span(str));
}
size_t HashInts32(uint32_t value1, uint32_t value2) {
return Scramble(HashInts32Impl(value1, value2));
}
size_t HashInts64(uint64_t value1, uint64_t value2) {
return Scramble(HashInts64Impl(value1, value2));
}
} // namespace base