1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162

base / hash / hash.cc [blame]

// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/hash/hash.h"

#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <string_view>

#include "base/containers/span.h"
#include "base/dcheck_is_on.h"
#include "base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/third_party/cityhash/city.h"

// Definition in base/third_party/superfasthash/superfasthash.c. (Third-party
// code did not come with its own header file, so declaring the function here.)
// Note: This algorithm is also in Blink under Source/wtf/StringHasher.h.
extern "C" uint32_t SuperFastHash(const char* data, int len);

namespace base {

namespace {

size_t FastHashImpl(base::span<const uint8_t> data) {
  auto chars = as_chars(data);
  // We use the updated CityHash within our namespace (not the deprecated
  // version from third_party/smhasher).
  if constexpr (sizeof(size_t) > 4) {
    return base::internal::cityhash_v111::CityHash64(chars.data(),
                                                     chars.size());
  } else {
    return base::internal::cityhash_v111::CityHash32(chars.data(),
                                                     chars.size());
  }
}

// Implement hashing for pairs of at-most 32 bit integer values.
// When size_t is 32 bits, we turn the 64-bit hash code into 32 bits by using
// multiply-add hashing. This algorithm, as described in
// Theorem 4.3.3 of the thesis "Über die Komplexität der Multiplikation in
// eingeschränkten Branchingprogrammmodellen" by Woelfel, is:
//
//   h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
//
// Contact danakj@chromium.org for any questions.
size_t HashInts32Impl(uint32_t value1, uint32_t value2) {
  uint64_t value1_64 = value1;
  uint64_t hash64 = (value1_64 << 32) | value2;

  if (sizeof(size_t) >= sizeof(uint64_t))
    return static_cast<size_t>(hash64);

  uint64_t odd_random = 481046412LL << 32 | 1025306955LL;
  uint32_t shift_random = 10121U << 16;

  hash64 = hash64 * odd_random + shift_random;
  size_t high_bits =
      static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t))));
  return high_bits;
}

// Implement hashing for pairs of up-to 64-bit integer values.
// We use the compound integer hash method to produce a 64-bit hash code, by
// breaking the two 64-bit inputs into 4 32-bit values:
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
// Then we reduce our result to 32 bits if required, similar to above.
size_t HashInts64Impl(uint64_t value1, uint64_t value2) {
  uint32_t short_random1 = 842304669U;
  uint32_t short_random2 = 619063811U;
  uint32_t short_random3 = 937041849U;
  uint32_t short_random4 = 3309708029U;

  uint32_t value1a = static_cast<uint32_t>(value1 & 0xffffffff);
  uint32_t value1b = static_cast<uint32_t>((value1 >> 32) & 0xffffffff);
  uint32_t value2a = static_cast<uint32_t>(value2 & 0xffffffff);
  uint32_t value2b = static_cast<uint32_t>((value2 >> 32) & 0xffffffff);

  uint64_t product1 = static_cast<uint64_t>(value1a) * short_random1;
  uint64_t product2 = static_cast<uint64_t>(value1b) * short_random2;
  uint64_t product3 = static_cast<uint64_t>(value2a) * short_random3;
  uint64_t product4 = static_cast<uint64_t>(value2b) * short_random4;

  uint64_t hash64 = product1 + product2 + product3 + product4;

  if (sizeof(size_t) >= sizeof(uint64_t))
    return static_cast<size_t>(hash64);

  uint64_t odd_random = 1578233944LL << 32 | 194370989LL;
  uint32_t shift_random = 20591U << 16;

  hash64 = hash64 * odd_random + shift_random;
  size_t high_bits =
      static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t))));
  return high_bits;
}

// The random seed is used to perturb the output of base::FastHash() and
// base::HashInts() so that it is only deterministic within the lifetime of a
// process. This prevents inadvertent dependencies on the underlying
// implementation, e.g. anything that persists the hash value and expects it to
// be unchanging will break.
//
// Note: this is the same trick absl uses to generate a random seed. This is
// more robust than using base::RandBytes(), which can fail inside a sandboxed
// environment. Note that without ASLR, the seed won't be quite as random...
#if DCHECK_IS_ON()
constexpr const void* kSeed = &kSeed;
#endif

template <typename T>
T Scramble(T input) {
#if DCHECK_IS_ON()
  return HashInts64Impl(input, reinterpret_cast<uintptr_t>(kSeed));
#else
  return input;
#endif
}

}  // namespace

size_t FastHash(base::span<const uint8_t> data) {
  return Scramble(FastHashImpl(data));
}

uint32_t Hash(base::span<const uint8_t> data) {
  // Currently our in-memory hash is the same as the persistent hash. The
  // split between in-memory and persistent hash functions is maintained to
  // allow the in-memory hash function to be updated in the future.
  return PersistentHash(data);
}

uint32_t Hash(const std::string& str) {
  return PersistentHash(as_byte_span(str));
}

uint32_t PersistentHash(span<const uint8_t> data) {
  // This hash function must not change, since it is designed to be persistable
  // to disk.
  if (data.size() > size_t{std::numeric_limits<int>::max()}) {
    NOTREACHED();
  }
  auto chars = as_chars(data);
  return ::SuperFastHash(chars.data(), checked_cast<int>(chars.size()));
}

uint32_t PersistentHash(std::string_view str) {
  return PersistentHash(as_byte_span(str));
}

size_t HashInts32(uint32_t value1, uint32_t value2) {
  return Scramble(HashInts32Impl(value1, value2));
}

size_t HashInts64(uint64_t value1, uint64_t value2) {
  return Scramble(HashInts64Impl(value1, value2));
}

}  // namespace base