1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
base / i18n / streaming_utf8_validator.cc [blame]
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif
// This implementation doesn't use ICU. The ICU macros are oriented towards
// character-at-a-time processing, whereas byte-at-a-time processing is easier
// with streaming input.
#include "base/i18n/streaming_utf8_validator.h"
#include "base/check_op.h"
#include "base/i18n/utf8_validator_tables.h"
namespace base {
namespace {
// Returns the entry of internal::kUtf8ValidatorTables stored at |offset|.
// |offset| is checked against internal::kUtf8ValidatorTablesSize with
// DCHECK_LT before the table is read.
uint8_t StateTableLookup(uint8_t offset) {
  DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize);
  const uint8_t entry = internal::kUtf8ValidatorTables[offset];
  return entry;
}
} // namespace
// Runs every byte of |data| through the table-driven state machine, starting
// from the state left behind by previous calls, stores the resulting state back
// into |state_| and reports it as VALID_ENDPOINT (state 0), INVALID (the
// invalid-index state) or VALID_MIDPOINT (any other state).
StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(
    base::span<const uint8_t> data) {
  // Work on a local copy of |state_| so the compiler does not have to account
  // for the member aliasing anything touched inside the loop.
  uint8_t current = state_;
  for (const uint8_t byte : data) {
    const bool is_ascii = (byte & 0x80) == 0;
    if (is_ascii) {
      if (current != 0) {
        // A byte with the top bit clear is only accepted in state 0. In any
        // other state the input is invalid, and this is the one place where
        // the loop exits early.
        current = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX;
        break;
      }
      continue;
    }
    // The table entry at the current state gives the shift to apply to the low
    // seven bits of the byte; the shifted value (plus one) then selects the
    // entry holding the next state.
    const uint8_t shift = StateTableLookup(current);
    const uint8_t selector = (byte & 0x7F) >> shift;
    current = StateTableLookup(current + selector + 1);
    // |current| may already be the invalid state at this point. The loop is
    // tuned for valid UTF-8, where skipping an early-exit check here is more
    // efficient (by about 2%); early exit only happens on an ASCII byte.
  }
  state_ = current;
  if (current == 0) {
    return VALID_ENDPOINT;
  }
  if (current == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX) {
    return INVALID;
  }
  return VALID_MIDPOINT;
}
// Puts the validator back into state 0, the state AddBytes() reports as
// VALID_ENDPOINT.
void StreamingUtf8Validator::Reset() {
  state_ = 0;
}
bool StreamingUtf8Validator::Validate(const std::string& string) {
return StreamingUtf8Validator().AddBytes(base::as_byte_span(string)) ==
VALID_ENDPOINT;
}
} // namespace base