1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
base / i18n / char_iterator.cc [blame]
// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif
#include "base/i18n/char_iterator.h"
#include <string_view>
#include "base/check_op.h"
#include "base/third_party/icu/icu_utf.h"
namespace base {
namespace i18n {
// UTF8CharIterator ------------------------------------------------------------
// Creates an iterator over the UTF-8 bytes in |str|, starting at offset 0.
// When |str| is non-empty the first code point is decoded into |char_| right
// away; CBU8_NEXT also moves |next_pos_| past the bytes it consumed.
UTF8CharIterator::UTF8CharIterator(std::string_view str)
    : str_(str), array_pos_(0), next_pos_(0), char_pos_(0), char_(0) {
  // An empty view has nothing to decode; keep the zero-initialized state.
  if (str_.empty()) {
    return;
  }
  CBU8_NEXT(str_.data(), next_pos_, str_.length(), char_);
}
// Compiler-generated destructor, defined out of line in this file.
UTF8CharIterator::~UTF8CharIterator() = default;
// Moves the iterator forward by one code point.
// Returns false (leaving all state untouched) if the current position is
// already at or beyond the end of the string; returns true otherwise.
bool UTF8CharIterator::Advance() {
  const bool already_at_end = array_pos_ >= str_.length();
  if (already_at_end) {
    return false;
  }

  // The previously decoded "next" position becomes the current one.
  ++char_pos_;
  array_pos_ = next_pos_;

  // Decode the following code point only if there are bytes left to read;
  // otherwise |char_| keeps its previous value.
  const bool has_more_input = next_pos_ < str_.length();
  if (has_more_input) {
    CBU8_NEXT(str_.data(), next_pos_, str_.length(), char_);
  }
  return true;
}
// UTF16CharIterator -----------------------------------------------------------
// Creates an iterator over |str| that starts at array index 0 by delegating to
// the (str, initial_pos) constructor.
UTF16CharIterator::UTF16CharIterator(std::u16string_view str)
: UTF16CharIterator(str, 0) {}
// Compiler-generated (member-wise) move constructor, defined out of line.
UTF16CharIterator::UTF16CharIterator(UTF16CharIterator&& to_move) = default;
// Compiler-generated destructor, defined out of line in this file.
UTF16CharIterator::~UTF16CharIterator() = default;
// Compiler-generated (member-wise) move assignment, defined out of line.
UTF16CharIterator& UTF16CharIterator::operator=(UTF16CharIterator&& to_move) =
default;
// static
// Returns an iterator over |str| positioned at |array_index|, after letting
// CBU16_SET_CP_START adjust that index to the start of the code point it falls
// in (i.e. stepping back off the trail half of a surrogate pair).
// |array_index| must be <= str.length(); passing str.length() yields an
// iterator positioned at the end of |str|.
UTF16CharIterator UTF16CharIterator::LowerBound(std::u16string_view str,
                                                size_t array_index) {
  DCHECK_LE(array_index, str.length());
  // CBU16_SET_CP_START inspects str[array_index] and is not given the string
  // length, so it must not run when |array_index| is the end position: that
  // would read one code unit past the view (and dereference a null data()
  // pointer for a default-constructed, empty view). The end of the view is
  // already a valid boundary, so no adjustment is needed there.
  if (array_index < str.length()) {
    CBU16_SET_CP_START(str.data(), 0, array_index);
  }
  return UTF16CharIterator(str, array_index);
}
// static
// Returns an iterator over |str| positioned at |array_index| after that index
// has been passed through CBU16_SET_CP_LIMIT, which may adjust it so that it
// does not fall in the middle of a surrogate pair.
// |array_index| must be <= str.length().
UTF16CharIterator UTF16CharIterator::UpperBound(std::u16string_view str,
                                                size_t array_index) {
  DCHECK_LE(array_index, str.length());

  // Work on a copy so the adjusted value is clearly distinct from the input.
  size_t adjusted_index = array_index;
  CBU16_SET_CP_LIMIT(str.data(), 0, adjusted_index, str.length());
  return UTF16CharIterator(str, adjusted_index);
}
int32_t UTF16CharIterator::NextCodePoint() const {
if (next_pos_ >= str_.length())
return 0;
base_icu::UChar32 c;
CBU16_GET(str_.data(), 0, next_pos_, str_.length(), c);
return c;
}
// Peeks at the code point that ends immediately before |array_pos_| without
// moving the iterator. Returns 0 when the iterator is at the start of the
// string (there is no previous code point).
int32_t UTF16CharIterator::PreviousCodePoint() const {
  if (array_pos_ == 0) {
    return 0;
  }

  // CBU16_PREV decrements the index it is given, so operate on a scratch copy
  // to keep this method const. The copy uses size_t rather than a 32-bit
  // integer so that large offsets are not truncated, matching Rewind(), which
  // hands |array_pos_| to CBU16_PREV directly.
  size_t pos = array_pos_;
  base_icu::UChar32 c;
  CBU16_PREV(str_.data(), 0, pos, c);
  return c;
}
// Moves the iterator forward by one code point.
// Returns false without modifying any state if the current position is
// already at or beyond the end of the string; returns true otherwise.
bool UTF16CharIterator::Advance() {
  const bool exhausted = array_pos_ >= str_.length();
  if (exhausted) {
    return false;
  }

  // Step onto the position that was decoded ahead of time.
  ++char_offset_;
  array_pos_ = next_pos_;

  // Decode the next code point (advancing |next_pos_|) only while there is
  // input left; at the end |char_| is left as it was.
  const bool more_to_read = next_pos_ < str_.length();
  if (more_to_read) {
    ReadChar();
  }
  return true;
}
// Moves the iterator back by one code point.
// Returns false without modifying any state if the iterator is at the start
// of the string; returns true otherwise.
bool UTF16CharIterator::Rewind() {
  const bool at_start = (array_pos_ == 0);
  if (at_start) {
    return false;
  }

  --char_offset_;
  // The position being left becomes the "next" position. This must be saved
  // before CBU16_PREV runs, because the macro rewrites |array_pos_|.
  next_pos_ = array_pos_;
  // Decodes the preceding code point into |char_| and moves |array_pos_| back
  // to where that code point begins.
  CBU16_PREV(str_.data(), 0, array_pos_, char_);
  return true;
}
// Creates an iterator over |str| whose current and next positions both start
// at |initial_pos| and whose character offset starts at 0. If |initial_pos|
// is inside the string, the code point there is decoded immediately.
UTF16CharIterator::UTF16CharIterator(std::u16string_view str,
                                     size_t initial_pos)
    : str_(str),
      array_pos_(initial_pos),
      next_pos_(initial_pos),
      char_offset_(0),
      char_(0) {
  // Starting at (or past) the end: there is no character to decode.
  if (array_pos_ >= str_.length()) {
    return;
  }
  // Decoding the first character also moves |next_pos_| past it.
  ReadChar();
}
// Decodes the code point at |next_pos_| into |char_| and advances |next_pos_|
// past it. CBU16_NEXT expands to a sizeable chunk of code, which is why it is
// kept in this one helper instead of being repeated at every call site.
void UTF16CharIterator::ReadChar() {
  const auto* const code_units = str_.data();
  const auto code_unit_count = str_.length();
  CBU16_NEXT(code_units, next_pos_, code_unit_count, char_);
}
} // namespace i18n
} // namespace base