1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
content / web_test / renderer / web_test_spell_checker.cc [blame]
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/342213636): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif
#include "content/web_test/renderer/web_test_spell_checker.h"
#include <stddef.h>
#include <algorithm>
#include "base/check_op.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_util.h"
namespace content {
namespace {
// Grows |data| by one element, placing |item| at the end. A new vector one
// slot larger is built, the existing elements are copied across in order, and
// the new vector is then swapped into |data|.
void Append(blink::WebVector<blink::WebString>* data,
            const blink::WebString& item) {
  const size_t old_size = data->size();
  blink::WebVector<blink::WebString> grown(old_size + 1);
  size_t index = 0;
  while (index < old_size) {
    grown[index] = (*data)[index];
    ++index;
  }
  grown[old_size] = item;
  data->swap(grown);
}
// Returns true if |ch| is an ASCII letter ('A'-'Z' or 'a'-'z').
//
// The parameter is a UTF-16 code unit because every caller in this file passes
// elements of a std::u16string. Taking a plain |char| would silently drop the
// high byte, so a non-ASCII code unit such as U+0161 (low byte 0x61, 'a')
// would be misclassified as an ASCII letter.
bool IsASCIIAlpha(char16_t ch) {
  const bool is_lower = ch >= u'a' && ch <= u'z';
  const bool is_upper = ch >= u'A' && ch <= u'Z';
  return is_lower || is_upper;
}
} // namespace
// The constructor needs no custom logic; the compiler-generated one is used,
// defined out of line here.
WebTestSpellChecker::WebTestSpellChecker() = default;
// Likewise, the destructor is the compiler-generated one.
WebTestSpellChecker::~WebTestSpellChecker() = default;
// Scans |text| for the first word that appears in the misspelled-word table.
//
// On return, |*misspelled_offset| and |*misspelled_length| describe the first
// known misspelling found (offset relative to the start of |text|), or are
// both zero if none was found. Both pointers must be non-null.
//
// Returns true only when the scan reaches a point where the remaining text
// contains no ASCII letters at all. Returns false in every other case,
// including when a misspelling was found and when the text was consumed
// completely (e.g. it ends with a run of ASCII letters).
bool WebTestSpellChecker::SpellCheckWord(const blink::WebString& text,
                                         size_t* misspelled_offset,
                                         size_t* misspelled_length) {
  DCHECK(misspelled_offset);
  DCHECK(misspelled_length);

  // Make sure the misspelled-word table has been populated.
  InitializeIfNeeded();

  // Start from a "nothing misspelled" result.
  *misspelled_offset = 0;
  *misspelled_length = 0;

  // Work on a std::u16string copy: the table stores std::u16string entries
  // and blink::WebString offers no find().
  std::u16string remaining = text.Utf16();

  // Number of code units already dropped from the front of |remaining|; used
  // to translate offsets back into |text| coordinates.
  int consumed = 0;

  while (!remaining.empty()) {
    // Skip anything that is not an ASCII letter (digits, punctuation,
    // non-ASCII characters). If no letter is left, the rest of the text is
    // treated as valid.
    const auto word_begin = base::ranges::find_if(remaining, IsASCIIAlpha);
    if (word_begin == remaining.end())
      return true;

    const int remaining_length = static_cast<int>(remaining.length());
    const int word_offset =
        static_cast<int>(std::distance(remaining.begin(), word_begin));
    const int max_candidate_length = remaining_length - word_offset;

    // Compare the text at |word_offset| against each known misspelling in
    // table order. A candidate only counts if it is followed by the end of
    // the text or by a character that is not an ASCII letter.
    for (const auto& known : misspelled_words_) {
      const int candidate_length =
          std::min(static_cast<int>(known.length()), max_candidate_length);
      if (remaining.substr(word_offset, candidate_length) != known)
        continue;

      const int candidate_end = word_offset + candidate_length;
      if (candidate_end != remaining_length &&
          IsASCIIAlpha(remaining[candidate_end])) {
        continue;
      }

      *misspelled_offset = word_offset + consumed;
      *misspelled_length = candidate_length;
      break;
    }
    if (*misspelled_length > 0)
      break;

    // No misspelling starts here: drop everything up to the end of this run
    // of ASCII letters and continue with what follows.
    const auto word_end =
        std::find_if_not(word_begin, remaining.end(), IsASCIIAlpha);
    const int word_length =
        static_cast<int>(std::distance(word_begin, word_end));
    const int advance = word_offset + word_length;
    DCHECK_LT(0, advance);
    remaining.erase(0, advance);
    consumed += advance;
  }
  return false;
}
// Reports whether |word| is one of the two hard-coded strings this checker
// treats as cached. The second string ends with the character written as the
// hex escape \x007F.
bool WebTestSpellChecker::HasInCache(const blink::WebString& word) {
  if (word == "Spell wellcome. Is it broken?")
    return true;
  return word == "Spell wellcome.\x007F";
}
// Handles the single hard-coded multi-word case "Helllo wordl.". When |text|
// equals that string exactly, two spelling results are appended to |results|
// ("Helllo" at offset 0, length 6, suggesting "Hello"; "wordl" at offset 7,
// length 5, suggesting "world") and true is returned. For any other text,
// |results| is left untouched and false is returned.
bool WebTestSpellChecker::IsMultiWordMisspelling(
    const blink::WebString& text,
    std::vector<blink::WebTextCheckingResult>* results) {
  if (!(text == "Helllo wordl."))
    return false;

  // "Helllo" -> "Hello".
  results->push_back(blink::WebTextCheckingResult(
      blink::kWebTextDecorationTypeSpelling, 0, 6,
      std::vector<blink::WebString>({"Hello"})));

  // "wordl" -> "world".
  results->push_back(blink::WebTextCheckingResult(
      blink::kWebTextDecorationTypeSpelling, 7, 5,
      std::vector<blink::WebString>({"world"})));

  return true;
}
// Appends the canned correction for |word| to |suggestions|, if there is one.
// Only four inputs have a correction ("wellcome", "upper case", "Helllo" and
// "wordl"); for anything else |suggestions| is left unchanged.
void WebTestSpellChecker::FillSuggestionList(
    const blink::WebString& word,
    blink::WebVector<blink::WebString>* suggestions) {
  if (word == "wellcome") {
    Append(suggestions, blink::WebString::FromUTF8("welcome"));
    return;
  }
  if (word == "upper case") {
    Append(suggestions, blink::WebString::FromUTF8("uppercase"));
    return;
  }
  if (word == "Helllo") {
    Append(suggestions, blink::WebString::FromUTF8("Hello"));
    return;
  }
  if (word == "wordl") {
    Append(suggestions, blink::WebString::FromUTF8("world"));
    return;
  }
}
bool WebTestSpellChecker::InitializeIfNeeded() {
// Exit if we have already initialized this object.
if (initialized_)
return false;
// Create a table that consists of misspelled words used in Blink web tests.
// Since Blink web tests don't have so many misspelled words as
// well-spelled words, it is easier to compare the given word with misspelled
// ones than to compare with well-spelled ones.
static const char* misspelled_words[] = {
// These words are known misspelled words in web tests.
// If there are other misspelled words in web tests, please add them in
// this array.
"foo", "Foo", "baz", "fo", "LibertyF", "chello", "xxxtestxxx", "XXxxx",
"Textx", "blockquoted", "asd", "Lorem", "Nunc", "Curabitur", "eu", "adlj",
"adaasj", "sdklj", "jlkds", "jsaada", "jlda", "contentEditable",
// Prefer to match the full word than a partial word when there's an
// ambiguous boundary.
"zz't", "zz",
// The following words are used by unit tests.
"ifmmp", "qwertyuiopasd", "qwertyuiopasdf", "upper case", "wellcome"};
misspelled_words_.clear();
for (size_t i = 0; i < std::size(misspelled_words); ++i)
misspelled_words_.push_back(
std::u16string(misspelled_words[i],
misspelled_words[i] + strlen(misspelled_words[i])));
// Mark as initialized to prevent this object from being initialized twice
// or more.
initialized_ = true;
// Since this WebTestSpellChecker class doesn't download dictionaries, this
// function always returns false.
return false;
}
} // namespace content