1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
url / url_canon_fileurl.cc [blame]
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif
// Functions for canonicalizing "file:" URLs.
#include <string_view>
#include "base/strings/string_util.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_file.h"
#include "url/url_parse_internal.h"
namespace url {
namespace {
// Returns true when the characters of |spec| in the half-open range
// [begin, end) are exactly "localhost" (case-sensitive). An inverted range
// (begin > end) never matches.
bool IsLocalhost(const char* spec, int begin, int end) {
  constexpr std::string_view kLocalhost = "localhost";
  if (end < begin) {
    return false;
  }
  const auto length = static_cast<std::string_view::size_type>(end - begin);
  const std::string_view candidate(&spec[begin], length);
  return candidate == kLocalhost;
}
// UTF-16 counterpart: returns true when the code units of |spec| in the
// half-open range [begin, end) are exactly u"localhost" (case-sensitive). An
// inverted range (begin > end) never matches.
bool IsLocalhost(const char16_t* spec, int begin, int end) {
  constexpr std::u16string_view kLocalhost = u"localhost";
  if (end < begin) {
    return false;
  }
  const auto length = static_cast<std::u16string_view::size_type>(end - begin);
  const std::u16string_view candidate(&spec[begin], length);
  return candidate == kLocalhost;
}
template <typename CHAR>
int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
if (begin > end)
return -1;
// First guess the beginning of the drive letter.
// If there is something that looks like a drive letter in the spec between
// begin and end, store its position in drive_letter_pos.
int drive_letter_pos =
DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
if (drive_letter_pos < begin)
return -1;
// Check if the path up to the drive letter candidate can be canonicalized as
// "/".
Component sub_path = MakeRange(begin, drive_letter_pos);
RawCanonOutput<1024> output;
Component output_path;
bool success = CanonicalizePath(spec, sub_path, &output, &output_path);
if (!success || output_path.len != 1 || output.at(output_path.begin) != '/') {
return -1;
}
return drive_letter_pos;
}
#ifdef WIN32
// Writes a canonical drive spec ("/", the upper-cased drive letter, ":") to
// |output| when |spec| has a Windows drive letter between |begin| and |end|.
// Nothing is written when no drive letter is found.
//
// Returns the index of the next input character to process: the position just
// past the drive separator when a drive spec was found, or |begin| otherwise.
template <typename CHAR>
int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
  const int letter_index = FindWindowsDriveLetter(spec, begin, end);
  if (letter_index < begin) {
    return begin;
  }

  // A drive letter followed by its separator (colon or pipe) is now known to
  // sit at |letter_index|. Drive letters are normalized to uppercase.
  const CHAR letter = spec[letter_index];
  const char canonical_letter =
      base::IsAsciiLower(letter) ? static_cast<char>(letter - 'a' + 'A')
                                 : static_cast<char>(letter);

  output->push_back('/');
  output->push_back(canonical_letter);
  // The separator is always emitted as a colon, even if the input used a pipe.
  output->push_back(':');

  // Skip the letter and its separator.
  return letter_index + 2;
}
#endif // WIN32
template<typename CHAR, typename UCHAR>
bool DoFileCanonicalizePath(const CHAR* spec,
const Component& path,
CanonOutput* output,
Component* out_path) {
// Copies and normalizes the "c:" at the beginning, if present.
out_path->begin = output->length();
int after_drive;
#ifdef WIN32
after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
#else
after_drive = path.begin;
#endif
// Copies the rest of the path, starting from the slash following the
// drive colon (if any, Windows only), or the first slash of the path.
bool success = true;
if (after_drive < path.end()) {
// Use the regular path canonicalizer to canonicalize the rest of the path
// after the drive.
//
// Give it a fake output component to write into, since we will be
// calculating the out_path ourselves (consisting of both the drive and the
// path we canonicalize here).
Component sub_path = MakeRange(after_drive, path.end());
Component fake_output_path;
success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
} else if (after_drive == path.begin) {
// No input path and no drive spec, canonicalize to a slash.
output->push_back('/');
}
out_path->len = output->length() - out_path->begin;
return success;
}
template<typename CHAR, typename UCHAR>
bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
DCHECK(!parsed.has_opaque_path);
// Things we don't set in file: URLs.
new_parsed->username = Component();
new_parsed->password = Component();
new_parsed->port = Component();
// Scheme (known, so we don't bother running it through the more
// complicated scheme canonicalizer).
new_parsed->scheme.begin = output->length();
output->Append("file://");
new_parsed->scheme.len = 4;
// If the host is localhost, and the path starts with a Windows drive letter,
// remove the host component. This does the following transformation:
// file://localhost/C:/hello.txt -> file:///C:/hello.txt
//
// Note: we do this on every platform per URL Standard, not just Windows.
//
// TODO(crbug.com/41299821): According to the latest URL spec, this
// transformation should be done regardless of the path.
Component host_range = parsed.host;
if (IsLocalhost(source.host, host_range.begin, host_range.end()) &&
FindWindowsDriveLetter(source.path, parsed.path.begin,
parsed.path.end()) >= parsed.path.begin) {
host_range.reset();
}
// Append the host. For many file URLs, this will be empty. For UNC, this
// will be present.
// TODO(brettw) This doesn't do any checking for host name validity. We
// should probably handle validity checking of UNC hosts differently than
// for regular IP hosts.
bool success =
CanonicalizeFileHost(source.host, host_range, *output, new_parsed->host);
success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
output, &new_parsed->path);
CanonicalizeQuery(source.query, parsed.query, query_converter,
output, &new_parsed->query);
CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
return success;
}
} // namespace
int FindWindowsDriveLetter(const char* spec, int begin, int end) {
return DoFindWindowsDriveLetter(spec, begin, end);
}
int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
return DoFindWindowsDriveLetter(spec, begin, end);
}
bool CanonicalizeFileURL(const char* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizeFileURL<char, unsigned char>(
URLComponentSource<char>(spec), parsed, query_converter,
output, new_parsed);
}
bool CanonicalizeFileURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizeFileURL<char16_t, char16_t>(
URLComponentSource<char16_t>(spec), parsed, query_converter, output,
new_parsed);
}
// 8-bit entry point for file-path canonicalization: forwards to the shared
// DoFileCanonicalizePath template and returns its success flag.
bool FileCanonicalizePath(const char* spec,
                          const Component& path,
                          CanonOutput* output,
                          Component* out_path) {
  const bool canonicalized =
      DoFileCanonicalizePath<char, unsigned char>(spec, path, output, out_path);
  return canonicalized;
}
// 16-bit entry point for file-path canonicalization: forwards to the shared
// DoFileCanonicalizePath template and returns its success flag.
bool FileCanonicalizePath(const char16_t* spec,
                          const Component& path,
                          CanonOutput* output,
                          Component* out_path) {
  const bool canonicalized =
      DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output, out_path);
  return canonicalized;
}
bool ReplaceFileURL(const char* base,
const Parsed& base_parsed,
const Replacements<char>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupOverrideComponents(base, replacements, &source, &parsed);
return DoCanonicalizeFileURL<char, unsigned char>(
source, parsed, query_converter, output, new_parsed);
}
bool ReplaceFileURL(const char* base,
const Parsed& base_parsed,
const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
return DoCanonicalizeFileURL<char, unsigned char>(
source, parsed, query_converter, output, new_parsed);
}
} // namespace url