1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
// url/url_canon_host.cc
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif
#include "base/check.h"
#include "base/cpu_reduction_experiment.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_features.h"
namespace url {
namespace {
// This table lists the canonical version of all characters we allow in the
// input, with 0 indicating it is disallowed. We use the magic kEsc value to
// indicate that this character should be escaped. At present, ' ' (SPACE) and
// '*' (asterisk) are still non-compliant to the URL Standard. See
// https://crbug.com/1416013 for details.
//
// The table is indexed by 7-bit ASCII code (0x00-0x7f). Each comment row
// inside the table names the 16 input characters whose entries follow on the
// next line. Upper case letters map to their lower case form, so a lookup
// also lower-cases the input.
const unsigned char kEsc = 0xff;
// clang-format off
const unsigned char kHostCharLookup[0x80] = {
// 00-1f: all are invalid
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// ' ' ! " # $ % & ' ( ) * + , - . /
kEsc,'!', '"', 0, '$', 0, '&', '\'','(', ')', kEsc, '+', ',', '-', '.', 0,
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';' , 0, '=', 0, 0,
// @ A B C D E F G H I J K L M N O
0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// P Q R S T U V W X Y Z [ \ ] ^ _
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', 0, ']', 0, '_',
// ` a b c d e f g h i j k l m n o
'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// p q r s t u v w x y z { | } ~ (DEL, 0x7f, is invalid)
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', 0, '}', '~', 0 };
// clang-format on
// Bit flag stored in kHostCharacterTable for every forbidden host code point:
// https://url.spec.whatwg.org/#forbidden-host-code-point
const uint8_t kForbiddenHost = 0x1;

// Per-character flags, indexed by 7-bit ASCII code (0x00-0x7F). An entry is
// either 0 or a combination of the flag bits defined above.
//
// TODO(crbug.com/40063064): Merge other lookup tables into this table. That can
// be probably done after https://crbug.com/1416013 is resolved.
//
// This table is currently only used for an opaque-host in non-special URLs.
// clang-format off
const uint8_t kHostCharacterTable[128] = {
    kForbiddenHost,  // 0x00 (NUL)
    0,               // 0x01
    0,               // 0x02
    0,               // 0x03
    0,               // 0x04
    0,               // 0x05
    0,               // 0x06
    0,               // 0x07
    0,               // 0x08
    kForbiddenHost,  // 0x09 (TAB)
    kForbiddenHost,  // 0x0A (LF)
    0,               // 0x0B
    0,               // 0x0C
    kForbiddenHost,  // 0x0D (CR)
    0,               // 0x0E
    0,               // 0x0F
    0,               // 0x10
    0,               // 0x11
    0,               // 0x12
    0,               // 0x13
    0,               // 0x14
    0,               // 0x15
    0,               // 0x16
    0,               // 0x17
    0,               // 0x18
    0,               // 0x19
    0,               // 0x1A
    0,               // 0x1B
    0,               // 0x1C
    0,               // 0x1D
    0,               // 0x1E
    0,               // 0x1F
    kForbiddenHost,  // ' '
    0,               // '!'
    0,               // '"'
    kForbiddenHost,  // '#'
    0,               // '$'
    0,               // '%'
    0,               // '&'
    0,               // '\''
    0,               // '('
    0,               // ')'
    0,               // '*'
    0,               // '+'
    0,               // ','
    0,               // '-'
    0,               // '.'
    kForbiddenHost,  // '/'
    0,               // '0'
    0,               // '1'
    0,               // '2'
    0,               // '3'
    0,               // '4'
    0,               // '5'
    0,               // '6'
    0,               // '7'
    0,               // '8'
    0,               // '9'
    kForbiddenHost,  // ':'
    0,               // ';'
    kForbiddenHost,  // '<'
    0,               // '='
    kForbiddenHost,  // '>'
    kForbiddenHost,  // '?'
    kForbiddenHost,  // '@'
    0,               // 'A'
    0,               // 'B'
    0,               // 'C'
    0,               // 'D'
    0,               // 'E'
    0,               // 'F'
    0,               // 'G'
    0,               // 'H'
    0,               // 'I'
    0,               // 'J'
    0,               // 'K'
    0,               // 'L'
    0,               // 'M'
    0,               // 'N'
    0,               // 'O'
    0,               // 'P'
    0,               // 'Q'
    0,               // 'R'
    0,               // 'S'
    0,               // 'T'
    0,               // 'U'
    0,               // 'V'
    0,               // 'W'
    0,               // 'X'
    0,               // 'Y'
    0,               // 'Z'
    kForbiddenHost,  // '['
    kForbiddenHost,  // '\\'
    kForbiddenHost,  // ']'
    kForbiddenHost,  // '^'
    0,               // '_'
    0,               // '`'
    0,               // 'a'
    0,               // 'b'
    0,               // 'c'
    0,               // 'd'
    0,               // 'e'
    0,               // 'f'
    0,               // 'g'
    0,               // 'h'
    0,               // 'i'
    0,               // 'j'
    0,               // 'k'
    0,               // 'l'
    0,               // 'm'
    0,               // 'n'
    0,               // 'o'
    0,               // 'p'
    0,               // 'q'
    0,               // 'r'
    0,               // 's'
    0,               // 't'
    0,               // 'u'
    0,               // 'v'
    0,               // 'w'
    0,               // 'x'
    0,               // 'y'
    0,               // 'z'
    0,               // '{'
    kForbiddenHost,  // '|'
    0,               // '}'
    0,               // '~'
    0,               // 0x7F (DEL)
};
// clang-format on

// Returns true if |ch| is a forbidden host code point, i.e. it is an ASCII
// value whose kHostCharacterTable entry has the kForbiddenHost bit set.
//
// The parameter is deliberately wide enough to receive a UTF-16 code unit (or
// a full code point) without truncation. With an 8-bit parameter a value such
// as U+0123 would be silently reduced to 0x23 ('#') at the call site and be
// misreported as forbidden. Every value above 0x7F is reported as allowed.
bool IsForbiddenHostCodePoint(char32_t ch) {
  return ch <= 0x7F && (kHostCharacterTable[ch] & kForbiddenHost) != 0;
}
// RFC1034 maximum FQDN length.
constexpr size_t kMaxHostLength = 253;
// Generous padding to account for the fact that UTS#46 normalization can cause
// a long string to actually shrink and fit within the 253 character RFC1034
// FQDN length limit. Note that this can still be too short for pathological
// cases: An arbitrary number of characters (e.g. U+00AD SOFT HYPHEN) can be
// removed from the input by UTS#46 processing. However, this should be
// sufficient for all normally-encountered, non-abusive hostname strings.
//
// DoIDNHost rejects any escaped host longer than this before attempting the
// IDN conversion.
constexpr size_t kMaxHostBufferLength = kMaxHostLength * 5;
// Size parameter for the temporary host buffers declared below.
// NOTE(review): presumably the number of characters RawCanonOutputT can hold
// inline before it has to allocate -- confirm against url_canon.h.
constexpr size_t kTempHostBufferLen = 1024;
// Temporary 8-bit and 16-bit output buffers used while converting hosts
// between UTF-8 and UTF-16.
using StackBuffer = RawCanonOutputT<char, kTempHostBufferLen>;
using StackBufferW = RawCanonOutputT<char16_t, kTempHostBufferLen>;
// Walks the |host| range of |spec| once and reports what kind of characters it
// contains:
//   |*has_non_ascii| - true if any character is outside the 7-bit range.
//   |*has_escaped|   - true if any character is a percent sign.
// Both flags are always written, and are false for an empty range.
//
// UCHAR is the unsigned counterpart of CHAR; it is used so that 8-bit values
// >= 0x80 compare correctly even when CHAR is a signed type.
template <typename CHAR, typename UCHAR>
void ScanHostname(const CHAR* spec,
                  const Component& host,
                  bool* has_non_ascii,
                  bool* has_escaped) {
  bool saw_non_ascii = false;
  bool saw_percent = false;

  const int stop = host.end();
  for (int pos = host.begin; pos < stop; ++pos) {
    const CHAR current = spec[pos];
    if (static_cast<UCHAR>(current) >= 0x80) {
      saw_non_ascii = true;
    } else if (current == '%') {
      saw_percent = true;
    }
  }

  *has_non_ascii = saw_non_ascii;
  *has_escaped = saw_percent;
}
// Canonicalizes a host name that is entirely 8-bit characters (even though
// the type holding them may be 16 bits). Escaped characters will be unescaped.
// Non-7-bit characters (for example, UTF-8) will be passed unchanged.
//
// The |*has_non_ascii| flag will be true if there are non-7-bit characters in
// the output, and false otherwise.
//
// This function is used in two situations:
//
//  * When the caller knows there is no non-ASCII or percent escaped
//    characters. This is what DoHostSubstring does. The result will be a
//    completely canonicalized host since we know nothing weird can happen
//    (escaped characters could be unescaped to non-7-bit, so they have to be
//    treated with suspicion at this point). It does not use the
//    |has_non_ascii| flag.
//
//  * When the caller has an 8-bit string that may need unescaping.
//    DoComplexHost calls us in this situation to do unescaping and validation.
//    After this, it may do other IDN operations depending on the value of the
//    |*has_non_ascii| flag.
//
// |canon_mode| only affects the handling of the space character: when
// IsDisallowingSpaceCharacterInURLHostParsing() is true, a space makes the
// host invalid unless the mode is CanonMode::kFileURL.
//
// The return value indicates if the output is a potentially valid host name.
// Output is appended even on failure: invalid characters and invalid escape
// sequences are written in percent-escaped form.
template <CanonMode canon_mode, typename INCHAR, typename OUTCHAR>
bool DoSimpleHost(const INCHAR* host,
                  size_t host_len,
                  CanonOutputT<OUTCHAR>* output,
                  bool* has_non_ascii) {
  *has_non_ascii = false;

  bool success = true;
  for (size_t i = 0; i < host_len; ++i) {
    unsigned int source = host[i];
    if (source == '%') {
      // Unescape first, if possible. The decoded byte is received in its own
      // variable and only then widened into |source|; writing it through a
      // pointer into the first byte of |source| would only yield the right
      // value on little-endian targets.
      unsigned char unescaped = 0;
      if (!DecodeEscaped(host, &i, host_len, &unescaped)) {
        // Invalid escaped character. There is nothing that can make this
        // host valid. We append an escaped percent so the URL looks reasonable
        // and mark as failed.
        AppendEscapedChar('%', output);
        success = false;
        continue;
      }
      source = unescaped;
    }

    if (source < 0x80) {
      // We have ASCII input, we can use our lookup table.
      unsigned char replacement = kHostCharLookup[source];
      if (!replacement) {
        // Invalid character, add it as percent-escaped and mark as failed.
        AppendEscapedChar(source, output);
        success = false;
      } else if (replacement == kEsc) {
        // This character is valid but should be escaped.
        AppendEscapedChar(source, output);
        if (source == ' ' &&
            url::IsDisallowingSpaceCharacterInURLHostParsing() &&
            canon_mode != CanonMode::kFileURL) {
          success = false;
        }
      } else {
        // Common case, the given character is valid in a hostname, the lookup
        // table tells us the canonical representation of that character (lower
        // cased).
        output->push_back(replacement);
      }
    } else {
      // It's a non-ascii char. Just push it to the output.
      // In case where we have char16 input, and char output it's safe to
      // cast char16->char only if input string was converted to ASCII.
      output->push_back(static_cast<OUTCHAR>(source));
      *has_non_ascii = true;
    }
  }
  return success;
}
// Canonicalizes a host that requires IDN conversion, appending the result to
// |output|. |src| is UTF-16 input of |src_len| code units.
//
// Returns true on success. On failure an escaped representation of the
// offending string is appended to |output| instead, and false is returned.
template <CanonMode canon_mode>
bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
  // So we can rewind below. Kept as size_t, the same type set_length() is
  // given in DoComplexHost, so the position is never narrowed.
  const size_t original_output_len = output->length();

  // We need to escape URL before doing IDN conversion, since punycode strings
  // cannot be escaped after they are created.
  RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
  bool has_non_ascii = false;
  DoSimpleHost<canon_mode>(src, src_len, &url_escaped_host, &has_non_ascii);
  if (url_escaped_host.length() > kMaxHostBufferLength) {
    // Too long to be a plausible host even allowing for shrinkage during
    // normalization; do not hand it to the IDN conversion.
    AppendInvalidNarrowString(src, 0, src_len, output);
    return false;
  }

  StackBufferW wide_output;
  if (!IDNToASCII(url_escaped_host.view(), &wide_output)) {
    // Some error, give up. This will write some reasonable looking
    // representation of the string to the output.
    AppendInvalidNarrowString(src, 0, src_len, output);
    return false;
  }

  // Now we check the ASCII output like a normal host. It will also handle
  // unescaping. Although we unescaped everything before this function call, if
  // somebody does %00 as fullwidth, ICU will convert this to ASCII.
  bool success = DoSimpleHost<canon_mode>(
      wide_output.data(), wide_output.length(), output, &has_non_ascii);
  if (has_non_ascii) {
    // ICU generated something that DoSimpleHost didn't think looked like
    // ASCII. This is quite rare, but ICU might convert some characters to
    // percent signs which might generate new escape sequences which might in
    // turn be invalid. An example is U+FE6A "small percent" which ICU will
    // name prep into an ASCII percent and then we can interpret the following
    // characters as escaped characters.
    //
    // If DoSimpleHost didn't think the output was ASCII, just escape the
    // thing we gave ICU and give up. DoSimpleHost will have handled a further
    // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates
    // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't
    // do more (like handle escaped non-ASCII sequences). Handling the escaped
    // ASCII isn't strictly necessary, but DoSimpleHost handles this case
    // anyway so we handle it.
    output->set_length(original_output_len);
    AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
                              output);
    return false;
  }
  return success;
}
// 8-bit convert host to its ASCII version: this converts the UTF-8 input to
// UTF-16 and runs it through DoIDNHost. The has_escaped flag should be set if
// the input string requires unescaping.
//
// |has_non_ascii| is the caller's scan result; when |has_escaped| is set it is
// recomputed from the unescaped data. The canonical host is appended to
// |output|. Returns true only if every escape sequence was valid and the
// UTF-8 and IDN conversions succeeded.
template <CanonMode canon_mode>
bool DoComplexHost(const char* host,
size_t host_len,
bool has_non_ascii,
bool has_escaped,
CanonOutput* output) {
// Save the current position in the output. We may write stuff and rewind it
// below, so we need to know where to rewind to.
size_t begin_length = output->length();
// Points to the UTF-8 data we want to convert. This will either be the
// input or the unescaped version written to |*output| if necessary.
const char* utf8_source;
size_t utf8_source_len;
bool are_all_escaped_valid = true;
if (has_escaped) {
// Unescape before converting to UTF-16 for IDN. We write this into the
// output because it most likely does not require IDNization, and we can
// save another huge stack buffer. It will be replaced below if it requires
// IDN. This will also update our non-ASCII flag so we know whether the
// unescaped input requires IDN.
if (!DoSimpleHost<canon_mode>(host, host_len, output, &has_non_ascii)) {
// Error with some escape sequence. We'll call the current output
// complete. DoSimpleHost will have written some "reasonable" output
// for the invalid escapes, but the output could be non-ASCII and
// needs to go through re-encoding below.
are_all_escaped_valid = false;
}
// Unescaping may have left us with ASCII input, in which case the
// unescaped version we wrote to output is complete.
if (!has_non_ascii) {
return are_all_escaped_valid;
}
// Save a pointer to the data that was just written (it may have been
// appended to other data already in the output buffer). Note that from here
// on |utf8_source| aliases |output|'s storage.
utf8_source = &output->data()[begin_length];
utf8_source_len = output->length() - begin_length;
} else {
// We don't need to unescape, use input for IDNization later. (We know the
// input has non-ASCII, or the simple version would have been called
// instead of us.)
utf8_source = host;
utf8_source_len = host_len;
}
// Non-ASCII input requires IDN, convert to UTF-16 and do the IDN conversion.
// Above, we may have used the output to write the unescaped values to, so
// we have to rewind it to where we started after we convert it to UTF-16.
StackBufferW utf16;
if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
// In this error case, the input may or may not be the output. Copy it to a
// separate buffer first so that rewinding and re-appending to |output|
// cannot read from the region being overwritten.
StackBuffer utf8;
for (size_t i = 0; i < utf8_source_len; i++)
utf8.push_back(utf8_source[i]);
output->set_length(begin_length);
AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
return false;
}
// The UTF-16 copy is complete, so the unescaped bytes in |output| (if any)
// are no longer needed.
output->set_length(begin_length);
// This will call DoSimpleHost which will do normal ASCII canonicalization.
// (Checking for IP addresses in the output is done later, by DoHost.)
return DoIDNHost<canon_mode>(utf16.data(), utf16.length(), output) &&
are_all_escaped_valid;
}
// UTF-16 convert host to its ASCII version. The set up is already ready for
// the backend, so we just pass through. The has_escaped flag should be set if
// the input string requires unescaping.
template <CanonMode canon_mode>
bool DoComplexHost(const char16_t* host,
size_t host_len,
bool has_non_ascii,
bool has_escaped,
CanonOutput* output) {
if (has_escaped) {
// Yikes, we have escaped characters with wide input. The escaped
// characters should be interpreted as UTF-8. To solve this problem,
// we convert to UTF-8, unescape, then convert back to UTF-16 for IDN.
//
// We don't bother to optimize the conversion in the ASCII case (which
// *could* just be a copy) and use the UTF-8 path, because it should be
// very rare that host names have escaped characters, and it is relatively
// fast to do the conversion anyway.
StackBuffer utf8;
if (!ConvertUTF16ToUTF8(host, host_len, &utf8)) {
AppendInvalidNarrowString(host, 0, host_len, output);
return false;
}
// Once we convert to UTF-8, we can use the 8-bit version of the complex
// host handling code above.
return DoComplexHost<canon_mode>(utf8.data(), utf8.length(), has_non_ascii,
has_escaped, output);
}
// No unescaping necessary, we can safely pass the input to ICU. This
// function will only get called if we either have escaped or non-ascii
// input, so it's safe to just use ICU now. Even if the input is ASCII,
// this function will do the right thing (just slower than we could).
return DoIDNHost<canon_mode>(host, host_len, output);
}
// Canonicalizes the |host| range of |spec| as a domain-style host and appends
// the result to |output|. No IP address detection is done here.
//
// The range is scanned first: hosts containing non-ASCII or percent characters
// go through DoComplexHost, everything else through DoSimpleHost. Returns
// whether the output is a potentially valid host. |host| must be valid.
template <typename CHAR, typename UCHAR, CanonMode canon_mode>
bool DoHostSubstring(const CHAR* spec,
                     const Component& host,
                     CanonOutput* output) {
  DCHECK(host.is_valid());

  const CHAR* const host_start = &spec[host.begin];
  const size_t host_length = static_cast<size_t>(host.len);

  bool saw_non_ascii;
  bool saw_escape;
  ScanHostname<CHAR, UCHAR>(spec, host, &saw_non_ascii, &saw_escape);

  if (!saw_non_ascii && !saw_escape) {
    // Plain ASCII without escapes: the simple path fully canonicalizes it.
    const bool is_valid = DoSimpleHost<canon_mode>(host_start, host_length,
                                                   output, &saw_non_ascii);
    DCHECK(!saw_non_ascii);
    return is_valid;
  }

  return DoComplexHost<canon_mode>(host_start, host_length, saw_non_ascii,
                                   saw_escape, output);
}
// Canonicalizes |host| as an opaque host and appends the result to |output|.
// URL Standard: https://url.spec.whatwg.org/#concept-opaque-host-parser
//
// Returns false as soon as a forbidden host code point is found (output
// appended up to that point is left in place); returns true otherwise.
template <typename CharT>
bool DoOpaqueHost(const std::basic_string_view<CharT> host,
                  CanonOutput& output) {
  const size_t length = host.size();

  for (size_t pos = 0; pos < length; ++pos) {
    const char16_t unit = host[pos];

    // '[', ']' and ':' are not rejected here; they are checked later in the
    // `CanonicalizeIPv6Address` function.
    const bool checked_later = unit == '[' || unit == ']' || unit == ':';
    if (!checked_later && IsForbiddenHostCodePoint(unit)) {
      return false;
    }

    // Implementation note on step 3 of the opaque-host parser:
    //
    // > 3. If input contains a U+0025 (%) and the two code points following
    // >    it are not ASCII hex digits, invalid-URL-unit validation error.
    //
    // `invalid-URL-unit` is NOT marked as failure, so step 3 needs no
    // handling here.

    // Step 4 of the opaque-host parser:
    //
    // > 4. Return the result of running UTF-8 percent-encode on input using
    // >    the C0 control percent-encode set.
    if (!IsInC0ControlPercentEncodeSet(unit)) {
      output.push_back(unit);
      continue;
    }
    AppendUTF8EscapedChar(host.data(), &pos, length, &output);
  }

  return true;
}
template <typename CHAR, typename UCHAR, CanonMode canon_mode>
void DoHost(const CHAR* spec,
const Component& host,
CanonOutput& output,
CanonHostInfo& host_info) {
// URL Standard: https://url.spec.whatwg.org/#host-parsing
// Keep track of output's initial length, so we can rewind later.
const int output_begin = output.length();
if (host.is_empty()) {
// Empty hosts don't need anything.
host_info.family = CanonHostInfo::NEUTRAL;
// Carry over the valid empty host for non-special URLs.
//
// Component(0, 0) should be considered invalid here for historical reasons.
//
// TODO(crbug.com/40063064): Update the callers so that they don't pass
// Component(0, 0) as an invalid `host`.
if (host.begin != 0 && host.len == 0) {
host_info.out_host = Component(output_begin, 0);
} else {
host_info.out_host = Component();
}
return;
}
bool success;
if constexpr (canon_mode == CanonMode::kSpecialURL ||
canon_mode == CanonMode::kFileURL) {
success = DoHostSubstring<CHAR, UCHAR, canon_mode>(spec, host, &output);
} else {
// URL Standard: https://url.spec.whatwg.org/#concept-opaque-host-parser
success = DoOpaqueHost(host.as_string_view_on(spec), output);
}
if (success) {
// After all the other canonicalization, check if we ended up with an IP
// address. IP addresses are small, so writing into this temporary buffer
// should not cause an allocation.
RawCanonOutput<64> canon_ip;
if constexpr (canon_mode == CanonMode::kSpecialURL ||
canon_mode == CanonMode::kFileURL) {
CanonicalizeIPAddress(output.data(),
MakeRange(output_begin, output.length()), &canon_ip,
&host_info);
} else {
// Non-special URLs support only IPv6.
CanonicalizeIPv6Address(output.data(),
MakeRange(output_begin, output.length()),
canon_ip, host_info);
}
// If we got an IPv4/IPv6 address, copy the canonical form back to the
// real buffer. Otherwise, it's a hostname or broken IP, in which case
// we just leave it in place.
if (host_info.IsIPAddress()) {
output.set_length(output_begin);
output.Append(canon_ip.view());
}
} else {
// Canonicalization failed. Set BROKEN to notify the caller.
host_info.family = CanonHostInfo::BROKEN;
}
host_info.out_host = MakeRange(output_begin, output.length());
}
} // namespace
bool CanonicalizeHost(const char* spec,
const Component& host,
CanonOutput* output,
Component* out_host) {
DCHECK(output);
DCHECK(out_host);
return CanonicalizeSpecialHost(spec, host, *output, *out_host);
}
bool CanonicalizeHost(const char16_t* spec,
const Component& host,
CanonOutput* output,
Component* out_host) {
DCHECK(output);
DCHECK(out_host);
return CanonicalizeSpecialHost(spec, host, *output, *out_host);
}
bool CanonicalizeSpecialHost(const char* spec,
const Component& host,
CanonOutput& output,
Component& out_host) {
CanonHostInfo host_info;
DoHost<char, unsigned char, CanonMode::kSpecialURL>(spec, host, output,
host_info);
out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
bool CanonicalizeSpecialHost(const char16_t* spec,
const Component& host,
CanonOutput& output,
Component& out_host) {
CanonHostInfo host_info;
DoHost<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host, output,
host_info);
out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
bool CanonicalizeFileHost(const char* spec,
const Component& host,
CanonOutput& output,
Component& out_host) {
CanonHostInfo host_info;
DoHost<char, unsigned char, CanonMode::kFileURL>(spec, host, output,
host_info);
out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
bool CanonicalizeFileHost(const char16_t* spec,
const Component& host,
CanonOutput& output,
Component& out_host) {
CanonHostInfo host_info;
DoHost<char16_t, char16_t, CanonMode::kFileURL>(spec, host, output,
host_info);
out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
bool CanonicalizeNonSpecialHost(const char* spec,
const Component& host,
CanonOutput& output,
Component& out_host) {
CanonHostInfo host_info;
DoHost<char, unsigned char, CanonMode::kNonSpecialURL>(spec, host, output,
host_info);
out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
bool CanonicalizeNonSpecialHost(const char16_t* spec,
const Component& host,
CanonOutput& output,
Component& out_host) {
CanonHostInfo host_info;
DoHost<char16_t, char16_t, CanonMode::kNonSpecialURL>(spec, host, output,
host_info);
out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
void CanonicalizeHostVerbose(const char* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
DCHECK(output);
DCHECK(host_info);
CanonicalizeSpecialHostVerbose(spec, host, *output, *host_info);
}
void CanonicalizeHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
DCHECK(output);
DCHECK(host_info);
CanonicalizeSpecialHostVerbose(spec, host, *output, *host_info);
}
void CanonicalizeSpecialHostVerbose(const char* spec,
const Component& host,
CanonOutput& output,
CanonHostInfo& host_info) {
DoHost<char, unsigned char, CanonMode::kSpecialURL>(spec, host, output,
host_info);
}
void CanonicalizeSpecialHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput& output,
CanonHostInfo& host_info) {
DoHost<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host, output,
host_info);
}
void CanonicalizeFileHostVerbose(const char* spec,
const Component& host,
CanonOutput& output,
CanonHostInfo& host_info) {
DoHost<char, unsigned char, CanonMode::kFileURL>(spec, host, output,
host_info);
}
void CanonicalizeFileHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput& output,
CanonHostInfo& host_info) {
DoHost<char16_t, char16_t, CanonMode::kFileURL>(spec, host, output,
host_info);
}
bool CanonicalizeHostSubstring(const char* spec,
const Component& host,
CanonOutput* output) {
return DoHostSubstring<char, unsigned char, CanonMode::kSpecialURL>(
spec, host, output);
}
bool CanonicalizeHostSubstring(const char16_t* spec,
const Component& host,
CanonOutput* output) {
return DoHostSubstring<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host,
output);
}
void CanonicalizeNonSpecialHostVerbose(const char* spec,
const Component& host,
CanonOutput& output,
CanonHostInfo& host_info) {
DoHost<char, unsigned char, CanonMode::kNonSpecialURL>(spec, host, output,
host_info);
}
void CanonicalizeNonSpecialHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput& output,
CanonHostInfo& host_info) {
DoHost<char16_t, char16_t, CanonMode::kNonSpecialURL>(spec, host, output,
host_info);
}
} // namespace url