1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182

url / scheme_host_port.h [blame]

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef URL_SCHEME_HOST_PORT_H_
#define URL_SCHEME_HOST_PORT_H_

#include <stddef.h>
#include <stdint.h>

#include <iosfwd>
#include <string>
#include <string_view>

#include "base/component_export.h"

class GURL;

namespace url {

struct Parsed;

// This class represents a (scheme, host, port) tuple extracted from a URL.
//
// The primary purpose of this class is to represent relevant network-authority
// information for a URL. It is _not_ an Origin, as described in RFC 6454. In
// particular, it is generally NOT the right thing to use for security
// decisions.
//
// Instead, this class is a mechanism for simplifying URLs with standard schemes
// (that is, those which follow the generic syntax of RFC 3986) down to the
// uniquely identifying information necessary for network fetches. This makes it
// suitable as a cache key for a collection of active connections, for instance.
// It may, however, be inappropriate to use as a cache key for persistent
// storage associated with a host.
//
// In particular, note that:
//
// * SchemeHostPort can only represent schemes which follow the RFC 3986 syntax
//   (e.g. those registered with GURL as "standard schemes"). Non-standard
//   schemes such as "blob", "filesystem", "data", and "javascript" can only be
//   represented as invalid SchemeHostPort objects.
//
// * For example, the "file" scheme follows the standard syntax, but it is
//   important to note that the authority portion (host, port) is optional.
//   URLs without an authority portion will be represented with an empty string
//   for the host, and a port of 0 (e.g. "file:///etc/hosts" =>
//   ("file", "", 0)), and URLs with a host-only authority portion will be
//   represented with a port of 0 (e.g. "file://example.com/etc/hosts" =>
//   ("file", "example.com", 0)). See Section 3 of RFC 3986 to better understand
//   these constructs.
//
// * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in
//   particular, it has no notion of an opaque Origin. If you need to take
//   opaque origins into account (and, if you're making security-relevant
//   decisions then you absolutely do), please use 'url::Origin' instead.
//
// Usage:
//
// * SchemeHostPort objects are commonly created from GURL objects:
//
//     GURL url("https://example.com/");
//     url::SchemeHostPort tuple(url);
//     tuple.scheme(); // "https"
//     tuple.host(); // "example.com"
//     tuple.port(); // 443
//
// * Objects may also be explicitly created and compared:
//
//     url::SchemeHostPort tuple(url::kHttpsScheme, "example.com", 443);
//     tuple.scheme(); // "https"
//     tuple.host(); // "example.com"
//     tuple.port(); // 443
//
//     GURL url("https://example.com/");
//     tuple == url::SchemeHostPort(url); // true
class COMPONENT_EXPORT(URL) SchemeHostPort {
 public:
  // Metadata that influences whether the four-argument constructor sanity
  // checks host canonicalization.
  enum ConstructPolicy {
    CHECK_CANONICALIZATION,
    ALREADY_CANONICALIZED,
  };

  // Default construction yields an invalid (scheme, host, port) tuple, which
  // is the representation used for an invalid or non-standard URL.
  SchemeHostPort();

  // Builds a tuple from explicit components. The data in |scheme| and |host|
  // is copied. Requirements on the arguments:
  //
  //   * |scheme| must be a standard scheme.
  //   * |host| must be a canonicalized A-label; '☃.net', for instance, has to
  //     be passed as 'xn--n3h.net'.
  //   * |port| must be 0 when |scheme| does not support ports (e.g. 'file').
  SchemeHostPort(std::string_view scheme, std::string_view host, uint16_t port);

  // Builds a tuple without sanity checking that the host and port are
  // canonicalized. Reserve this for conversions between types that are
  // already normalized; it should NOT be used for IPC.
  SchemeHostPort(std::string scheme,
                 std::string host,
                 uint16_t port,
                 ConstructPolicy policy);

  // Extracts the (scheme, host, port) tuple from |url|, following
  // https://tools.ietf.org/html/rfc6454#section-4
  //
  // An invalid or non-standard |url| produces an invalid SchemeHostPort
  // object.
  explicit SchemeHostPort(const GURL& url);

  // Copy and move operations are all defaulted.
  SchemeHostPort(const SchemeHostPort&) = default;
  SchemeHostPort(SchemeHostPort&&) noexcept = default;
  SchemeHostPort& operator=(const SchemeHostPort&) = default;
  SchemeHostPort& operator=(SchemeHostPort&&) noexcept = default;

  ~SchemeHostPort();

  // Component accessors.
  //
  // host() yields the host component in URL form: every IDN domain name is
  // expressed as A-Labels ('☃.net' comes back as 'xn--n3h.net'), and every
  // IPv6 address is enclosed in brackets ("[2001:db8::1]").
  const std::string& host() const { return host_; }
  const std::string& scheme() const { return scheme_; }
  uint16_t port() const { return port_; }

  // Whether this object holds a valid tuple, as opposed to the invalid state
  // produced e.g. by the default constructor.
  bool IsValid() const;

  // Produces the canonical string form of the tuple.
  //
  // The output resembles the Origin serialization specified in Section 6.2 of
  // RFC 6454, with one important difference: an invalid SchemeHostPort
  // serializes to the empty string instead of being serialized the way an
  // opaque Origin would be.
  std::string Serialize() const;

  // Returns what GURL(Serialize()) would return, but efficiently, since the
  // URL does not have to be re-parsed. Allocations are still performed to copy
  // data into the GURL, so please avoid this method when only the scheme, host,
  // or port is needed individually.
  // For example, see crrev.com/c/3637099/comments/782360d0_e14757be.
  GURL GetURL() const;

  // Estimate of dynamic memory usage.
  // See base/trace_event/memory_usage_estimator.h for more info.
  size_t EstimateMemoryUsage() const;

  // Equality holds iff scheme, host, and port are all exact matches.
  //
  // This is _not_ the same as an origin-based comparison. In particular,
  // invalid SchemeHostPort objects compare equal to each other (and to
  // themselves), whereas opaque origins would not.
  bool operator==(const SchemeHostPort& other) const {
    // Compared in the order port, scheme, host; the first mismatch decides.
    if (port_ != other.port_) {
      return false;
    }
    if (scheme_ != other.scheme_) {
      return false;
    }
    return host_ == other.host_;
  }
  bool operator!=(const SchemeHostPort& other) const {
    return !operator==(other);
  }

  // Ordering that lets SchemeHostPort serve as a key in STL containers (for
  // example, a std::set or std::map).
  bool operator<(const SchemeHostPort& other) const;

  // Tells whether host and port information should be discarded for the given
  // |scheme|.
  //
  // This hack is required to avoid breaking existing Android WebView
  // behaviors: currently, Android WebView doesn't use host and port
  // information for non-special URLs. See https://crbug.com/40063064 for
  // details.
  static bool ShouldDiscardHostAndPort(std::string_view scheme);

  // Serialization routine that additionally takes a |parsed| pointer.
  // NOTE(review): presumably the shared implementation behind Serialize() and
  // GetURL() that fills in |parsed| - confirm against the .cc file. It reads
  // like an internal helper even though it is declared in the public section.
  std::string SerializeInternal(url::Parsed* parsed) const;

 private:
  // The components of the tuple. |port_| is 0 unless a constructor assigns it.
  std::string scheme_;
  std::string host_;
  uint16_t port_ = 0;
};

// Stream insertion operator for SchemeHostPort. Only the declaration appears
// in this header. The stream is taken by reference and a std::ostream& is
// returned, so insertions can be chained.
// NOTE(review): presumably writes the Serialize() form of |scheme_host_port| -
// confirm against the definition in the .cc file.
COMPONENT_EXPORT(URL)
std::ostream& operator<<(std::ostream& out,
                         const SchemeHostPort& scheme_host_port);

}  // namespace url

#endif  // URL_SCHEME_HOST_PORT_H_