// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Functions to canonicalize non-special URLs.
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
namespace url {
namespace {
// Canonicalizes a non-special URL that does not have an opaque path.
//
// Each component is read from `source` using the ranges in `parsed`, its
// canonical form is appended to `output`, and the resulting component range is
// stored in `new_parsed`. Returns false if any of the scheme, user info, host,
// port or path steps reports failure, or if an authority is present without a
// valid host. Query and ref results do not affect the return value.
template <typename CHAR>
bool DoCanonicalizeNonSpecialURL(const URLComponentSource<CHAR>& source,
                                 const Parsed& parsed,
                                 CharsetConverter* query_converter,
                                 CanonOutput& output,
                                 Parsed& new_parsed) {
  // This follows the same overall shape as `DoCanonicalizeStandardURL()`, but
  // the two differ in many subtle ways, hence the separate function.
  //
  // url::ReplaceComponents() also goes through this code, so a replacement
  // that would produce an invalid URL has to be handled here. For example:
  //
  // > const url = "git:///";
  // > url.username = "x";
  // > url.href
  // "git:///" (this should not be "git://x@").
  DCHECK(!parsed.has_opaque_path);

  // Scheme: the scheme canonicalizer also appends the colon.
  bool ok = CanonicalizeScheme(source.scheme, parsed.scheme, &output,
                               &new_parsed.scheme);

  // Any valid authority sub-component means an authority is present.
  const bool authority_present =
      parsed.username.is_valid() || parsed.password.is_valid() ||
      parsed.host.is_valid() || parsed.port.is_valid();

  // Non-special URL examples which should be carefully handled:
  //
  // | URL      | parsed.user   | parsed.host   | have_authority | Valid URL? |
  // |----------+---------------+---------------+----------------+------------|
  // | git:/a   | invalid       | invalid       | false          | valid      |
  // | git://@/ | valid (empty) | invalid       | true           | invalid    |
  // | git:///  | invalid       | valid (empty) | true           | valid      |
  if (!authority_present) {
    // Without an authority none of its components exist in the result.
    new_parsed.host.reset();
    new_parsed.username.reset();
    new_parsed.password.reset();
    new_parsed.port.reset();
  } else {
    // The "//" separator is emitted only when there is a scheme before it.
    if (parsed.scheme.is_valid()) {
      output.push_back('/');
      output.push_back('/');
    }

    // Username and Password
    //
    // URL Standard:
    // - https://url.spec.whatwg.org/#cannot-have-a-username-password-port
    // - https://url.spec.whatwg.org/#dom-url-username
    // - https://url.spec.whatwg.org/#dom-url-password
    if (!parsed.host.is_nonempty()) {
      // No user info is written when the host is empty or missing.
      new_parsed.username.reset();
      new_parsed.password.reset();
    } else {
      // The user info canonicalizer takes care of the ':' and '@'.
      const bool userinfo_ok = CanonicalizeUserInfo(
          source.username, parsed.username, source.password, parsed.password,
          &output, &new_parsed.username, &new_parsed.password);
      ok = ok && userinfo_ok;
    }

    // Host
    if (!parsed.host.is_valid()) {
      // An authority without a valid host makes the URL invalid.
      // Example: "git://@/".
      new_parsed.host.reset();
      ok = false;
    } else {
      const bool host_ok = CanonicalizeNonSpecialHost(
          source.host, parsed.host, output, new_parsed.host);
      ok = ok && host_ok;
    }

    // Port
    //
    // URL Standard:
    // - https://url.spec.whatwg.org/#cannot-have-a-username-password-port
    // - https://url.spec.whatwg.org/#dom-url-port
    if (!parsed.host.is_nonempty()) {
      // As with user info, no port is written without a non-empty host.
      new_parsed.port.reset();
    } else {
      const bool port_ok = CanonicalizePort(
          source.port, parsed.port, PORT_UNSPECIFIED, &output,
          &new_parsed.port);
      ok = ok && port_ok;
    }
  }

  // Path
  if (!parsed.path.is_valid()) {
    new_parsed.path.reset();
  } else {
    const bool path_ok =
        CanonicalizePath(source.path, parsed.path, CanonMode::kNonSpecialURL,
                         &output, &new_parsed.path);
    ok = ok && path_ok;
  }

  // Query: its result is not folded into the return value.
  CanonicalizeQuery(source.query, parsed.query, query_converter, &output,
                    &new_parsed.query);

  // Ref: failure is ignored here, since the page can probably still be loaded.
  CanonicalizeRef(source.ref, parsed.ref, &output, &new_parsed.ref);

  // Propagate the potentially-dangling-markup flag; it is only ever set here,
  // never cleared.
  if (parsed.potentially_dangling_markup) {
    new_parsed.potentially_dangling_markup = true;
  }

  return ok;
}
} // namespace
bool CanonicalizeNonSpecialURL(const char* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput& output,
Parsed& new_parsed) {
// Carry over the flag.
new_parsed.has_opaque_path = parsed.has_opaque_path;
if (parsed.has_opaque_path) {
return CanonicalizePathURL(spec, spec_len, parsed, &output, &new_parsed);
}
return DoCanonicalizeNonSpecialURL(URLComponentSource(spec), parsed,
query_converter, output, new_parsed);
}
bool CanonicalizeNonSpecialURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput& output,
Parsed& new_parsed) {
// Carry over the flag.
new_parsed.has_opaque_path = parsed.has_opaque_path;
if (parsed.has_opaque_path) {
return CanonicalizePathURL(spec, spec_len, parsed, &output, &new_parsed);
}
return DoCanonicalizeNonSpecialURL(URLComponentSource(spec), parsed,
query_converter, output, new_parsed);
}
bool ReplaceNonSpecialURL(const char* base,
const Parsed& base_parsed,
const Replacements<char>& replacements,
CharsetConverter* query_converter,
CanonOutput& output,
Parsed& new_parsed) {
if (base_parsed.has_opaque_path) {
return ReplacePathURL(base, base_parsed, replacements, &output,
&new_parsed);
}
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupOverrideComponents(base, replacements, &source, &parsed);
return DoCanonicalizeNonSpecialURL(source, parsed, query_converter, output,
new_parsed);
}
// For 16-bit replacements, we turn all the replacements into UTF-8 so the
// regular code path can be used.
bool ReplaceNonSpecialURL(const char* base,
const Parsed& base_parsed,
const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput& output,
Parsed& new_parsed) {
if (base_parsed.has_opaque_path) {
return ReplacePathURL(base, base_parsed, replacements, &output,
&new_parsed);
}
RawCanonOutput<1024> utf8;
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
return DoCanonicalizeNonSpecialURL(source, parsed, query_converter, output,
new_parsed);
}
} // namespace url