Skip to content

Commit

Permalink
[GURL] (2 of 2) Strip username/password/port when canonicalizing, if …
Browse files Browse the repository at this point in the history
…the scheme doesn't support them

The goal of this CL is to inhibit port numbers and usernames in internal schemes
like "chrome-extension" and "chrome". Currently, navigations to chrome-extension:// URLs
with ports actually get suprisingly far; it seems like no good can possibly come from
that.

A new SchemeType is added: SCHEME_WITH_HOST_AND_PORT (no user information).
This is only used when canonicalizing the inner URL of filesystem: -- e.g.,
filesystem:http://user@host:20/temp/foo now canonicalizes to
filesystem:http://host:20/temp/foo; whereas filesystem:chrome://user@host:20/temp/foo
canonicalizes to filesystem:chrome://host/temp/foo

Bug: 606001,809062
Cq-Include-Trybots: master.tryserver.chromium.mac:ios-simulator-cronet;master.tryserver.chromium.mac:ios-simulator-full-configs
Change-Id: I77c5ba3d2fe964deb8aadae95a06519ce038c472
Reviewed-on: https://chromium-review.googlesource.com/974380
Reviewed-by: Vasilii Sukhanov <[email protected]>
Reviewed-by: Tommy Li <[email protected]>
Reviewed-by: Mike West <[email protected]>
Commit-Queue: Nick Carter <[email protected]>
Cr-Original-Commit-Position: refs/heads/master@{#547882}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: ff69a10a306f029b25759e78e0c3cc98a3fa840c
  • Loading branch information
nick-chromium authored and Commit Bot committed Apr 4, 2018
1 parent 9b53d43 commit b3bc1d8
Show file tree
Hide file tree
Showing 10 changed files with 177 additions and 99 deletions.
6 changes: 3 additions & 3 deletions gurl_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ TEST(GURLTest, CopyFileSystem) {
GURL url2(url);
EXPECT_TRUE(url2.is_valid());

EXPECT_EQ("filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref", url2.spec());
EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec());
EXPECT_EQ("filesystem", url2.scheme());
EXPECT_EQ("", url2.username());
EXPECT_EQ("", url2.password());
Expand All @@ -211,8 +211,8 @@ TEST(GURLTest, CopyFileSystem) {
const GURL* inner = url2.inner_url();
ASSERT_TRUE(inner);
EXPECT_EQ("https", inner->scheme());
EXPECT_EQ("user", inner->username());
EXPECT_EQ("pass", inner->password());
EXPECT_EQ("", inner->username());
EXPECT_EQ("", inner->password());
EXPECT_EQ("google.com", inner->host());
EXPECT_EQ("99", inner->port());
EXPECT_EQ(99, inner->IntPort());
Expand Down
1 change: 1 addition & 0 deletions scheme_host_port.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ bool IsValidInput(const base::StringPiece& scheme,
return false;

switch (scheme_type) {
case SCHEME_WITH_HOST_AND_PORT:
case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
// A URL with |scheme| is required to have the host and port (may be
// omitted in a serialization if it's the same as the default value).
Expand Down
29 changes: 29 additions & 0 deletions url_canon.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,31 @@ class URL_EXPORT CharsetConverter {
CanonOutput* output) = 0;
};

// Schemes --------------------------------------------------------------------

// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme.
enum SchemeType {
// The authority component of a URL with the scheme has the form
// "username:password@host:port". The username and password entries are
// optional; the host may not be empty. The default value of the port can be
// omitted in serialization. This type occurs with network schemes like http,
// https, and ftp.
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
// The authority component of a URL with the scheme has the form "host:port",
// and does not include username or password. The default value of the port
// can be omitted in serialization. Used by inner URLs of filesystem URLs of
// origins with network hosts, from which the username and password are
// stripped.
SCHEME_WITH_HOST_AND_PORT,
// The authority component of an URL with the scheme has the form "host", and
// does not include port, username, or password. Used when the hosts are not
// network addresses; for example, schemes used internally by the browser.
SCHEME_WITH_HOST,
// A URL with the scheme doesn't have the authority component.
SCHEME_WITHOUT_AUTHORITY,
};

// Whitespace -----------------------------------------------------------------

// Searches for whitespace that should be removed from the middle of URLs, and
Expand Down Expand Up @@ -549,12 +574,14 @@ URL_EXPORT void CanonicalizeRef(const base::char16* spec,
URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
Expand Down Expand Up @@ -802,13 +829,15 @@ class Replacements {
URL_EXPORT bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
const Replacements<char>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
URL_EXPORT bool ReplaceStandardURL(
const char* base,
const Parsed& base_parsed,
const Replacements<base::char16>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
Expand Down
14 changes: 10 additions & 4 deletions url_canon_filesystemurl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,22 @@ bool DoCanonicalizeFileSystemURL(const CHAR* spec,
return false;

bool success = true;
SchemeType inner_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) {
new_inner_parsed.scheme.begin = output->length();
output->Append("file://", 7);
new_inner_parsed.scheme.len = 4;
success &= CanonicalizePath(spec, inner_parsed->path, output,
&new_inner_parsed.path);
} else if (IsStandard(spec, inner_parsed->scheme)) {
success = CanonicalizeStandardURL(spec, parsed.inner_parsed()->Length(),
*parsed.inner_parsed(), charset_converter,
output, &new_inner_parsed);
} else if (GetStandardSchemeType(spec, inner_parsed->scheme,
&inner_scheme_type)) {
if (inner_scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION) {
// Strip out the user information from the inner URL, if any.
inner_scheme_type = SCHEME_WITH_HOST_AND_PORT;
}
success = CanonicalizeStandardURL(
spec, parsed.inner_parsed()->Length(), *parsed.inner_parsed(),
inner_scheme_type, charset_converter, output, &new_inner_parsed);
} else {
// TODO(ericu): The URL is wrong, but should we try to output more of what
// we were given? Echoing back filesystem:mailto etc. doesn't seem all that
Expand Down
9 changes: 8 additions & 1 deletion url_canon_relative.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "url/url_constants.h"
#include "url/url_file.h"
#include "url/url_parse_internal.h"
#include "url/url_util.h"
#include "url/url_util_internal.h"

namespace url {
Expand Down Expand Up @@ -407,7 +408,13 @@ bool DoResolveRelativeHost(const char* base_url,
output->ReserveSizeIfNeeded(
replacements.components().Length() +
base_parsed.CountCharactersBefore(Parsed::USERNAME, false));
return ReplaceStandardURL(base_url, base_parsed, replacements,
SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (!GetStandardSchemeType(base_url, base_parsed.scheme, &scheme_type)) {
// A path with an authority section gets canonicalized under standard URL
// rules, even though the base was not known to be standard.
scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
}
return ReplaceStandardURL(base_url, base_parsed, replacements, scheme_type,
query_converter, output, out_parsed);
}

Expand Down
54 changes: 37 additions & 17 deletions url_canon_stdurl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,29 @@ namespace url {

namespace {

template<typename CHAR, typename UCHAR>
template <typename CHAR, typename UCHAR>
bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
// Scheme: this will append the colon.
bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
output, &new_parsed->scheme);

bool scheme_supports_user_info =
(scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
bool scheme_supports_ports =
(scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION ||
scheme_type == SCHEME_WITH_HOST_AND_PORT);

// Authority (username, password, host, port)
bool have_authority;
if (parsed.username.is_valid() || parsed.password.is_valid() ||
parsed.host.is_nonempty() || parsed.port.is_valid()) {
if ((scheme_supports_user_info &&
(parsed.username.is_valid() || parsed.password.is_valid())) ||
parsed.host.is_nonempty() ||
(scheme_supports_ports && parsed.port.is_valid())) {
have_authority = true;

// Only write the authority separators when we have a scheme.
Expand All @@ -36,11 +45,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
}

// User info: the canonicalizer will handle the : and @.
success &= CanonicalizeUserInfo(source.username, parsed.username,
source.password, parsed.password,
output,
&new_parsed->username,
&new_parsed->password);
if (scheme_supports_user_info) {
success &= CanonicalizeUserInfo(
source.username, parsed.username, source.password, parsed.password,
output, &new_parsed->username, &new_parsed->password);
} else {
new_parsed->username.reset();
new_parsed->password.reset();
}

success &= CanonicalizeHost(source.host, parsed.host,
output, &new_parsed->host);
Expand All @@ -50,10 +62,14 @@ bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
success = false;

// Port: the port canonicalizer will handle the colon.
int default_port = DefaultPortForScheme(
&output->data()[new_parsed->scheme.begin], new_parsed->scheme.len);
success &= CanonicalizePort(source.port, parsed.port, default_port,
output, &new_parsed->port);
if (scheme_supports_ports) {
int default_port = DefaultPortForScheme(
&output->data()[new_parsed->scheme.begin], new_parsed->scheme.len);
success &= CanonicalizePort(source.port, parsed.port, default_port,
output, &new_parsed->port);
} else {
new_parsed->port.reset();
}
} else {
// No authority, clear the components.
have_authority = false;
Expand Down Expand Up @@ -127,23 +143,25 @@ int DefaultPortForScheme(const char* scheme, int scheme_len) {
bool CanonicalizeStandardURL(const char* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizeStandardURL<char, unsigned char>(
URLComponentSource<char>(spec), parsed, query_converter,
URLComponentSource<char>(spec), parsed, scheme_type, query_converter,
output, new_parsed);
}

bool CanonicalizeStandardURL(const base::char16* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizeStandardURL<base::char16, base::char16>(
URLComponentSource<base::char16>(spec), parsed, query_converter,
output, new_parsed);
URLComponentSource<base::char16>(spec), parsed, scheme_type,
query_converter, output, new_parsed);
}

// It might be nice in the future to optimize this so unchanged components don't
Expand All @@ -158,21 +176,23 @@ bool CanonicalizeStandardURL(const base::char16* spec,
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
const Replacements<char>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupOverrideComponents(base, replacements, &source, &parsed);
return DoCanonicalizeStandardURL<char, unsigned char>(
source, parsed, query_converter, output, new_parsed);
source, parsed, scheme_type, query_converter, output, new_parsed);
}

// For 16-bit replacements, we turn all the replacements into UTF-8 so the
// regular code path can be used.
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
const Replacements<base::char16>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
Expand All @@ -181,7 +201,7 @@ bool ReplaceStandardURL(const char* base,
Parsed parsed(base_parsed);
SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
return DoCanonicalizeStandardURL<char, unsigned char>(
source, parsed, query_converter, output, new_parsed);
source, parsed, scheme_type, query_converter, output, new_parsed);
}

} // namespace url
59 changes: 40 additions & 19 deletions url_canon_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1433,7 +1433,8 @@ TEST(URLCanonTest, CanonicalizeStandardURL) {
std::string out_str;
StdStringCanonOutput output(&out_str);
bool success = CanonicalizeStandardURL(
cases[i].input, url_len, parsed, NULL, &output, &out_parsed);
cases[i].input, url_len, parsed,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, &output, &out_parsed);
output.Complete();

EXPECT_EQ(cases[i].expected_success, success);
Expand Down Expand Up @@ -1479,8 +1480,9 @@ TEST(URLCanonTest, ReplaceStandardURL) {
std::string out_str;
StdStringCanonOutput output(&out_str);
Parsed out_parsed;
ReplaceStandardURL(replace_cases[i].base, parsed, r, NULL, &output,
&out_parsed);
ReplaceStandardURL(replace_cases[i].base, parsed, r,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
&output, &out_parsed);
output.Complete();

EXPECT_EQ(replace_cases[i].expected, out_str);
Expand All @@ -1501,15 +1503,19 @@ TEST(URLCanonTest, ReplaceStandardURL) {
std::string out_str1;
StdStringCanonOutput output1(&out_str1);
Parsed new_parsed;
ReplaceStandardURL(src, parsed, r, NULL, &output1, &new_parsed);
ReplaceStandardURL(src, parsed, r,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
&output1, &new_parsed);
output1.Complete();
EXPECT_STREQ("http://www.google.com/", out_str1.c_str());

// Same with an "invalid" path.
r.SetPath(reinterpret_cast<char*>(0x00000001), Component());
std::string out_str2;
StdStringCanonOutput output2(&out_str2);
ReplaceStandardURL(src, parsed, r, NULL, &output2, &new_parsed);
ReplaceStandardURL(src, parsed, r,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
&output2, &new_parsed);
output2.Complete();
EXPECT_STREQ("http://www.google.com/", out_str2.c_str());
}
Expand Down Expand Up @@ -1564,24 +1570,39 @@ TEST(URLCanonTest, ReplaceFileURL) {
TEST(URLCanonTest, ReplaceFileSystemURL) {
ReplaceCase replace_cases[] = {
// Replace everything in the outer URL.
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"},
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"},
// Replace nothing
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"},
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"},
// Clear non-path components (common)
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "filesystem:file:///temporary/gaba"},
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
NULL, NULL, kDeleteComp, kDeleteComp,
"filesystem:file:///temporary/gaba"},
// Replace path with something that doesn't begin with a slash and make
// sure it gets added properly.
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "filesystem:file:///temporary/interesting/?query#ref"},
// Replace scheme -- shouldn't do anything.
{"filesystem:http://u:[email protected]/t/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:[email protected]/t/gaba?query#ref"},
// Replace username -- shouldn't do anything.
{"filesystem:http://u:[email protected]/t/gaba?query#ref", NULL, "u2", NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:[email protected]/t/gaba?query#ref"},
// Replace password -- shouldn't do anything.
{"filesystem:http://u:[email protected]/t/gaba?query#ref", NULL, NULL, "pw2", NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:[email protected]/t/gaba?query#ref"},
// Replace host -- shouldn't do anything.
{"filesystem:http://u:[email protected]/t/gaba?query#ref", NULL, NULL, NULL, "foo.com", NULL, NULL, NULL, NULL, "filesystem:http://u:[email protected]/t/gaba?query#ref"},
// Replace port -- shouldn't do anything.
{"filesystem:http://u:[email protected]:40/t/gaba?query#ref", NULL, NULL, NULL, NULL, "41", NULL, NULL, NULL, "filesystem:http://u:[email protected]:40/t/gaba?query#ref"},
{"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
NULL, "interesting/", NULL, NULL,
"filesystem:file:///temporary/interesting/?query#ref"},
// Replace scheme -- shouldn't do anything except canonicalize.
{"filesystem:http://u:[email protected]/t/gaba?query#ref", "http", NULL, NULL,
NULL, NULL, NULL, NULL, NULL,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace username -- shouldn't do anything except canonicalize.
{"filesystem:http://u:[email protected]/t/gaba?query#ref", NULL, "u2", NULL, NULL,
NULL, NULL, NULL, NULL, "filesystem:http://bar.com/t/gaba?query#ref"},
// Replace password -- shouldn't do anything except canonicalize.
{"filesystem:http://u:[email protected]/t/gaba?query#ref", NULL, NULL, "pw2",
NULL, NULL, NULL, NULL, NULL,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace host -- shouldn't do anything except canonicalize.
{"filesystem:http://u:[email protected]:80/t/gaba?query#ref", NULL, NULL, NULL,
"foo.com", NULL, NULL, NULL, NULL,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace port -- shouldn't do anything except canonicalize.
{"filesystem:http://u:[email protected]:40/t/gaba?query#ref", NULL, NULL, NULL,
NULL, "41", NULL, NULL, NULL,
"filesystem:http://bar.com:40/t/gaba?query#ref"},
};

for (size_t i = 0; i < arraysize(replace_cases); i++) {
Expand Down
Loading

0 comments on commit b3bc1d8

Please sign in to comment.