Blame - url/url_canon_fileurl.cc - chromium/src

blob: 1130a97d12ff8aca6687be544c86c0c4e936c1e7 [file] [log] [blame]

tfarina@chromium.org	51bcc5d	2013-04-24 01:41:37	[diff] [blame^]	1	// Copyright 2013 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
brettw@chromium.org	e7bba5f	2013-04-10 20:10:52	[diff] [blame]	4
				5	// Functions for canonicalizing "file:" URLs.
				6
tfarina@chromium.org	318076b	2013-04-18 21:19:45	[diff] [blame]	7	#include "url/url_canon.h"
				8	#include "url/url_canon_internal.h"
				9	#include "url/url_file.h"
				10	#include "url/url_parse_internal.h"
brettw@chromium.org	e7bba5f	2013-04-10 20:10:52	[diff] [blame]	11
				12	namespace url_canon {
				13
				14	namespace {
				15
				16	#ifdef WIN32
				17
				18	// Given a pointer into the spec, this copies and canonicalizes the drive
				19	// letter and colon to the output, if one is found. If there is not a drive
				20	// spec, it won't do anything. The index of the next character in the input
				21	// spec is returned (after the colon when a drive spec is found, the begin
				22	// offset if one is not).
				23	template<typename CHAR>
				24	int FileDoDriveSpec(const CHAR* spec, int begin, int end,
				25	CanonOutput* output) {
				26	// The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
				27	// (with backslashes instead of slashes as well).
				28	int num_slashes = url_parse::CountConsecutiveSlashes(spec, begin, end);
				29	int after_slashes = begin + num_slashes;
				30
				31	if (!url_parse::DoesBeginWindowsDriveSpec(spec, after_slashes, end))
				32	return begin; // Haven't consumed any characters
				33
				34	// A drive spec is the start of a path, so we need to add a slash for the
				35	// authority terminator (typically the third slash).
				36	output->push_back('/');
				37
				38	// DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
				39	// and that it is followed by a colon/pipe.
				40
				41	// Normalize Windows drive letters to uppercase
				42	if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
				43	output->push_back(spec[after_slashes] - 'a' + 'A');
				44	else
				45	output->push_back(static_cast<char>(spec[after_slashes]));
				46
				47	// Normalize the character following it to a colon rather than pipe.
				48	output->push_back(':');
				49	return after_slashes + 2;
				50	}
				51
				52	#endif // WIN32
				53
				54	template<typename CHAR, typename UCHAR>
				55	bool DoFileCanonicalizePath(const CHAR* spec,
				56	const url_parse::Component& path,
				57	CanonOutput* output,
				58	url_parse::Component* out_path) {
				59	// Copies and normalizes the "c:" at the beginning, if present.
				60	out_path->begin = output->length();
				61	int after_drive;
				62	#ifdef WIN32
				63	after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
				64	#else
				65	after_drive = path.begin;
				66	#endif
				67
				68	// Copies the rest of the path, starting from the slash following the
				69	// drive colon (if any, Windows only), or the first slash of the path.
				70	bool success = true;
				71	if (after_drive < path.end()) {
				72	// Use the regular path canonicalizer to canonicalize the rest of the
				73	// path. Give it a fake output component to write into. DoCanonicalizeFile
				74	// will compute the full path component.
				75	url_parse::Component sub_path =
				76	url_parse::MakeRange(after_drive, path.end());
				77	url_parse::Component fake_output_path;
				78	success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
				79	} else {
				80	// No input path, canonicalize to a slash.
				81	output->push_back('/');
				82	}
				83
				84	out_path->len = output->length() - out_path->begin;
				85	return success;
				86	}
				87
				88	template<typename CHAR, typename UCHAR>
				89	bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
				90	const url_parse::Parsed& parsed,
				91	CharsetConverter* query_converter,
				92	CanonOutput* output,
				93	url_parse::Parsed* new_parsed) {
				94	// Things we don't set in file: URLs.
				95	new_parsed->username = url_parse::Component();
				96	new_parsed->password = url_parse::Component();
				97	new_parsed->port = url_parse::Component();
				98
				99	// Scheme (known, so we don't bother running it through the more
				100	// complicated scheme canonicalizer).
				101	new_parsed->scheme.begin = output->length();
				102	output->Append("file://", 7);
				103	new_parsed->scheme.len = 4;
				104
				105	// Append the host. For many file URLs, this will be empty. For UNC, this
				106	// will be present.
				107	// TODO(brettw) This doesn't do any checking for host name validity. We
				108	// should probably handle validity checking of UNC hosts differently than
				109	// for regular IP hosts.
				110	bool success = CanonicalizeHost(source.host, parsed.host,
				111	output, &new_parsed->host);
				112	success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
				113	output, &new_parsed->path);
				114	CanonicalizeQuery(source.query, parsed.query, query_converter,
				115	output, &new_parsed->query);
				116
				117	// Ignore failure for refs since the URL can probably still be loaded.
				118	CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
				119
				120	return success;
				121	}
				122
				123	} // namespace
				124
				125	bool CanonicalizeFileURL(const char* spec,
				126	int spec_len,
				127	const url_parse::Parsed& parsed,
				128	CharsetConverter* query_converter,
				129	CanonOutput* output,
				130	url_parse::Parsed* new_parsed) {
				131	return DoCanonicalizeFileURL<char, unsigned char>(
				132	URLComponentSource<char>(spec), parsed, query_converter,
				133	output, new_parsed);
				134	}
				135
				136	bool CanonicalizeFileURL(const char16* spec,
				137	int spec_len,
				138	const url_parse::Parsed& parsed,
				139	CharsetConverter* query_converter,
				140	CanonOutput* output,
				141	url_parse::Parsed* new_parsed) {
				142	return DoCanonicalizeFileURL<char16, char16>(
				143	URLComponentSource<char16>(spec), parsed, query_converter,
				144	output, new_parsed);
				145	}
				146
				147	bool FileCanonicalizePath(const char* spec,
				148	const url_parse::Component& path,
				149	CanonOutput* output,
				150	url_parse::Component* out_path) {
				151	return DoFileCanonicalizePath<char, unsigned char>(spec, path,
				152	output, out_path);
				153	}
				154
				155	bool FileCanonicalizePath(const char16* spec,
				156	const url_parse::Component& path,
				157	CanonOutput* output,
				158	url_parse::Component* out_path) {
				159	return DoFileCanonicalizePath<char16, char16>(spec, path,
				160	output, out_path);
				161	}
				162
				163	bool ReplaceFileURL(const char* base,
				164	const url_parse::Parsed& base_parsed,
				165	const Replacements<char>& replacements,
				166	CharsetConverter* query_converter,
				167	CanonOutput* output,
				168	url_parse::Parsed* new_parsed) {
				169	URLComponentSource<char> source(base);
				170	url_parse::Parsed parsed(base_parsed);
				171	SetupOverrideComponents(base, replacements, &source, &parsed);
				172	return DoCanonicalizeFileURL<char, unsigned char>(
				173	source, parsed, query_converter, output, new_parsed);
				174	}
				175
				176	bool ReplaceFileURL(const char* base,
				177	const url_parse::Parsed& base_parsed,
				178	const Replacements<char16>& replacements,
				179	CharsetConverter* query_converter,
				180	CanonOutput* output,
				181	url_parse::Parsed* new_parsed) {
				182	RawCanonOutput<1024> utf8;
				183	URLComponentSource<char> source(base);
				184	url_parse::Parsed parsed(base_parsed);
				185	SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
				186	return DoCanonicalizeFileURL<char, unsigned char>(
				187	source, parsed, query_converter, output, new_parsed);
				188	}
				189
				190	} // namespace url_canon