Blame - base/file_util.cc - chromium/src

blob: 07fb71c738c4914a9928d62f15c105e92a08d396 [file] [log] [blame]

license.bot	bf09a50	2008-08-24 00:55:55	[diff] [blame^]	1	// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	4
				5	#include "base/file_util.h"
				6
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	7	#include <fstream>
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	8
				9	#include "base/logging.h"
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	10	#include "base/string_util.h"
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	11	#include "unicode/uniset.h"
				12
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	13	namespace file_util {
				14
// Character that separates a file name from its extension. Used by
// InsertBeforeExtension() to locate the insertion point.
const wchar_t kExtensionSeparator = L'.';
				16
erikkay@google.com	37088fef	2008-08-15 17:32:10	[diff] [blame]	17	void PathComponents(const std::wstring& path,
				18	std::vector<std::wstring>* components) {
				19	DCHECK(components != NULL);
				20	if (components == NULL)
				21	return;
				22	std::wstring::size_type start = 0;
				23	std::wstring::size_type end = path.find(kPathSeparator, start);
				24
				25	// Special case the "/" or "\" directory. On Windows with a drive letter,
				26	// this code path won't hit, but the right thing should still happen.
				27	// "E:\foo" will turn into "E:","foo".
				28	if (end == start) {
				29	components->push_back(std::wstring(path, 0, 1));
				30	start = end + 1;
				31	end = path.find(kPathSeparator, start);
				32	}
				33	while (end != std::wstring::npos) {
				34	std::wstring component = std::wstring(path, start, end - start);
				35	components->push_back(component);
				36	start = end + 1;
				37	end = path.find(kPathSeparator, start);
				38	}
				39	std::wstring component = std::wstring(path, start);
				40	components->push_back(component);
				41	}
				42
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	43	bool EndsWithSeparator(std::wstring* path) {
erikkay@google.com	37088fef	2008-08-15 17:32:10	[diff] [blame]	44	return EndsWithSeparator(*path);
				45	}
				46
				47	bool EndsWithSeparator(const std::wstring& path) {
erikkay@google.com	e662113	2008-08-15 18:00:48	[diff] [blame]	48	bool is_sep = (path.length() > 0 &&
				49	(path)[path.length() - 1] == kPathSeparator);
erikkay@google.com	37088fef	2008-08-15 17:32:10	[diff] [blame]	50	return is_sep;
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	51	}
				52
				53	void TrimTrailingSeparator(std::wstring* dir) {
erikkay@google.com	37088fef	2008-08-15 17:32:10	[diff] [blame]	54	while (dir->length() > 1 && EndsWithSeparator(dir))
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	55	dir->resize(dir->length() - 1);
				56	}
				57
				58	void UpOneDirectory(std::wstring* dir) {
				59	TrimTrailingSeparator(dir);
				60
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	61	std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator);
				62	if (last_sep != std::wstring::npos)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	63	dir->resize(last_sep);
				64	}
				65
				66	void UpOneDirectoryOrEmpty(std::wstring* dir) {
				67	TrimTrailingSeparator(dir);
				68
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	69	std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator);
				70	if (last_sep != std::wstring::npos)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	71	dir->resize(last_sep);
				72	else
				73	dir->clear();
				74	}
				75
				76	void TrimFilename(std::wstring* path) {
				77	if (EndsWithSeparator(path)) {
				78	TrimTrailingSeparator(path);
				79	} else {
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	80	std::wstring::size_type last_sep = path->find_last_of(kPathSeparator);
				81	if (last_sep != std::wstring::npos)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	82	path->resize(last_sep);
				83	}
				84	}
				85
// Returns the portion of |path| that follows the last '\' or '/'. If |path|
// contains neither character, the whole path is returned.
std::wstring GetFilenameFromPath(const std::wstring& path) {
  // TODO(erikkay): fix this - it's not using kPathSeparator, but the Windows
  // unit tests exercise '/' as a path separator as well.
  const std::wstring::size_type last_sep = path.find_last_of(L"\\/");
  if (last_sep == std::wstring::npos)
    return path;
  return path.substr(last_sep + 1);
}
				92
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	93	std::wstring GetFileExtensionFromPath(const std::wstring& path) {
				94	std::wstring file_name = GetFilenameFromPath(path);
				95	std::wstring::size_type last_dot = file_name.rfind(L'.');
				96	return std::wstring(last_dot == std::wstring::npos? L"" : file_name, last_dot+1);
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	97	}
				98
				99	void AppendToPath(std::wstring* path, const std::wstring& new_ending) {
				100	if (!path) {
				101	NOTREACHED();
				102	return; // Don't crash in this function in release builds.
				103	}
				104
				105	if (!EndsWithSeparator(path))
				106	path->push_back(kPathSeparator);
				107	path->append(new_ending);
				108	}
				109
				110	void InsertBeforeExtension(std::wstring* path, const std::wstring& suffix) {
				111	DCHECK(path);
				112
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	113	const std::wstring::size_type last_dot = path->rfind(kExtensionSeparator);
				114	const std::wstring::size_type last_sep = path->rfind(kPathSeparator);
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	115
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	116	if (last_dot == std::wstring::npos \|\|
				117	(last_sep != std::wstring::npos && last_dot < last_sep)) {
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	118	// The path looks something like "C:\pics.old\jojo" or "C:\pics\jojo".
				119	// We should just append the suffix to the entire path.
				120	path->append(suffix);
				121	return;
				122	}
				123
				124	path->insert(last_dot, suffix);
				125	}
				126
				127	void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
				128	DCHECK(file_name);
				129
ericroman@google.com	d324ab33	2008-08-18 16:00:38	[diff] [blame]	130	// Control characters, formatting characters, non-characters, and
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	131	// some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
				132	// See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
				133	// and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
				134	// TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they
				135	// are legitimate in Arabic and some S/SE Asian scripts. However, when used
				136	// elsewhere, they can be confusing/problematic.
				137	// Also, consider wrapping the set with our Singleton class to create and
				138	// freeze it only once. Note that there's a trade-off between memory and
				139	// speed.
				140
				141	UErrorCode status = U_ZERO_ERROR;
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	142	#if defined(WCHAR_T_IS_UTF16)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	143	UnicodeSet illegal_characters(UnicodeString(
				144	L"[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);
				145	#else
				146	UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(
				147	"[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);
				148	#endif
				149	DCHECK(U_SUCCESS(status));
				150	// Add non-characters. If this becomes a performance bottleneck by
				151	// any chance, check \|ucs4 & 0xFFFEu == 0xFFFEu\|, instead.
				152	illegal_characters.add(0xFDD0, 0xFDEF);
				153	for (int i = 0; i <= 0x10; ++i) {
				154	int plane_base = 0x10000 * i;
				155	illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
				156	}
				157	illegal_characters.freeze();
				158	DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);
				159
				160	// Remove leading and trailing whitespace.
				161	TrimWhitespace(*file_name, TRIM_ALL, file_name);
				162
				163	std::wstring::size_type i = 0;
				164	std::wstring::size_type length = file_name->size();
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	165	const wchar_t* wstr = file_name->data();
				166	#if defined(WCHAR_T_IS_UTF16)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	167	// Using \|span\| method of UnicodeSet might speed things up a bit, but
				168	// it's not likely to matter here.
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	169	std::wstring temp;
				170	temp.reserve(length);
				171	while (i < length) {
				172	UChar32 ucs4;
				173	std::wstring::size_type prev = i;
				174	U16_NEXT(wstr, i, length, ucs4);
				175	if (illegal_characters.contains(ucs4)) {
				176	temp.push_back(replace_char);
				177	} else if (ucs4 < 0x10000) {
				178	temp.push_back(ucs4);
				179	} else {
				180	temp.push_back(wstr[prev]);
				181	temp.push_back(wstr[prev + 1]);
				182	}
				183	}
				184	file_name->swap(temp);
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	185	#elif defined(WCHAR_T_IS_UTF32)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	186	while (i < length) {
				187	if (illegal_characters.contains(wstr[i])) {
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	188	(*file_name)[i] = replace_char;
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	189	}
erikkay@google.com	37088fef	2008-08-15 17:32:10	[diff] [blame]	190	++i;
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	191	}
				192	#else
				193	#error wchar_t* should be either UTF-16 or UTF-32
				194	#endif
				195	}
				196
				197	void ReplaceExtension(std::wstring* file_name, const std::wstring& extension) {
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	198	const std::wstring::size_type last_dot = file_name->rfind(L'.');
				199	std::wstring result = file_name->substr(0, last_dot);
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	200	if (!extension.empty() && extension != L".") {
				201	if (extension.at(0) != L'.')
				202	result.append(L".");
				203	result.append(extension);
				204	}
				205	file_name->swap(result);
				206	}
				207
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	208	bool ContentsEqual(const std::wstring& filename1,
				209	const std::wstring& filename2) {
				210	// We open the file in binary format even if they are text files because
				211	// we are just comparing that bytes are exactly same in both files and not
				212	// doing anything smart with text formatting.
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	213	#if defined(OS_WIN)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	214	std::ifstream file1(filename1.c_str(), std::ios::in \| std::ios::binary);
				215	std::ifstream file2(filename2.c_str(), std::ios::in \| std::ios::binary);
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	216	#elif defined(OS_POSIX)
				217	std::ifstream file1(WideToUTF8(filename1).c_str(),
				218	std::ios::in \| std::ios::binary);
				219	std::ifstream file2(WideToUTF8(filename2).c_str(),
				220	std::ios::in \| std::ios::binary);
				221	#endif
				222
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	223	// Even if both files aren't openable (and thus, in some sense, "equal"),
				224	// any unusable file yields a result of "false".
				225	if (!file1.is_open() \|\| !file2.is_open())
				226	return false;
				227
				228	const int BUFFER_SIZE = 2056;
				229	char buffer1[BUFFER_SIZE], buffer2[BUFFER_SIZE];
				230	do {
				231	file1.read(buffer1, BUFFER_SIZE);
				232	file2.read(buffer2, BUFFER_SIZE);
				233
				234	if ((file1.eof() && !file2.eof()) \|\|
				235	(!file1.eof() && file2.eof()) \|\|
				236	(file1.gcount() != file2.gcount()) \|\|
				237	(memcmp(buffer1, buffer2, file1.gcount()))) {
				238	file1.close();
				239	file2.close();
				240	return false;
				241	}
				242	} while (!file1.eof() && !file2.eof());
				243
				244	file1.close();
				245	file2.close();
				246	return true;
				247	}
				248
				249	bool ReadFileToString(const std::wstring& path, std::string* contents) {
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	250	#if defined(OS_WIN)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	251	FILE* file;
				252	errno_t err = _wfopen_s(&file, path.c_str(), L"rbS");
				253	if (err != 0)
				254	return false;
erikkay@google.com	5af2edb9	2008-08-08 20:16:08	[diff] [blame]	255	#elif defined(OS_POSIX)
				256	FILE* file = fopen(WideToUTF8(path).c_str(), "r");
				257	if (!file)
				258	return false;
				259	#endif
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	260
				261	char buf[1 << 16];
				262	size_t len;
				263	while ((len = fread(buf, 1, sizeof(buf), file)) > 0) {
				264	contents->append(buf, len);
				265	}
				266	fclose(file);
				267
				268	return true;
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	269	}
				270
				271	} // namespace
license.bot	bf09a50	2008-08-24 00:55:55	[diff] [blame^]	272