[go: nahoru, domu]

blob: 07fb71c738c4914a9928d62f15c105e92a08d396 [file] [log] [blame]
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commitd7cae122008-07-26 21:49:384
5#include "base/file_util.h"
6
initial.commitd7cae122008-07-26 21:49:387#include <fstream>
initial.commitd7cae122008-07-26 21:49:388
9#include "base/logging.h"
initial.commitd7cae122008-07-26 21:49:3810#include "base/string_util.h"
initial.commitd7cae122008-07-26 21:49:3811#include "unicode/uniset.h"
12
initial.commitd7cae122008-07-26 21:49:3813namespace file_util {
14
// Character that separates a file name from its extension; used below to
// locate the extension when inserting a suffix in front of it.
const wchar_t kExtensionSeparator = L'.';
16
erikkay@google.com37088fef2008-08-15 17:32:1017void PathComponents(const std::wstring& path,
18 std::vector<std::wstring>* components) {
19 DCHECK(components != NULL);
20 if (components == NULL)
21 return;
22 std::wstring::size_type start = 0;
23 std::wstring::size_type end = path.find(kPathSeparator, start);
24
25 // Special case the "/" or "\" directory. On Windows with a drive letter,
26 // this code path won't hit, but the right thing should still happen.
27 // "E:\foo" will turn into "E:","foo".
28 if (end == start) {
29 components->push_back(std::wstring(path, 0, 1));
30 start = end + 1;
31 end = path.find(kPathSeparator, start);
32 }
33 while (end != std::wstring::npos) {
34 std::wstring component = std::wstring(path, start, end - start);
35 components->push_back(component);
36 start = end + 1;
37 end = path.find(kPathSeparator, start);
38 }
39 std::wstring component = std::wstring(path, start);
40 components->push_back(component);
41}
42
initial.commitd7cae122008-07-26 21:49:3843bool EndsWithSeparator(std::wstring* path) {
erikkay@google.com37088fef2008-08-15 17:32:1044 return EndsWithSeparator(*path);
45}
46
47bool EndsWithSeparator(const std::wstring& path) {
erikkay@google.come6621132008-08-15 18:00:4848 bool is_sep = (path.length() > 0 &&
49 (path)[path.length() - 1] == kPathSeparator);
erikkay@google.com37088fef2008-08-15 17:32:1050 return is_sep;
initial.commitd7cae122008-07-26 21:49:3851}
52
53void TrimTrailingSeparator(std::wstring* dir) {
erikkay@google.com37088fef2008-08-15 17:32:1054 while (dir->length() > 1 && EndsWithSeparator(dir))
initial.commitd7cae122008-07-26 21:49:3855 dir->resize(dir->length() - 1);
56}
57
58void UpOneDirectory(std::wstring* dir) {
59 TrimTrailingSeparator(dir);
60
erikkay@google.com5af2edb92008-08-08 20:16:0861 std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator);
62 if (last_sep != std::wstring::npos)
initial.commitd7cae122008-07-26 21:49:3863 dir->resize(last_sep);
64}
65
66void UpOneDirectoryOrEmpty(std::wstring* dir) {
67 TrimTrailingSeparator(dir);
68
erikkay@google.com5af2edb92008-08-08 20:16:0869 std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator);
70 if (last_sep != std::wstring::npos)
initial.commitd7cae122008-07-26 21:49:3871 dir->resize(last_sep);
72 else
73 dir->clear();
74}
75
76void TrimFilename(std::wstring* path) {
77 if (EndsWithSeparator(path)) {
78 TrimTrailingSeparator(path);
79 } else {
erikkay@google.com5af2edb92008-08-08 20:16:0880 std::wstring::size_type last_sep = path->find_last_of(kPathSeparator);
81 if (last_sep != std::wstring::npos)
initial.commitd7cae122008-07-26 21:49:3882 path->resize(last_sep);
83 }
84}
85
// Returns the part of |path| that follows the last '\' or '/'. When neither
// character occurs, the whole of |path| is returned; when |path| ends with
// one of them, the result is empty.
std::wstring GetFilenameFromPath(const std::wstring& path) {
  // TODO(erikkay): fix this - it's not using kPathSeparator, but win unit test
  // are exercising '/' as a path separator as well.
  const std::wstring::size_type last_sep = path.find_last_of(L"\\/");
  if (last_sep == std::wstring::npos)
    return path;
  return path.substr(last_sep + 1);
}
92
erikkay@google.com5af2edb92008-08-08 20:16:0893std::wstring GetFileExtensionFromPath(const std::wstring& path) {
94 std::wstring file_name = GetFilenameFromPath(path);
95 std::wstring::size_type last_dot = file_name.rfind(L'.');
96 return std::wstring(last_dot == std::wstring::npos? L"" : file_name, last_dot+1);
initial.commitd7cae122008-07-26 21:49:3897}
98
99void AppendToPath(std::wstring* path, const std::wstring& new_ending) {
100 if (!path) {
101 NOTREACHED();
102 return; // Don't crash in this function in release builds.
103 }
104
105 if (!EndsWithSeparator(path))
106 path->push_back(kPathSeparator);
107 path->append(new_ending);
108}
109
110void InsertBeforeExtension(std::wstring* path, const std::wstring& suffix) {
111 DCHECK(path);
112
erikkay@google.com5af2edb92008-08-08 20:16:08113 const std::wstring::size_type last_dot = path->rfind(kExtensionSeparator);
114 const std::wstring::size_type last_sep = path->rfind(kPathSeparator);
initial.commitd7cae122008-07-26 21:49:38115
erikkay@google.com5af2edb92008-08-08 20:16:08116 if (last_dot == std::wstring::npos ||
117 (last_sep != std::wstring::npos && last_dot < last_sep)) {
initial.commitd7cae122008-07-26 21:49:38118 // The path looks something like "C:\pics.old\jojo" or "C:\pics\jojo".
119 // We should just append the suffix to the entire path.
120 path->append(suffix);
121 return;
122 }
123
124 path->insert(last_dot, suffix);
125}
126
127void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
128 DCHECK(file_name);
129
ericroman@google.comd324ab332008-08-18 16:00:38130 // Control characters, formatting characters, non-characters, and
initial.commitd7cae122008-07-26 21:49:38131 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
132 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
133 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
134 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they
135 // are legitimate in Arabic and some S/SE Asian scripts. However, when used
136 // elsewhere, they can be confusing/problematic.
137 // Also, consider wrapping the set with our Singleton class to create and
138 // freeze it only once. Note that there's a trade-off between memory and
139 // speed.
140
141 UErrorCode status = U_ZERO_ERROR;
erikkay@google.com5af2edb92008-08-08 20:16:08142#if defined(WCHAR_T_IS_UTF16)
initial.commitd7cae122008-07-26 21:49:38143 UnicodeSet illegal_characters(UnicodeString(
144 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);
145#else
146 UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(
147 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);
148#endif
149 DCHECK(U_SUCCESS(status));
150 // Add non-characters. If this becomes a performance bottleneck by
151 // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead.
152 illegal_characters.add(0xFDD0, 0xFDEF);
153 for (int i = 0; i <= 0x10; ++i) {
154 int plane_base = 0x10000 * i;
155 illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
156 }
157 illegal_characters.freeze();
158 DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);
159
160 // Remove leading and trailing whitespace.
161 TrimWhitespace(*file_name, TRIM_ALL, file_name);
162
163 std::wstring::size_type i = 0;
164 std::wstring::size_type length = file_name->size();
erikkay@google.com5af2edb92008-08-08 20:16:08165 const wchar_t* wstr = file_name->data();
166#if defined(WCHAR_T_IS_UTF16)
initial.commitd7cae122008-07-26 21:49:38167 // Using |span| method of UnicodeSet might speed things up a bit, but
168 // it's not likely to matter here.
initial.commitd7cae122008-07-26 21:49:38169 std::wstring temp;
170 temp.reserve(length);
171 while (i < length) {
172 UChar32 ucs4;
173 std::wstring::size_type prev = i;
174 U16_NEXT(wstr, i, length, ucs4);
175 if (illegal_characters.contains(ucs4)) {
176 temp.push_back(replace_char);
177 } else if (ucs4 < 0x10000) {
178 temp.push_back(ucs4);
179 } else {
180 temp.push_back(wstr[prev]);
181 temp.push_back(wstr[prev + 1]);
182 }
183 }
184 file_name->swap(temp);
erikkay@google.com5af2edb92008-08-08 20:16:08185#elif defined(WCHAR_T_IS_UTF32)
initial.commitd7cae122008-07-26 21:49:38186 while (i < length) {
187 if (illegal_characters.contains(wstr[i])) {
erikkay@google.com5af2edb92008-08-08 20:16:08188 (*file_name)[i] = replace_char;
initial.commitd7cae122008-07-26 21:49:38189 }
erikkay@google.com37088fef2008-08-15 17:32:10190 ++i;
initial.commitd7cae122008-07-26 21:49:38191 }
192#else
193#error wchar_t* should be either UTF-16 or UTF-32
194#endif
195}
196
// Replaces the extension of |*file_name| with |extension|, in place.
//
// The extension is everything from the last '.' of the final path component
// onward. If that component has no '.', nothing is removed and |extension| is
// simply appended. |extension| may be given with or without its leading dot;
// an empty |extension| or a bare "." just strips the existing extension.
//
// |file_name| must not be NULL.
void ReplaceExtension(std::wstring* file_name, const std::wstring& extension) {
  // Only a dot inside the final path component marks an extension. A dot that
  // sits before the last separator belongs to a directory name (for example
  // "dir.d/file") and must not be treated as the start of the extension,
  // otherwise the file name itself would be chopped off. The separators
  // recognized here are the same ones GetFilenameFromPath() uses.
  const std::wstring::size_type last_sep = file_name->find_last_of(L"\\/");
  std::wstring::size_type last_dot = file_name->rfind(L'.');
  if (last_dot != std::wstring::npos &&
      last_sep != std::wstring::npos &&
      last_dot < last_sep) {
    last_dot = std::wstring::npos;
  }

  // substr(0, npos) yields the whole string, i.e. nothing is stripped when
  // there is no extension.
  std::wstring result = file_name->substr(0, last_dot);
  if (!extension.empty() && extension != L".") {
    if (extension[0] != L'.')
      result.append(L".");
    result.append(extension);
  }
  file_name->swap(result);
}
207
initial.commitd7cae122008-07-26 21:49:38208bool ContentsEqual(const std::wstring& filename1,
209 const std::wstring& filename2) {
210 // We open the file in binary format even if they are text files because
211 // we are just comparing that bytes are exactly same in both files and not
212 // doing anything smart with text formatting.
erikkay@google.com5af2edb92008-08-08 20:16:08213#if defined(OS_WIN)
initial.commitd7cae122008-07-26 21:49:38214 std::ifstream file1(filename1.c_str(), std::ios::in | std::ios::binary);
215 std::ifstream file2(filename2.c_str(), std::ios::in | std::ios::binary);
erikkay@google.com5af2edb92008-08-08 20:16:08216#elif defined(OS_POSIX)
217 std::ifstream file1(WideToUTF8(filename1).c_str(),
218 std::ios::in | std::ios::binary);
219 std::ifstream file2(WideToUTF8(filename2).c_str(),
220 std::ios::in | std::ios::binary);
221#endif
222
initial.commitd7cae122008-07-26 21:49:38223 // Even if both files aren't openable (and thus, in some sense, "equal"),
224 // any unusable file yields a result of "false".
225 if (!file1.is_open() || !file2.is_open())
226 return false;
227
228 const int BUFFER_SIZE = 2056;
229 char buffer1[BUFFER_SIZE], buffer2[BUFFER_SIZE];
230 do {
231 file1.read(buffer1, BUFFER_SIZE);
232 file2.read(buffer2, BUFFER_SIZE);
233
234 if ((file1.eof() && !file2.eof()) ||
235 (!file1.eof() && file2.eof()) ||
236 (file1.gcount() != file2.gcount()) ||
237 (memcmp(buffer1, buffer2, file1.gcount()))) {
238 file1.close();
239 file2.close();
240 return false;
241 }
242 } while (!file1.eof() && !file2.eof());
243
244 file1.close();
245 file2.close();
246 return true;
247}
248
249bool ReadFileToString(const std::wstring& path, std::string* contents) {
erikkay@google.com5af2edb92008-08-08 20:16:08250#if defined(OS_WIN)
initial.commitd7cae122008-07-26 21:49:38251 FILE* file;
252 errno_t err = _wfopen_s(&file, path.c_str(), L"rbS");
253 if (err != 0)
254 return false;
erikkay@google.com5af2edb92008-08-08 20:16:08255#elif defined(OS_POSIX)
256 FILE* file = fopen(WideToUTF8(path).c_str(), "r");
257 if (!file)
258 return false;
259#endif
initial.commitd7cae122008-07-26 21:49:38260
261 char buf[1 << 16];
262 size_t len;
263 while ((len = fread(buf, 1, sizeof(buf), file)) > 0) {
264 contents->append(buf, len);
265 }
266 fclose(file);
267
268 return true;
initial.commitd7cae122008-07-26 21:49:38269}
270
271} // namespace
license.botbf09a502008-08-24 00:55:55272