license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame^] | 1 | // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 4 | |
| 5 | #include "base/file_util.h" |
| 6 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 7 | #include <fstream> |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 8 | |
| 9 | #include "base/logging.h" |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 10 | #include "base/string_util.h" |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 11 | #include "unicode/uniset.h" |
| 12 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 13 | namespace file_util { |
| 14 | |
// Character that introduces a file extension within a path (the '.' in
// "foo.txt").
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 15 | const wchar_t kExtensionSeparator = L'.'; |
| 16 | |
erikkay@google.com | 37088fef | 2008-08-15 17:32:10 | [diff] [blame] | 17 | void PathComponents(const std::wstring& path, |
| 18 | std::vector<std::wstring>* components) { |
| 19 | DCHECK(components != NULL); |
| 20 | if (components == NULL) |
| 21 | return; |
| 22 | std::wstring::size_type start = 0; |
| 23 | std::wstring::size_type end = path.find(kPathSeparator, start); |
| 24 | |
| 25 | // Special case the "/" or "\" directory. On Windows with a drive letter, |
| 26 | // this code path won't hit, but the right thing should still happen. |
| 27 | // "E:\foo" will turn into "E:","foo". |
| 28 | if (end == start) { |
| 29 | components->push_back(std::wstring(path, 0, 1)); |
| 30 | start = end + 1; |
| 31 | end = path.find(kPathSeparator, start); |
| 32 | } |
| 33 | while (end != std::wstring::npos) { |
| 34 | std::wstring component = std::wstring(path, start, end - start); |
| 35 | components->push_back(component); |
| 36 | start = end + 1; |
| 37 | end = path.find(kPathSeparator, start); |
| 38 | } |
| 39 | std::wstring component = std::wstring(path, start); |
| 40 | components->push_back(component); |
| 41 | } |
| 42 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 43 | bool EndsWithSeparator(std::wstring* path) { |
erikkay@google.com | 37088fef | 2008-08-15 17:32:10 | [diff] [blame] | 44 | return EndsWithSeparator(*path); |
| 45 | } |
| 46 | |
| 47 | bool EndsWithSeparator(const std::wstring& path) { |
erikkay@google.com | e662113 | 2008-08-15 18:00:48 | [diff] [blame] | 48 | bool is_sep = (path.length() > 0 && |
| 49 | (path)[path.length() - 1] == kPathSeparator); |
erikkay@google.com | 37088fef | 2008-08-15 17:32:10 | [diff] [blame] | 50 | return is_sep; |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 51 | } |
| 52 | |
| 53 | void TrimTrailingSeparator(std::wstring* dir) { |
erikkay@google.com | 37088fef | 2008-08-15 17:32:10 | [diff] [blame] | 54 | while (dir->length() > 1 && EndsWithSeparator(dir)) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 55 | dir->resize(dir->length() - 1); |
| 56 | } |
| 57 | |
| 58 | void UpOneDirectory(std::wstring* dir) { |
| 59 | TrimTrailingSeparator(dir); |
| 60 | |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 61 | std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator); |
| 62 | if (last_sep != std::wstring::npos) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 63 | dir->resize(last_sep); |
| 64 | } |
| 65 | |
| 66 | void UpOneDirectoryOrEmpty(std::wstring* dir) { |
| 67 | TrimTrailingSeparator(dir); |
| 68 | |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 69 | std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator); |
| 70 | if (last_sep != std::wstring::npos) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 71 | dir->resize(last_sep); |
| 72 | else |
| 73 | dir->clear(); |
| 74 | } |
| 75 | |
| 76 | void TrimFilename(std::wstring* path) { |
| 77 | if (EndsWithSeparator(path)) { |
| 78 | TrimTrailingSeparator(path); |
| 79 | } else { |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 80 | std::wstring::size_type last_sep = path->find_last_of(kPathSeparator); |
| 81 | if (last_sep != std::wstring::npos) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 82 | path->resize(last_sep); |
| 83 | } |
| 84 | } |
| 85 | |
// Returns the portion of |path| that follows the last '\' or '/'.  If |path|
// contains neither character, the whole of |path| is returned.
std::wstring GetFilenameFromPath(const std::wstring& path) {
  // TODO(erikkay): fix this - it's not using kPathSeparator, but win unit test
  // are exercising '/' as a path separator as well.
  const std::wstring::size_type last_sep = path.find_last_of(L"\\/");
  if (last_sep == std::wstring::npos)
    return path;
  return path.substr(last_sep + 1);
}
| 92 | |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 93 | std::wstring GetFileExtensionFromPath(const std::wstring& path) { |
| 94 | std::wstring file_name = GetFilenameFromPath(path); |
| 95 | std::wstring::size_type last_dot = file_name.rfind(L'.'); |
| 96 | return std::wstring(last_dot == std::wstring::npos? L"" : file_name, last_dot+1); |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 97 | } |
| 98 | |
| 99 | void AppendToPath(std::wstring* path, const std::wstring& new_ending) { |
| 100 | if (!path) { |
| 101 | NOTREACHED(); |
| 102 | return; // Don't crash in this function in release builds. |
| 103 | } |
| 104 | |
| 105 | if (!EndsWithSeparator(path)) |
| 106 | path->push_back(kPathSeparator); |
| 107 | path->append(new_ending); |
| 108 | } |
| 109 | |
| 110 | void InsertBeforeExtension(std::wstring* path, const std::wstring& suffix) { |
| 111 | DCHECK(path); |
| 112 | |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 113 | const std::wstring::size_type last_dot = path->rfind(kExtensionSeparator); |
| 114 | const std::wstring::size_type last_sep = path->rfind(kPathSeparator); |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 115 | |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 116 | if (last_dot == std::wstring::npos || |
| 117 | (last_sep != std::wstring::npos && last_dot < last_sep)) { |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 118 | // The path looks something like "C:\pics.old\jojo" or "C:\pics\jojo". |
| 119 | // We should just append the suffix to the entire path. |
| 120 | path->append(suffix); |
| 121 | return; |
| 122 | } |
| 123 | |
| 124 | path->insert(last_dot, suffix); |
| 125 | } |
| 126 | |
| 127 | void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { |
| 128 | DCHECK(file_name); |
| 129 | |
ericroman@google.com | d324ab33 | 2008-08-18 16:00:38 | [diff] [blame] | 130 | // Control characters, formatting characters, non-characters, and |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 131 | // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). |
| 132 | // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx |
| 133 | // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx |
| 134 | // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they |
| 135 | // are legitimate in Arabic and some S/SE Asian scripts. However, when used |
| 136 | // elsewhere, they can be confusing/problematic. |
| 137 | // Also, consider wrapping the set with our Singleton class to create and |
| 138 | // freeze it only once. Note that there's a trade-off between memory and |
| 139 | // speed. |
| 140 | |
| 141 | UErrorCode status = U_ZERO_ERROR; |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 142 | #if defined(WCHAR_T_IS_UTF16) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 143 | UnicodeSet illegal_characters(UnicodeString( |
| 144 | L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status); |
| 145 | #else |
| 146 | UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE( |
| 147 | "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status); |
| 148 | #endif |
| 149 | DCHECK(U_SUCCESS(status)); |
| 150 | // Add non-characters. If this becomes a performance bottleneck by |
| 151 | // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead. |
| 152 | illegal_characters.add(0xFDD0, 0xFDEF); |
| 153 | for (int i = 0; i <= 0x10; ++i) { |
| 154 | int plane_base = 0x10000 * i; |
| 155 | illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
| 156 | } |
| 157 | illegal_characters.freeze(); |
| 158 | DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000); |
| 159 | |
| 160 | // Remove leading and trailing whitespace. |
| 161 | TrimWhitespace(*file_name, TRIM_ALL, file_name); |
| 162 | |
| 163 | std::wstring::size_type i = 0; |
| 164 | std::wstring::size_type length = file_name->size(); |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 165 | const wchar_t* wstr = file_name->data(); |
| 166 | #if defined(WCHAR_T_IS_UTF16) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 167 | // Using |span| method of UnicodeSet might speed things up a bit, but |
| 168 | // it's not likely to matter here. |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 169 | std::wstring temp; |
| 170 | temp.reserve(length); |
| 171 | while (i < length) { |
| 172 | UChar32 ucs4; |
| 173 | std::wstring::size_type prev = i; |
| 174 | U16_NEXT(wstr, i, length, ucs4); |
| 175 | if (illegal_characters.contains(ucs4)) { |
| 176 | temp.push_back(replace_char); |
| 177 | } else if (ucs4 < 0x10000) { |
| 178 | temp.push_back(ucs4); |
| 179 | } else { |
| 180 | temp.push_back(wstr[prev]); |
| 181 | temp.push_back(wstr[prev + 1]); |
| 182 | } |
| 183 | } |
| 184 | file_name->swap(temp); |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 185 | #elif defined(WCHAR_T_IS_UTF32) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 186 | while (i < length) { |
| 187 | if (illegal_characters.contains(wstr[i])) { |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 188 | (*file_name)[i] = replace_char; |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 189 | } |
erikkay@google.com | 37088fef | 2008-08-15 17:32:10 | [diff] [blame] | 190 | ++i; |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 191 | } |
| 192 | #else |
| 193 | #error wchar_t* should be either UTF-16 or UTF-32 |
| 194 | #endif |
| 195 | } |
| 196 | |
// Replaces the extension of |file_name| with |extension|.
//
// The current extension is everything from the last '.' of the final path
// component onwards.  A '.' that appears in a directory component (e.g.
// "C:\pics.old\jojo") is not an extension and is left alone; in that case the
// new extension is simply appended.  Both '\' and '/' are treated as
// component separators, matching GetFilenameFromPath().
//
// |extension| may be given with or without a leading '.'.  If it is empty or
// just ".", the existing extension (if any) is removed and nothing is added.
void ReplaceExtension(std::wstring* file_name, const std::wstring& extension) {
  const std::wstring::size_type last_dot = file_name->rfind(L'.');
  const std::wstring::size_type last_sep = file_name->find_last_of(L"\\/");

  // Only cut at the dot when it belongs to the last component; otherwise keep
  // the whole path (npos makes substr() copy everything).
  std::wstring::size_type stem_end = std::wstring::npos;
  if (last_dot != std::wstring::npos &&
      (last_sep == std::wstring::npos || last_dot > last_sep)) {
    stem_end = last_dot;
  }

  std::wstring result = file_name->substr(0, stem_end);
  if (!extension.empty() && extension != L".") {
    if (extension.at(0) != L'.')
      result.append(L".");
    result.append(extension);
  }
  file_name->swap(result);
}
| 207 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 208 | bool ContentsEqual(const std::wstring& filename1, |
| 209 | const std::wstring& filename2) { |
| 210 | // We open the file in binary format even if they are text files because |
| 211 | // we are just comparing that bytes are exactly same in both files and not |
| 212 | // doing anything smart with text formatting. |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 213 | #if defined(OS_WIN) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 214 | std::ifstream file1(filename1.c_str(), std::ios::in | std::ios::binary); |
| 215 | std::ifstream file2(filename2.c_str(), std::ios::in | std::ios::binary); |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 216 | #elif defined(OS_POSIX) |
| 217 | std::ifstream file1(WideToUTF8(filename1).c_str(), |
| 218 | std::ios::in | std::ios::binary); |
| 219 | std::ifstream file2(WideToUTF8(filename2).c_str(), |
| 220 | std::ios::in | std::ios::binary); |
| 221 | #endif |
| 222 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 223 | // Even if both files aren't openable (and thus, in some sense, "equal"), |
| 224 | // any unusable file yields a result of "false". |
| 225 | if (!file1.is_open() || !file2.is_open()) |
| 226 | return false; |
| 227 | |
| 228 | const int BUFFER_SIZE = 2056; |
| 229 | char buffer1[BUFFER_SIZE], buffer2[BUFFER_SIZE]; |
| 230 | do { |
| 231 | file1.read(buffer1, BUFFER_SIZE); |
| 232 | file2.read(buffer2, BUFFER_SIZE); |
| 233 | |
| 234 | if ((file1.eof() && !file2.eof()) || |
| 235 | (!file1.eof() && file2.eof()) || |
| 236 | (file1.gcount() != file2.gcount()) || |
| 237 | (memcmp(buffer1, buffer2, file1.gcount()))) { |
| 238 | file1.close(); |
| 239 | file2.close(); |
| 240 | return false; |
| 241 | } |
| 242 | } while (!file1.eof() && !file2.eof()); |
| 243 | |
| 244 | file1.close(); |
| 245 | file2.close(); |
| 246 | return true; |
| 247 | } |
| 248 | |
| 249 | bool ReadFileToString(const std::wstring& path, std::string* contents) { |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 250 | #if defined(OS_WIN) |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 251 | FILE* file; |
| 252 | errno_t err = _wfopen_s(&file, path.c_str(), L"rbS"); |
| 253 | if (err != 0) |
| 254 | return false; |
erikkay@google.com | 5af2edb9 | 2008-08-08 20:16:08 | [diff] [blame] | 255 | #elif defined(OS_POSIX) |
| 256 | FILE* file = fopen(WideToUTF8(path).c_str(), "r"); |
| 257 | if (!file) |
| 258 | return false; |
| 259 | #endif |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 260 | |
| 261 | char buf[1 << 16]; |
| 262 | size_t len; |
| 263 | while ((len = fread(buf, 1, sizeof(buf), file)) > 0) { |
| 264 | contents->append(buf, len); |
| 265 | } |
| 266 | fclose(file); |
| 267 | |
| 268 | return true; |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 269 | } |
| 270 | |
| 271 | } // namespace |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame^] | 272 | |