[go: nahoru, domu]

blob: 34c4ab70c8cb291950d3803ef2a828c662e339a0 [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This fuzzer exercises BuiltInRecovery like sql_built_in_recovery_fuzzer, but
// employs a different strategy for generating database files. Rather than
// directly interpreting the fuzzer input as a SQLite database file, this fuzzer
// constructs a DB from fuzzer-derived SQL statements and then mutates the file
// with fuzzer-derived XOR masks before exercising recovery.
10
11#include <fuzzer/FuzzedDataProvider.h>
12#include <stdint.h>
13
14#include <ios>
15#include <iostream>
16
17#include "base/check.h"
18#include "base/check_op.h"
19#include "base/command_line.h"
20#include "base/containers/span.h"
Dan McArdle892f4752024-01-26 19:03:2521#include "base/files/file_enumerator.h"
Dan McArdle7097ced2024-01-19 21:21:4822#include "base/files/file_path.h"
23#include "base/files/file_util.h"
24#include "base/files/scoped_temp_dir.h"
25#include "base/logging.h"
26#include "base/ranges/algorithm.h"
27#include "base/strings/strcat.h"
28#include "base/strings/string_number_conversions.h"
29#include "base/strings/string_util.h"
30#include "base/test/bind.h"
31#include "base/test/scoped_logging_settings.h"
32#include "base/values.h"
33#include "sql/database.h"
34#include "sql/fuzzers/sql_disk_corruption.pb.h"
35#include "sql/recovery.h"
36#include "sql/statement.h"
37#include "testing/libfuzzer/proto/lpm_interface.h"
38#include "third_party/sqlite/fuzz/sql_query_proto_to_string.h"
39
40namespace {
41
42// Initializes and manages state shared between fuzzer iterations. Use this to
43// interact with global variables, environment variables, the filesystem, etc.
44class Environment {
45 public:
46 Environment()
47 : temp_dir_(MakeTempDir()),
48 db_path_(GetTempFilePath("db.sqlite")),
49 should_dump_input_(std::getenv("LPM_DUMP_NATIVE_INPUT") != nullptr) {
50 logging::SetMinLogLevel(logging::LOGGING_ERROR);
51 }
52
53 ~Environment() { AssertTempDirIsEmpty(); }
54
55 // By convention, the LPM_DUMP_NATIVE_INPUT environment variable indicates
56 // that the fuzzer should print its input in a readable format.
57 bool should_dump_input() const { return should_dump_input_; }
58
59 // The path to the database's backing file.
60 const base::FilePath& db_path() const { return db_path_; }
61
62 // Deletes the backing file and related journal files.
63 void DeleteDbFiles() const {
64 CHECK(base::DeleteFile(GetTempFilePath("db.sqlite")));
65 CHECK(base::DeleteFile(GetTempFilePath("db.sqlite-journal")));
66 CHECK(base::DeleteFile(GetTempFilePath("db.sqlite-wal")));
67 }
68
69 void AssertTempDirIsEmpty() const {
Dan McArdle892f4752024-01-26 19:03:2570 if (base::IsDirectoryEmpty(temp_dir_.GetPath())) {
71 return;
72 }
73
74 base::FileEnumerator files(temp_dir_.GetPath(), /*recursive=*/true,
75 base::FileEnumerator::FileType::FILES |
76 base::FileEnumerator::FileType::DIRECTORIES);
77 LOG(ERROR) << "Unexpected files or directories in temp dir:";
78 files.ForEach(
79 [](const base::FilePath& path) { LOG(ERROR) << " " << path; });
80 LOG(FATAL) << "Expected temp dir to be empty: " << temp_dir_.GetPath();
Dan McArdle7097ced2024-01-19 21:21:4881 }
82
83 private:
84 static base::ScopedTempDir MakeTempDir() {
85#if BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
86 base::CommandLine::Init(0, nullptr);
87 base::FilePath shmem_temp_dir;
88 CHECK(base::GetShmemTempDir(false, &shmem_temp_dir));
89 base::ScopedTempDir temp_dir;
90 CHECK(temp_dir.CreateUniqueTempDirUnderPath(shmem_temp_dir));
91 return temp_dir;
92#else
93 base::ScopedTempDir temp_dir;
94 CHECK(temp_dir.CreateUniqueTempDir());
95 return temp_dir;
96#endif
97 }
98
99 base::FilePath GetTempFilePath(std::string_view name) const {
100 return temp_dir_.GetPath().AppendASCII(name);
101 }
102
103 base::ScopedTempDir temp_dir_;
104 base::FilePath db_path_;
105 bool should_dump_input_ = false;
106};
107
108// A wrapper around the fuzzer's input proto. Does some preprocessing to map the
109// input to a higher-level test case.
110class TestCase {
111 public:
112 // A single mutation instruction.
113 struct Mutation {
114 int64_t pos;
115 uint64_t xor_mask;
116 };
117
118 explicit TestCase(const sql_fuzzers::RecoveryFuzzerTestCase& input)
119 : strategy_(RecoveryStrategyFromInt(input.recovery_strategy())),
120 wal_mode_(input.wal_mode()),
121 sql_statement_(sql_fuzzer::SQLQueriesToString(input.queries())),
122 sql_statement_after_open_(
Dan McArdlecbbd39c2024-01-24 21:42:52123 sql_fuzzer::SQLQueriesToString(input.queries_after_open())) {
Dan McArdle7097ced2024-01-19 21:21:48124 // Parse the input's `mutations` map as `Mutation` structs.
125 mutations_.reserve(input.mutations_size());
126 for (const auto& [pos, xor_mask] : input.mutations()) {
127 // Ignore the zero mask because it is XOR's identity value.
128 mutations_.emplace_back(pos, xor_mask ? xor_mask : 1);
129 }
130 }
131
132 sql::BuiltInRecovery::Strategy strategy() const { return strategy_; }
133 bool wal_mode() const { return wal_mode_; }
134 base::span<const Mutation> mutations() const { return mutations_; }
135 std::string_view sql_statement() const { return sql_statement_; }
136 std::string_view sql_statement_after_open() const {
137 return sql_statement_after_open_;
138 }
139
140 // Print as a human-readable string.
141 std::ostream& Print(std::ostream& os) const {
142 os << "Test Case:" << std::endl;
143 os << "- strategy: " << DebugFormat(strategy_) << std::endl;
144 os << "- wal_mode: " << (wal_mode_ ? "true" : "false") << std::endl;
145 os << "- mutations: " << std::endl;
146 os << std::hex;
147 for (const Mutation& mutation : mutations()) {
148 os << " {pos=0x" << mutation.pos << ", xor_mask=0x"
149 << mutation.xor_mask << "}," << std::endl;
150 }
151 os << std::dec;
152 os << "- sql_statement: " << DebugFormat(sql_statement()) << std::endl;
Dan McArdlecbbd39c2024-01-24 21:42:52153 os << "- sql_statement_after_open: "
154 << DebugFormat(sql_statement_after_open()) << std::endl;
Dan McArdle7097ced2024-01-19 21:21:48155 return os;
156 }
157
158 private:
159 // Converts an arbitrary int to a valid enum value.
160 static sql::BuiltInRecovery::Strategy RecoveryStrategyFromInt(int input);
161 // Converts arbitrary bytes in `s` to a human-readable ASCII string.
162 // Non-printable characters are hex-escaped.
163 static std::string DebugFormat(std::string_view s);
164 // Converts the value of `strategy`, which must be a valid enum value, to a
165 // human-readable string.
166 static constexpr const char* DebugFormat(
167 sql::BuiltInRecovery::Strategy strategy);
168
169 // Fields parsed from the fuzzer input:
170 const sql::BuiltInRecovery::Strategy strategy_ =
171 sql::BuiltInRecovery::Strategy::kRecoverOrRaze;
172 const bool wal_mode_ = false;
173 std::vector<Mutation> mutations_;
174 const std::string sql_statement_;
175 const std::string sql_statement_after_open_;
176};
177
178std::ostream& operator<<(std::ostream& os, const TestCase& test_case) {
179 return test_case.Print(os);
180}
181
182} // namespace
183
184DEFINE_PROTO_FUZZER(const sql_fuzzers::RecoveryFuzzerTestCase& fuzzer_input) {
185 static Environment env;
186
187 // Ignore this input if it includes any "ATTACH DATABASE" queries. These
188 // queries may cause SQLite to create files like `file::memory:` in the
189 // current working directory, which is undesirable. (See how `AttachDatabase`
190 // is handled in //third_party/sqlite/fuzz/sql_query_proto_to_string.cc.)
191 //
192 // TODO: A slight improvement would be to filter out individual "ATTACH
193 // DATABASE" queries rather than throwing away the whole test case.
194 if (base::ranges::any_of(fuzzer_input.queries().extra_queries(),
195 &sql_query_grammar::SQLQuery::has_attach_db) ||
196 base::ranges::any_of(fuzzer_input.queries_after_open().extra_queries(),
197 &sql_query_grammar::SQLQuery::has_attach_db)) {
198 return;
199 }
200
201 // The purpose of this fuzzer is to throw *corrupted* database files at the
202 // recovery module. If there are no mutations, this test case is out of scope.
203 if (fuzzer_input.mutations().empty()) {
204 return;
205 }
206
207 TestCase test_case(fuzzer_input);
208
209 if (env.should_dump_input()) {
210 std::cout << test_case;
211 }
212
213 sql::DatabaseOptions database_options;
214 database_options.wal_mode = test_case.wal_mode();
215 sql::Database database(database_options);
216 CHECK(database.Open(env.db_path()));
217
218 // Bootstrap the database with SQL queries derived from `fuzzer_input`.
219 {
220 // SQLite may warn us about errors in these queries, e.g. "unknown database
221 // foo". Temporarily silence those warnings.
222 logging::ScopedLoggingSettings scoped_logging;
223 logging::SetMinLogLevel(logging::LOGGING_FATAL);
224 std::ignore = database.Execute(test_case.sql_statement().data());
225 }
226 database.Close();
227
228 // Mutate the backing file. Skip the expensive file operations when there are
229 // no bytes to mutate.
230 int64_t file_length;
231 CHECK(base::GetFileSize(env.db_path(), &file_length));
232 if (file_length > 0) {
233 base::File file(env.db_path(), base::File::FLAG_OPEN |
234 base::File::FLAG_READ |
235 base::File::FLAG_WRITE);
236 // Apply each mutation without sorting by file position. These random-access
237 // file operations could be a performance bottleneck if the temp directory
238 // is on a physical disk.
239 for (TestCase::Mutation mutation : test_case.mutations()) {
240 // File read/write operations expect positions to point within the file.
241 mutation.pos %= file_length;
242 if (mutation.pos < 0) {
243 mutation.pos = 0;
244 }
245
246 uint64_t buf = 0;
247 const int num_read =
248 file.Read(mutation.pos, reinterpret_cast<char*>(&buf), sizeof(buf));
249 CHECK_NE(num_read, -1);
250 if (num_read == 0) {
251 continue;
252 }
253
254 buf ^= mutation.xor_mask;
255
256 // Write `buf` back to the file, being careful not to add bytes to the
257 // file that did not exist before.
258 CHECK_NE(
259 file.Write(mutation.pos, reinterpret_cast<char*>(&buf), num_read),
260 -1);
261 }
262 CHECK_EQ(file_length, file.GetLength());
263 }
264
265 bool attempted_recovery = false;
266 auto error_callback =
267 base::BindLambdaForTesting([&](int extended_error, sql::Statement*) {
268 if (!attempted_recovery) {
269 attempted_recovery = sql::BuiltInRecovery::RecoverIfPossible(
270 &database, extended_error, test_case.strategy());
271 }
272 });
273 database.set_error_callback(std::move(error_callback));
274
275 // Reopen the database after potentially corrupting the file. This may run
276 // the error callback.
277 const bool opened = database.Open(env.db_path());
278 if (opened) {
279 logging::ScopedLoggingSettings scoped_logging;
280 logging::SetMinLogLevel(logging::LOGGING_FATAL);
281 std::ignore = database.Execute(test_case.sql_statement_after_open().data());
282
283 database.Close();
284 }
285
286 // Delete the backing file to prepare for the next iteration.
287 env.DeleteDbFiles();
288 // Ensure that no unexpected files were created in the temp directory.
289 env.AssertTempDirIsEmpty();
290}
291
292namespace {
293
294sql::BuiltInRecovery::Strategy TestCase::RecoveryStrategyFromInt(int input) {
295 static_assert(
296 std::is_same_v<std::underlying_type<sql::BuiltInRecovery::Strategy>::type,
297 decltype(input)>,
298 "sql::BuiltInRecovery::Strategy's underlying type must match the input");
299
300 const auto strategy = static_cast<sql::BuiltInRecovery::Strategy>(input);
301
302 // Ensure that we remember to update the fuzzer if more strategies are added.
303 switch (strategy) {
304 case sql::BuiltInRecovery::Strategy::kRecoverOrRaze:
305 case sql::BuiltInRecovery::Strategy::kRecoverWithMetaVersionOrRaze:
306 return strategy;
307 }
308 // When `input` is out of range, return a default value.
309 return sql::BuiltInRecovery::Strategy::kRecoverOrRaze;
310}
311
312std::string TestCase::DebugFormat(std::string_view s) {
313 std::string out;
314 out.reserve(s.length() + 2);
315 out.push_back('"');
316 for (char c : s) {
317 if (base::IsAsciiPrintable(c)) {
318 out.push_back(c);
319 } else {
320 out.push_back('\\');
321 out.push_back('x');
322 base::AppendHexEncodedByte(static_cast<uint8_t>(c), /*output=*/out);
323 }
324 }
325 out.push_back('"');
326 return out;
327}
328
329constexpr const char* TestCase::DebugFormat(
330 sql::BuiltInRecovery::Strategy strategy) {
331 switch (strategy) {
332 case sql::BuiltInRecovery::Strategy::kRecoverOrRaze:
333 return "kRecoverOrRaze";
334 case sql::BuiltInRecovery::Strategy::kRecoverWithMetaVersionOrRaze:
335 return "kRecoverWithMetaVersionOrRaze";
336 }
337}
338
339} // namespace