| // Copyright 2024 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // This fuzzer exercises BuiltInRecovery like sql_built_in_recovery_fuzzer, but |
| // employs a different strategy for generating database files. Rather than |
| // directly interpreting the fuzzer input as a SQLite database file, this fuzzer |
| // constructs a DB from fuzzer-derived SQL statements and then mutates the file |
| // with fuzzer-derived XOR masks before exercising recovery. |
| |
| #include <fuzzer/FuzzedDataProvider.h> |
| #include <stdint.h> |
| |
| #include <ios> |
| #include <iostream> |
| |
| #include "base/check.h" |
| #include "base/check_op.h" |
| #include "base/command_line.h" |
| #include "base/containers/span.h" |
| #include "base/files/file_enumerator.h" |
| #include "base/files/file_path.h" |
| #include "base/files/file_util.h" |
| #include "base/files/scoped_temp_dir.h" |
| #include "base/logging.h" |
| #include "base/ranges/algorithm.h" |
| #include "base/strings/strcat.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "base/test/bind.h" |
| #include "base/test/scoped_logging_settings.h" |
| #include "base/values.h" |
| #include "sql/database.h" |
| #include "sql/fuzzers/sql_disk_corruption.pb.h" |
| #include "sql/recovery.h" |
| #include "sql/statement.h" |
| #include "testing/libfuzzer/proto/lpm_interface.h" |
| #include "third_party/sqlite/fuzz/sql_query_proto_to_string.h" |
| |
| namespace { |
| |
| // Initializes and manages state shared between fuzzer iterations. Use this to |
| // interact with global variables, environment variables, the filesystem, etc. |
| class Environment { |
| public: |
| Environment() |
| : temp_dir_(MakeTempDir()), |
| db_path_(GetTempFilePath("db.sqlite")), |
| should_dump_input_(std::getenv("LPM_DUMP_NATIVE_INPUT") != nullptr) { |
| logging::SetMinLogLevel(logging::LOGGING_ERROR); |
| } |
| |
| ~Environment() { AssertTempDirIsEmpty(); } |
| |
| // By convention, the LPM_DUMP_NATIVE_INPUT environment variable indicates |
| // that the fuzzer should print its input in a readable format. |
| bool should_dump_input() const { return should_dump_input_; } |
| |
| // The path to the database's backing file. |
| const base::FilePath& db_path() const { return db_path_; } |
| |
| // Deletes the backing file and related journal files. |
| void DeleteDbFiles() const { |
| CHECK(base::DeleteFile(GetTempFilePath("db.sqlite"))); |
| CHECK(base::DeleteFile(GetTempFilePath("db.sqlite-journal"))); |
| CHECK(base::DeleteFile(GetTempFilePath("db.sqlite-wal"))); |
| } |
| |
| void AssertTempDirIsEmpty() const { |
| if (base::IsDirectoryEmpty(temp_dir_.GetPath())) { |
| return; |
| } |
| |
| base::FileEnumerator files(temp_dir_.GetPath(), /*recursive=*/true, |
| base::FileEnumerator::FileType::FILES | |
| base::FileEnumerator::FileType::DIRECTORIES); |
| LOG(ERROR) << "Unexpected files or directories in temp dir:"; |
| files.ForEach( |
| [](const base::FilePath& path) { LOG(ERROR) << " " << path; }); |
| LOG(FATAL) << "Expected temp dir to be empty: " << temp_dir_.GetPath(); |
| } |
| |
| private: |
| static base::ScopedTempDir MakeTempDir() { |
| #if BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA) |
| base::CommandLine::Init(0, nullptr); |
| base::FilePath shmem_temp_dir; |
| CHECK(base::GetShmemTempDir(false, &shmem_temp_dir)); |
| base::ScopedTempDir temp_dir; |
| CHECK(temp_dir.CreateUniqueTempDirUnderPath(shmem_temp_dir)); |
| return temp_dir; |
| #else |
| base::ScopedTempDir temp_dir; |
| CHECK(temp_dir.CreateUniqueTempDir()); |
| return temp_dir; |
| #endif |
| } |
| |
| base::FilePath GetTempFilePath(std::string_view name) const { |
| return temp_dir_.GetPath().AppendASCII(name); |
| } |
| |
| base::ScopedTempDir temp_dir_; |
| base::FilePath db_path_; |
| bool should_dump_input_ = false; |
| }; |
| |
| // A wrapper around the fuzzer's input proto. Does some preprocessing to map the |
| // input to a higher-level test case. |
| class TestCase { |
| public: |
| // A single mutation instruction. |
| struct Mutation { |
| int64_t pos; |
| uint64_t xor_mask; |
| }; |
| |
| explicit TestCase(const sql_fuzzers::RecoveryFuzzerTestCase& input) |
| : strategy_(RecoveryStrategyFromInt(input.recovery_strategy())), |
| wal_mode_(input.wal_mode()), |
| sql_statement_(sql_fuzzer::SQLQueriesToString(input.queries())), |
| sql_statement_after_open_( |
| sql_fuzzer::SQLQueriesToString(input.queries_after_open())) { |
| // Parse the input's `mutations` map as `Mutation` structs. |
| mutations_.reserve(input.mutations_size()); |
| for (const auto& [pos, xor_mask] : input.mutations()) { |
| // Ignore the zero mask because it is XOR's identity value. |
| mutations_.emplace_back(pos, xor_mask ? xor_mask : 1); |
| } |
| } |
| |
| sql::BuiltInRecovery::Strategy strategy() const { return strategy_; } |
| bool wal_mode() const { return wal_mode_; } |
| base::span<const Mutation> mutations() const { return mutations_; } |
| std::string_view sql_statement() const { return sql_statement_; } |
| std::string_view sql_statement_after_open() const { |
| return sql_statement_after_open_; |
| } |
| |
| // Print as a human-readable string. |
| std::ostream& Print(std::ostream& os) const { |
| os << "Test Case:" << std::endl; |
| os << "- strategy: " << DebugFormat(strategy_) << std::endl; |
| os << "- wal_mode: " << (wal_mode_ ? "true" : "false") << std::endl; |
| os << "- mutations: " << std::endl; |
| os << std::hex; |
| for (const Mutation& mutation : mutations()) { |
| os << " {pos=0x" << mutation.pos << ", xor_mask=0x" |
| << mutation.xor_mask << "}," << std::endl; |
| } |
| os << std::dec; |
| os << "- sql_statement: " << DebugFormat(sql_statement()) << std::endl; |
| os << "- sql_statement_after_open: " |
| << DebugFormat(sql_statement_after_open()) << std::endl; |
| return os; |
| } |
| |
| private: |
| // Converts an arbitrary int to a valid enum value. |
| static sql::BuiltInRecovery::Strategy RecoveryStrategyFromInt(int input); |
| // Converts arbitrary bytes in `s` to a human-readable ASCII string. |
| // Non-printable characters are hex-escaped. |
| static std::string DebugFormat(std::string_view s); |
| // Converts the value of `strategy`, which must be a valid enum value, to a |
| // human-readable string. |
| static constexpr const char* DebugFormat( |
| sql::BuiltInRecovery::Strategy strategy); |
| |
| // Fields parsed from the fuzzer input: |
| const sql::BuiltInRecovery::Strategy strategy_ = |
| sql::BuiltInRecovery::Strategy::kRecoverOrRaze; |
| const bool wal_mode_ = false; |
| std::vector<Mutation> mutations_; |
| const std::string sql_statement_; |
| const std::string sql_statement_after_open_; |
| }; |
| |
| std::ostream& operator<<(std::ostream& os, const TestCase& test_case) { |
| return test_case.Print(os); |
| } |
| |
| } // namespace |
| |
| DEFINE_PROTO_FUZZER(const sql_fuzzers::RecoveryFuzzerTestCase& fuzzer_input) { |
| static Environment env; |
| |
| // Ignore this input if it includes any "ATTACH DATABASE" queries. These |
| // queries may cause SQLite to create files like `file::memory:` in the |
| // current working directory, which is undesirable. (See how `AttachDatabase` |
| // is handled in //third_party/sqlite/fuzz/sql_query_proto_to_string.cc.) |
| // |
| // TODO: A slight improvement would be to filter out individual "ATTACH |
| // DATABASE" queries rather than throwing away the whole test case. |
| if (base::ranges::any_of(fuzzer_input.queries().extra_queries(), |
| &sql_query_grammar::SQLQuery::has_attach_db) || |
| base::ranges::any_of(fuzzer_input.queries_after_open().extra_queries(), |
| &sql_query_grammar::SQLQuery::has_attach_db)) { |
| return; |
| } |
| |
| // The purpose of this fuzzer is to throw *corrupted* database files at the |
| // recovery module. If there are no mutations, this test case is out of scope. |
| if (fuzzer_input.mutations().empty()) { |
| return; |
| } |
| |
| TestCase test_case(fuzzer_input); |
| |
| if (env.should_dump_input()) { |
| std::cout << test_case; |
| } |
| |
| sql::DatabaseOptions database_options; |
| database_options.wal_mode = test_case.wal_mode(); |
| sql::Database database(database_options); |
| CHECK(database.Open(env.db_path())); |
| |
| // Bootstrap the database with SQL queries derived from `fuzzer_input`. |
| { |
| // SQLite may warn us about errors in these queries, e.g. "unknown database |
| // foo". Temporarily silence those warnings. |
| logging::ScopedLoggingSettings scoped_logging; |
| logging::SetMinLogLevel(logging::LOGGING_FATAL); |
| std::ignore = database.Execute(test_case.sql_statement().data()); |
| } |
| database.Close(); |
| |
| // Mutate the backing file. Skip the expensive file operations when there are |
| // no bytes to mutate. |
| int64_t file_length; |
| CHECK(base::GetFileSize(env.db_path(), &file_length)); |
| if (file_length > 0) { |
| base::File file(env.db_path(), base::File::FLAG_OPEN | |
| base::File::FLAG_READ | |
| base::File::FLAG_WRITE); |
| // Apply each mutation without sorting by file position. These random-access |
| // file operations could be a performance bottleneck if the temp directory |
| // is on a physical disk. |
| for (TestCase::Mutation mutation : test_case.mutations()) { |
| // File read/write operations expect positions to point within the file. |
| mutation.pos %= file_length; |
| if (mutation.pos < 0) { |
| mutation.pos = 0; |
| } |
| |
| uint64_t buf = 0; |
| const int num_read = |
| file.Read(mutation.pos, reinterpret_cast<char*>(&buf), sizeof(buf)); |
| CHECK_NE(num_read, -1); |
| if (num_read == 0) { |
| continue; |
| } |
| |
| buf ^= mutation.xor_mask; |
| |
| // Write `buf` back to the file, being careful not to add bytes to the |
| // file that did not exist before. |
| CHECK_NE( |
| file.Write(mutation.pos, reinterpret_cast<char*>(&buf), num_read), |
| -1); |
| } |
| CHECK_EQ(file_length, file.GetLength()); |
| } |
| |
| bool attempted_recovery = false; |
| auto error_callback = |
| base::BindLambdaForTesting([&](int extended_error, sql::Statement*) { |
| if (!attempted_recovery) { |
| attempted_recovery = sql::BuiltInRecovery::RecoverIfPossible( |
| &database, extended_error, test_case.strategy()); |
| } |
| }); |
| database.set_error_callback(std::move(error_callback)); |
| |
| // Reopen the database after potentially corrupting the file. This may run |
| // the error callback. |
| const bool opened = database.Open(env.db_path()); |
| if (opened) { |
| logging::ScopedLoggingSettings scoped_logging; |
| logging::SetMinLogLevel(logging::LOGGING_FATAL); |
| std::ignore = database.Execute(test_case.sql_statement_after_open().data()); |
| |
| database.Close(); |
| } |
| |
| // Delete the backing file to prepare for the next iteration. |
| env.DeleteDbFiles(); |
| // Ensure that no unexpected files were created in the temp directory. |
| env.AssertTempDirIsEmpty(); |
| } |
| |
| namespace { |
| |
| sql::BuiltInRecovery::Strategy TestCase::RecoveryStrategyFromInt(int input) { |
| static_assert( |
| std::is_same_v<std::underlying_type<sql::BuiltInRecovery::Strategy>::type, |
| decltype(input)>, |
| "sql::BuiltInRecovery::Strategy's underlying type must match the input"); |
| |
| const auto strategy = static_cast<sql::BuiltInRecovery::Strategy>(input); |
| |
| // Ensure that we remember to update the fuzzer if more strategies are added. |
| switch (strategy) { |
| case sql::BuiltInRecovery::Strategy::kRecoverOrRaze: |
| case sql::BuiltInRecovery::Strategy::kRecoverWithMetaVersionOrRaze: |
| return strategy; |
| } |
| // When `input` is out of range, return a default value. |
| return sql::BuiltInRecovery::Strategy::kRecoverOrRaze; |
| } |
| |
| std::string TestCase::DebugFormat(std::string_view s) { |
| std::string out; |
| out.reserve(s.length() + 2); |
| out.push_back('"'); |
| for (char c : s) { |
| if (base::IsAsciiPrintable(c)) { |
| out.push_back(c); |
| } else { |
| out.push_back('\\'); |
| out.push_back('x'); |
| base::AppendHexEncodedByte(static_cast<uint8_t>(c), /*output=*/out); |
| } |
| } |
| out.push_back('"'); |
| return out; |
| } |
| |
| constexpr const char* TestCase::DebugFormat( |
| sql::BuiltInRecovery::Strategy strategy) { |
| switch (strategy) { |
| case sql::BuiltInRecovery::Strategy::kRecoverOrRaze: |
| return "kRecoverOrRaze"; |
| case sql::BuiltInRecovery::Strategy::kRecoverWithMetaVersionOrRaze: |
| return "kRecoverWithMetaVersionOrRaze"; |
| } |
| } |
| |
| } // namespace |