[Courgette] Reduce AssemblyProgram to reduce Courgette-apply RAM floor and disk churn.
This is the main CL of the AssemblyProgram reduction effort, which makes
Disassembler emit instructions directly to EncodedProgram, and bypass
AssemblyProgram, which becomes a holder of LabelManager and Label
Annotation. Another big change is that Disassembler usage is exposed to
high-level flows (rather than having it hidden inside
ParseDetectedExecutable*()). Details:
- Remove ParseDetectedExecutable*() and expose DetectDisassembler().
- AssemblyProgram: Remove:
- Entire Instruction class hierarchy.
- Allocated instruction in the heap (including preallocated DEFBYTE
instances).
- enum OP {}, which duplicates EncodedProgram::OP.
- InstructionVector of pointers, which takes 100's of MB on disk.
- Encode() interface.
- AssemblyProgram: Add:
- AnnotateLabels() to generate Label Annotation directly from a
generator (injected by Disassembler), using LabelReceptor template
class for counting and storage.
- PrepareEncodedProgram() to inject data needed (e.g., Label) to
EncodedProgram.
- Disassembler: Disassemble() is split into CreateProgram() and
DisassembleAndEncode().
- EncodedProgram: Old AssemblyProgram::Encode() code now moved to
InstructionStoreReceptor. New GenerateInstruction() is added to
receive generator from Disassembler.
- CourgetteFlow: Disassembler explicitly managed:
- Remove ReadAssemblyProgramFromBuffer().
- Add ReadDisassemblerFromBuffer().
- Add CreateEncodedProgramFromDisassemblerAndAssemblyProgram().
- High-level flows in courgette_tool.cc, patch_generator_x86_32.h,
patcher_x86_32.h: Use a new, more complex flow that involves
Disassembler, illustrated with ASCII graphics in comments.
- Unit test updates.
Documentation update will be done in a follow-up.
BUG=660980
Review-Url: https://codereview.chromium.org/2854113002
Cr-Commit-Position: refs/heads/master@{#472907}
diff --git a/courgette/adjustment_method_unittest.cc b/courgette/adjustment_method_unittest.cc
index bf42f4ac..062bf63 100644
--- a/courgette/adjustment_method_unittest.cc
+++ b/courgette/adjustment_method_unittest.cc
@@ -3,12 +3,14 @@
// found in the LICENSE file.
#include <memory>
+#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "base/bind.h"
#include "base/memory/ptr_util.h"
+#include "base/strings/stringprintf.h"
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
@@ -65,7 +67,7 @@
},
labelA, labelB);
- EXPECT_TRUE(prog->GenerateInstructions(gen, true));
+ EXPECT_TRUE(prog->AnnotateLabels(gen));
EXPECT_EQ(6U, prog->abs32_label_annotations().size());
EXPECT_EQ(0U, prog->rel32_label_annotations().size());
@@ -88,36 +90,25 @@
return MakeProgram(1);
}
- // Returns a string that is the serialized version of |program|.
- // Deletes |program|.
- std::string Serialize(std::unique_ptr<AssemblyProgram> program) const {
- std::unique_ptr<EncodedProgram> encoded;
+ // Returns a string that is the serialized version of |program| annotations.
+ std::string Serialize(AssemblyProgram* program) const {
+ std::ostringstream oss;
+ for (const Label* label : program->abs32_label_annotations())
+ oss << "(" << label->rva_ << "," << label->index_ << ")";
+ oss << ";";
+ for (const Label* label : program->rel32_label_annotations())
+ oss << "(" << label->rva_ << "," << label->index_ << ")";
- const Status encode_status = Encode(*program, &encoded);
- EXPECT_EQ(C_OK, encode_status);
-
- program.reset();
-
- SinkStreamSet sinks;
- const Status write_status = WriteEncodedProgram(encoded.get(), &sinks);
- EXPECT_EQ(C_OK, write_status);
-
- encoded.reset();
-
- SinkStream sink;
- bool can_collect = sinks.CopyTo(&sink);
- EXPECT_TRUE(can_collect);
-
- return std::string(reinterpret_cast<const char *>(sink.Buffer()),
- sink.Length());
+ EXPECT_GT(oss.str().length(), 1U); // Ensure results are non-trivial.
+ return oss.str();
}
};
void AdjustmentMethodTest::Test1() const {
std::unique_ptr<AssemblyProgram> prog1 = MakeProgramA();
std::unique_ptr<AssemblyProgram> prog2 = MakeProgramB();
- std::string s1 = Serialize(std::move(prog1));
- std::string s2 = Serialize(std::move(prog2));
+ std::string s1 = Serialize(prog1.get());
+ std::string s2 = Serialize(prog2.get());
// Don't use EXPECT_EQ because strings are unprintable.
EXPECT_FALSE(s1 == s2); // Unadjusted A and B differ.
@@ -126,8 +117,8 @@
std::unique_ptr<AssemblyProgram> prog6 = MakeProgramB();
Status can_adjust = Adjust(*prog5, prog6.get());
EXPECT_EQ(C_OK, can_adjust);
- std::string s5 = Serialize(std::move(prog5));
- std::string s6 = Serialize(std::move(prog6));
+ std::string s5 = Serialize(prog5.get());
+ std::string s6 = Serialize(prog6.get());
EXPECT_TRUE(s1 == s5); // Adjustment did not change A (prog5)
EXPECT_TRUE(s5 == s6); // Adjustment did change B into A
diff --git a/courgette/assembly_program.cc b/courgette/assembly_program.cc
index 531b685..58c16b4 100644
--- a/courgette/assembly_program.cc
+++ b/courgette/assembly_program.cc
@@ -2,111 +2,31 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <stddef.h>
+
#include "courgette/assembly_program.h"
-#include "base/callback.h"
#include "base/logging.h"
-#include "courgette/courgette.h"
-#include "courgette/disassembler.h"
#include "courgette/encoded_program.h"
+#include "courgette/instruction_utils.h"
namespace courgette {
namespace {
-// Sets the current address for the emitting instructions.
-class OriginInstruction : public Instruction {
+// An instruction receptor that adds each received abs32/rel32 Label* to the
+// matching VECTOR member variable. Template parameter CONTAINER allows code
+// reuse for counting (CountingVector) and storage (std::vector).
+template <template <typename T, typename... Args> class CONTAINER>
+class LabelReceptor : public InstructionReceptor {
public:
- explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
- RVA origin_rva() const { return rva_; }
- private:
- RVA rva_;
-};
+ using VECTOR = CONTAINER<Label*>;
-// Emits an entire PE base relocation table.
-class PeRelocsInstruction : public Instruction {
- public:
- PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
-};
+ LabelReceptor() = default;
+ ~LabelReceptor() override = default;
-// Emits an ELF relocation table.
-class ElfRelocsInstruction : public Instruction {
- public:
- ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
-};
-
-// Emits an ELF ARM relocation table.
-class ElfARMRelocsInstruction : public Instruction {
- public:
- ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
-};
-
-// Emits a single byte.
-class ByteInstruction : public Instruction {
- public:
- explicit ByteInstruction(uint8_t value) : Instruction(DEFBYTE, value) {}
- uint8_t byte_value() const { return info_; }
-};
-
-// Emits a single byte.
-class BytesInstruction : public Instruction {
- public:
- BytesInstruction(const uint8_t* values, size_t len)
- : Instruction(DEFBYTES, 0), values_(values), len_(len) {}
- const uint8_t* byte_values() const { return values_; }
- size_t len() const { return len_; }
-
- private:
- const uint8_t* values_;
- size_t len_;
-};
-
-// A ABS32 to REL32 instruction emits a reference to a label's address.
-class InstructionWithLabel : public Instruction {
- public:
- InstructionWithLabel(OP op, Label* label)
- : Instruction(op, 0), label_(label) {
- if (label == NULL) NOTREACHED();
- }
- Label* label() const { return label_; }
- protected:
- Label* label_;
-};
-
-// An ARM REL32 instruction emits a reference to a label's address and
-// a specially-compressed ARM op.
-class InstructionWithLabelARM : public InstructionWithLabel {
- public:
- InstructionWithLabelARM(OP op,
- uint16_t compressed_op,
- Label* label,
- const uint8_t* arm_op,
- uint16_t op_size)
- : InstructionWithLabel(op, label),
- compressed_op_(compressed_op),
- arm_op_(arm_op),
- op_size_(op_size) {
- if (label == NULL) NOTREACHED();
- }
- uint16_t compressed_op() const { return compressed_op_; }
- const uint8_t* arm_op() const { return arm_op_; }
- uint16_t op_size() const { return op_size_; }
-
- private:
- uint16_t compressed_op_;
- const uint8_t* arm_op_;
- uint16_t op_size_;
-};
-
-/******** InstructionCountReceptor ********/
-
-// An InstructionReceptor that counts space occupied by emitted instructions.
-class InstructionCountReceptor : public InstructionReceptor {
- public:
- InstructionCountReceptor() = default;
-
- size_t abs_count() const { return abs_count_; }
- size_t rel_count() const { return rel_count_; }
+ VECTOR* mutable_abs32_vector() { return &abs32_vector_; }
+ VECTOR* mutable_rel32_vector() { return &rel32_vector_; }
// InstructionReceptor:
CheckBool EmitPeRelocs() override { return true; }
@@ -118,156 +38,38 @@
return true;
}
CheckBool EmitRel32(Label* label) override {
- ++rel_count_;
+ rel32_vector_.push_back(label);
return true;
}
CheckBool EmitRel32ARM(uint16_t op,
Label* label,
const uint8_t* arm_op,
uint16_t op_size) override {
- ++rel_count_;
+ rel32_vector_.push_back(label);
return true;
}
CheckBool EmitAbs32(Label* label) override {
- ++abs_count_;
+ abs32_vector_.push_back(label);
return true;
}
CheckBool EmitAbs64(Label* label) override {
- ++abs_count_;
+ abs32_vector_.push_back(label);
return true;
}
private:
- size_t abs_count_ = 0;
- size_t rel_count_ = 0;
+ VECTOR abs32_vector_;
+ VECTOR rel32_vector_;
- DISALLOW_COPY_AND_ASSIGN(InstructionCountReceptor);
-};
-
-/******** InstructionStoreReceptor ********/
-
-// An InstructionReceptor that stores emitted instructions.
-class InstructionStoreReceptor : public InstructionReceptor {
- public:
- InstructionStoreReceptor(AssemblyProgram* program, bool annotate_labels)
- : program_(program), annotate_labels_(annotate_labels) {
- CHECK(program_);
- }
-
- // TODO(huangs): 2017/04: Add Reserve().
-
- // InstructionReceptor:
- // TODO(huangs): 2017/04: Move implementations here.
- CheckBool EmitPeRelocs() override { return program_->EmitPeRelocs(); }
- CheckBool EmitElfRelocation() override {
- return program_->EmitElfRelocation();
- }
- CheckBool EmitElfARMRelocation() override {
- return program_->EmitElfARMRelocation();
- }
- CheckBool EmitOrigin(RVA rva) override { return program_->EmitOrigin(rva); }
- CheckBool EmitSingleByte(uint8_t byte) override {
- return program_->EmitSingleByte(byte);
- }
- CheckBool EmitMultipleBytes(const uint8_t* bytes, size_t len) override {
- return program_->EmitMultipleBytes(bytes, len);
- }
- CheckBool EmitRel32(Label* label) override {
- if (annotate_labels_)
- program_->mutable_rel32_label_annotations()->push_back(label);
- return program_->EmitRel32(label);
- }
- CheckBool EmitRel32ARM(uint16_t op,
- Label* label,
- const uint8_t* arm_op,
- uint16_t op_size) override {
- if (annotate_labels_)
- program_->mutable_rel32_label_annotations()->push_back(label);
- return program_->EmitRel32ARM(op, label, arm_op, op_size);
- }
- CheckBool EmitAbs32(Label* label) override {
- if (annotate_labels_)
- program_->mutable_abs32_label_annotations()->push_back(label);
- return program_->EmitAbs32(label);
- }
- CheckBool EmitAbs64(Label* label) override {
- if (annotate_labels_)
- program_->mutable_abs32_label_annotations()->push_back(label);
- return program_->EmitAbs64(label);
- }
-
- private:
- AssemblyProgram* program_;
- const bool annotate_labels_;
-
- DISALLOW_COPY_AND_ASSIGN(InstructionStoreReceptor);
+ DISALLOW_COPY_AND_ASSIGN(LabelReceptor);
};
} // namespace
-/******** AssemblyProgram ********/
-
AssemblyProgram::AssemblyProgram(ExecutableType kind, uint64_t image_base)
: kind_(kind), image_base_(image_base) {}
-AssemblyProgram::~AssemblyProgram() {
- for (size_t i = 0; i < instructions_.size(); ++i) {
- Instruction* instruction = instructions_[i];
- if (instruction->op() != DEFBYTE) // Owned by byte_instruction_cache_.
- UncheckedDelete(instruction);
- }
- if (byte_instruction_cache_.get()) {
- for (size_t i = 0; i < 256; ++i)
- UncheckedDelete(byte_instruction_cache_[i]);
- }
-}
-
-CheckBool AssemblyProgram::EmitPeRelocs() {
- return Emit(ScopedInstruction(UncheckedNew<PeRelocsInstruction>()));
-}
-
-CheckBool AssemblyProgram::EmitElfRelocation() {
- return Emit(ScopedInstruction(UncheckedNew<ElfRelocsInstruction>()));
-}
-
-CheckBool AssemblyProgram::EmitElfARMRelocation() {
- return Emit(ScopedInstruction(UncheckedNew<ElfARMRelocsInstruction>()));
-}
-
-CheckBool AssemblyProgram::EmitOrigin(RVA rva) {
- return Emit(ScopedInstruction(UncheckedNew<OriginInstruction>(rva)));
-}
-
-CheckBool AssemblyProgram::EmitSingleByte(uint8_t byte) {
- return EmitShared(GetByteInstruction(byte));
-}
-
-CheckBool AssemblyProgram::EmitMultipleBytes(const uint8_t* bytes, size_t len) {
- return Emit(ScopedInstruction(UncheckedNew<BytesInstruction>(bytes, len)));
-}
-
-CheckBool AssemblyProgram::EmitRel32(Label* label) {
- return Emit(
- ScopedInstruction(UncheckedNew<InstructionWithLabel>(REL32, label)));
-}
-
-CheckBool AssemblyProgram::EmitRel32ARM(uint16_t op,
- Label* label,
- const uint8_t* arm_op,
- uint16_t op_size) {
- return Emit(ScopedInstruction(UncheckedNew<InstructionWithLabelARM>(
- REL32ARM, op, label, arm_op, op_size)));
-}
-
-CheckBool AssemblyProgram::EmitAbs32(Label* label) {
- return Emit(
- ScopedInstruction(UncheckedNew<InstructionWithLabel>(ABS32, label)));
-}
-
-CheckBool AssemblyProgram::EmitAbs64(Label* label) {
- return Emit(
- ScopedInstruction(UncheckedNew<InstructionWithLabel>(ABS64, label)));
-}
+AssemblyProgram::~AssemblyProgram() = default;
void AssemblyProgram::PrecomputeLabels(RvaVisitor* abs32_visitor,
RvaVisitor* rel32_visitor) {
@@ -315,157 +117,30 @@
return rel32_label_manager_.Find(rva);
}
-CheckBool AssemblyProgram::GenerateInstructions(const InstructionGenerator& gen,
- bool annotate_labels) {
- // Pass 1: Count storage space required and reserve in advance.
- InstructionCountReceptor count_receptor;
+CheckBool AssemblyProgram::AnnotateLabels(const InstructionGenerator& gen) {
+ // Pass 1: Compute required space.
+ LabelReceptor<CountingVector> count_receptor;
if (!gen.Run(&count_receptor))
return false;
- if (annotate_labels) {
- DCHECK(abs32_label_annotations_.empty());
- abs32_label_annotations_.reserve(count_receptor.abs_count());
- DCHECK(rel32_label_annotations_.empty());
- rel32_label_annotations_.reserve(count_receptor.rel_count());
- }
-
- // Pass 2: Emit all instructions to reserved buffer (uses Phase 1 count).
- // Populates |abs32_label_annotations_| and |re32_label_annotations_| if
- // |annotate_labels| is true.
- InstructionStoreReceptor store_receptor(this, annotate_labels);
- return gen.Run(&store_receptor);
-}
-
-CheckBool AssemblyProgram::Emit(ScopedInstruction instruction) {
- if (!instruction || !instructions_.push_back(instruction.get()))
+ // Pass 2: Reserve and store annotations.
+ LabelReceptor<std::vector> annotate_receptor;
+ annotate_receptor.mutable_abs32_vector()->reserve(
+ count_receptor.mutable_abs32_vector()->size());
+ annotate_receptor.mutable_rel32_vector()->reserve(
+ count_receptor.mutable_rel32_vector()->size());
+ if (!gen.Run(&annotate_receptor))
return false;
- // Ownership successfully passed to instructions_.
- ignore_result(instruction.release());
+
+ // Move results to |abs32_label_annotations_| and |rel32_label_annotations_|.
+ abs32_label_annotations_.swap(*annotate_receptor.mutable_abs32_vector());
+ rel32_label_annotations_.swap(*annotate_receptor.mutable_rel32_vector());
return true;
}
-CheckBool AssemblyProgram::EmitShared(Instruction* instruction) {
- DCHECK(!instruction || instruction->op() == DEFBYTE);
- return instruction && instructions_.push_back(instruction);
-}
-
-std::unique_ptr<EncodedProgram> AssemblyProgram::Encode() const {
- std::unique_ptr<EncodedProgram> encoded(new EncodedProgram());
-
+bool AssemblyProgram::PrepareEncodedProgram(EncodedProgram* encoded) const {
encoded->set_image_base(image_base_);
-
- if (!encoded->ImportLabels(abs32_label_manager_, rel32_label_manager_))
- return nullptr;
-
- for (size_t i = 0; i < instructions_.size(); ++i) {
- Instruction* instruction = instructions_[i];
-
- switch (instruction->op()) {
- case ORIGIN: {
- OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
- if (!encoded->AddOrigin(org->origin_rva()))
- return nullptr;
- break;
- }
- case DEFBYTE: {
- uint8_t b = static_cast<ByteInstruction*>(instruction)->byte_value();
- if (!encoded->AddCopy(1, &b))
- return nullptr;
- break;
- }
- case DEFBYTES: {
- const uint8_t* byte_values =
- static_cast<BytesInstruction*>(instruction)->byte_values();
- size_t len = static_cast<BytesInstruction*>(instruction)->len();
-
- if (!encoded->AddCopy(len, byte_values))
- return nullptr;
- break;
- }
- case REL32: {
- Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
- if (!encoded->AddRel32(label->index_))
- return nullptr;
- break;
- }
- case REL32ARM: {
- Label* label =
- static_cast<InstructionWithLabelARM*>(instruction)->label();
- uint16_t compressed_op =
- static_cast<InstructionWithLabelARM*>(instruction)->compressed_op();
- if (!encoded->AddRel32ARM(compressed_op, label->index_))
- return nullptr;
- break;
- }
- case ABS32: {
- Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
- if (!encoded->AddAbs32(label->index_))
- return nullptr;
- break;
- }
- case ABS64: {
- Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
- if (!encoded->AddAbs64(label->index_))
- return nullptr;
- break;
- }
- case MAKEPERELOCS: {
- if (!encoded->AddPeMakeRelocs(kind_))
- return nullptr;
- break;
- }
- case MAKEELFRELOCS: {
- if (!encoded->AddElfMakeRelocs())
- return nullptr;
- break;
- }
- case MAKEELFARMRELOCS: {
- if (!encoded->AddElfARMMakeRelocs())
- return nullptr;
- break;
- }
- default: {
- NOTREACHED() << "Unknown Insn OP kind";
- }
- }
- }
-
- return encoded;
-}
-
-Instruction* AssemblyProgram::GetByteInstruction(uint8_t byte) {
- if (!byte_instruction_cache_) {
- Instruction** ram = nullptr;
- if (!base::UncheckedMalloc(sizeof(Instruction*) * 256,
- reinterpret_cast<void**>(&ram))) {
- return nullptr;
- }
- byte_instruction_cache_.reset(ram);
-
- for (int i = 0; i < 256; ++i) {
- byte_instruction_cache_[i] =
- UncheckedNew<ByteInstruction>(static_cast<uint8_t>(i));
- if (!byte_instruction_cache_[i]) {
- for (int j = 0; j < i; ++j)
- UncheckedDelete(byte_instruction_cache_[j]);
- byte_instruction_cache_.reset();
- return nullptr;
- }
- }
- }
-
- return byte_instruction_cache_[byte];
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-Status Encode(const AssemblyProgram& program,
- std::unique_ptr<EncodedProgram>* output) {
- // Explicitly release any memory associated with the output before encoding.
- output->reset();
-
- *output = program.Encode();
- return (*output) ? C_OK : C_GENERAL_ERROR;
+ return encoded->ImportLabels(abs32_label_manager_, rel32_label_manager_);
}
} // namespace courgette
diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h
index 3fd1c99b..9ec944a 100644
--- a/courgette/assembly_program.h
+++ b/courgette/assembly_program.h
@@ -5,80 +5,42 @@
#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
#define COURGETTE_ASSEMBLY_PROGRAM_H_
-#include <stddef.h>
#include <stdint.h>
-#include <memory>
#include <vector>
-#include "base/callback_forward.h"
#include "base/macros.h"
-#include "base/memory/free_deleter.h"
#include "courgette/courgette.h"
#include "courgette/image_utils.h"
#include "courgette/instruction_utils.h"
#include "courgette/label_manager.h"
-#include "courgette/memory_allocator.h"
+#include "courgette/memory_allocator.h" // For CheckBool.
namespace courgette {
class EncodedProgram;
-// Opcodes of simple assembly language
-enum OP {
- ORIGIN, // ORIGIN <rva> - set current address for assembly.
- MAKEPERELOCS, // Generates a base relocation table.
- MAKEELFRELOCS, // Generates a base relocation table.
- DEFBYTE, // DEFBYTE <value> - emit a byte literal.
- REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'.
- ABS32, // ABS32 <label> - emit an abs32 encoded reference to 'label'.
- REL32ARM, // REL32ARM <c_op> <label> - arm-specific rel32 reference
- MAKEELFARMRELOCS, // Generates a base relocation table.
- DEFBYTES, // Emits any number of byte literals
- ABS64, // ABS64 <label> - emit an abs64 encoded reference to 'label'.
- LAST_OP
-};
-
-// Base class for instructions. Because we have so many instructions we want to
-// keep them as small as possible. For this reason we avoid virtual functions.
-class Instruction {
- public:
- OP op() const { return static_cast<OP>(op_); }
-
- protected:
- explicit Instruction(OP op) : op_(op), info_(0) {}
- Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
-
- uint32_t op_ : 4; // A few bits to store the OP code.
- uint32_t info_ : 28; // Remaining bits in first word available to subclass.
-
- private:
- DISALLOW_COPY_AND_ASSIGN(Instruction);
-};
-
-// An AssemblyProgram is the result of disassembling an executable file.
+// An AssemblyProgram stores Labels extracted from an executable file, and
+// (optionally) Label annotations. It is initialized by a Disassembler, but
+// stores separate state so that the Disassembler can be deleted. Typical usage:
//
-// * The disassembler creates labels in the AssemblyProgram and emits
-// 'Instructions'.
-// * The disassembler then calls DefaultAssignIndexes to assign
-// addresses to positions in the address tables.
+// * The Disassembler calls PrecomputeLabels() and injects RVAs for abs32/rel32
+// references. These are used to initialize labels.
+// * The Disassembler calls DefaultAssignIndexes() to assign addresses to
+// positions in the address tables.
// * [Optional step]
-// * At this point the AssemblyProgram can be converted into an
-// EncodedProgram and serialized to an output stream.
-// * Later, the EncodedProgram can be deserialized and assembled into
-// the original file.
+// * The Disassembler can use Labels in AssemblyProgram to convert the
+// executable file to an EncodedProgram, serialized to an output stream.
+// * Later, the EncodedProgram can be deserialized and assembled into the
+// original executable file.
//
-// The optional step is to modify the AssemblyProgram. One form of modification
-// is to assign indexes in such a way as to make the EncodedProgram for this
-// AssemblyProgram look more like the EncodedProgram for some other
-// AssemblyProgram. The modification process should call UnassignIndexes, do
-// its own assignment, and then call AssignRemainingIndexes to ensure all
-// indexes are assigned.
-
+// The optional step is to adjust Labels in the AssemblyProgram. One form of
+// adjustment is to assign indexes in such a way as to make the EncodedProgram
+// for an executable look more like the EncodedProgram for another executable.
+// The adjustment process should call UnassignIndexes(), do its own assignment,
+// and then call AssignRemainingIndexes() to ensure all indexes are assigned.
class AssemblyProgram {
public:
- using LabelHandler = base::Callback<void(Label*)>;
-
AssemblyProgram(ExecutableType kind, uint64_t image_base);
~AssemblyProgram();
@@ -89,12 +51,6 @@
const std::vector<Label*>& rel32_label_annotations() const {
return rel32_label_annotations_;
}
- std::vector<Label*>* mutable_abs32_label_annotations() {
- return &abs32_label_annotations_;
- }
- std::vector<Label*>* mutable_rel32_label_annotations() {
- return &rel32_label_annotations_;
- }
// Traverses RVAs in |abs32_visitor| and |rel32_visitor| to precompute Labels.
void PrecomputeLabels(RvaVisitor* abs32_visitor, RvaVisitor* rel32_visitor);
@@ -113,77 +69,21 @@
// Looks up rel32 label. Returns null if none found.
Label* FindRel32Label(RVA rva);
- // Calls |gen| in 2 passes to emit instructions. In pass 1 we provide a
- // receptor to count space requirement. In pass 2 we provide a receptor to
- // store instructions. If |annotate_labels| is true, then extracts Label
- // annotations into |*_label_annotations_|.
- CheckBool GenerateInstructions(const InstructionGenerator& gen,
- bool annotate_labels);
+ // Uses |gen| to initialize |*_label_annotations_|.
+ CheckBool AnnotateLabels(const InstructionGenerator& gen);
- // Returns an EncodeProgram that converts program to encoded form.
- std::unique_ptr<EncodedProgram> Encode() const;
-
- // TODO(huangs): Implement these in InstructionStoreReceptor.
- // Instructions will be assembled in the order they are emitted.
-
- // Generates an entire base relocation table.
- CheckBool EmitPeRelocs() WARN_UNUSED_RESULT;
-
- // Generates an ELF style relocation table for X86.
- CheckBool EmitElfRelocation() WARN_UNUSED_RESULT;
-
- // Generates an ELF style relocation table for ARM.
- CheckBool EmitElfARMRelocation() WARN_UNUSED_RESULT;
-
- // Following instruction will be assembled at address 'rva'.
- CheckBool EmitOrigin(RVA rva) WARN_UNUSED_RESULT;
-
- // Generates a single byte of data or machine instruction.
- CheckBool EmitSingleByte(uint8_t byte) WARN_UNUSED_RESULT;
-
- // Generates multiple bytes of data or machine instructions.
- CheckBool EmitMultipleBytes(const uint8_t* bytes,
- size_t len) WARN_UNUSED_RESULT;
-
- // Generates a 4-byte relative reference to address of 'label'.
- CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
-
- // Generates a 4-byte relative reference to address of 'label' for ARM.
- CheckBool EmitRel32ARM(uint16_t op,
- Label* label,
- const uint8_t* arm_op,
- uint16_t op_size) WARN_UNUSED_RESULT;
-
- // Generates a 4-byte absolute reference to address of 'label'.
- CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
-
- // Generates an 8-byte absolute reference to address of 'label'.
- CheckBool EmitAbs64(Label* label) WARN_UNUSED_RESULT;
+ // Initializes |encoded| by injecting basic data and Label data.
+ bool PrepareEncodedProgram(EncodedProgram* encoded) const;
private:
- using InstructionVector = NoThrowBuffer<Instruction*>;
-
- using ScopedInstruction =
- std::unique_ptr<Instruction, UncheckedDeleter<Instruction>>;
-
- CheckBool Emit(ScopedInstruction instruction) WARN_UNUSED_RESULT;
- CheckBool EmitShared(Instruction* instruction) WARN_UNUSED_RESULT;
-
static const int kLabelLowerLimit;
// Looks up a label or creates a new one. Might return NULL.
Label* FindLabel(RVA rva, RVAToLabel* labels);
- // Sharing instructions that emit a single byte saves a lot of space.
- Instruction* GetByteInstruction(uint8_t byte);
-
const ExecutableType kind_;
const uint64_t image_base_; // Desired or mandated base address of image.
- std::unique_ptr<Instruction* [], base::FreeDeleter> byte_instruction_cache_;
-
- InstructionVector instructions_; // All the instructions in program.
-
// Storage and lookup of Labels associated with target addresses. We use
// separate abs32 and rel32 labels.
LabelManager abs32_label_manager_;
@@ -197,12 +97,6 @@
DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
};
-// Converts |program| into encoded form, returning it as |*output|.
-// Returns C_OK if succeeded, otherwise returns an error status and sets
-// |*output| to null.
-Status Encode(const AssemblyProgram& program,
- std::unique_ptr<EncodedProgram>* output);
-
} // namespace courgette
#endif // COURGETTE_ASSEMBLY_PROGRAM_H_
diff --git a/courgette/courgette_flow.cc b/courgette/courgette_flow.cc
index 2fa9810..4a1f7f3 100644
--- a/courgette/courgette_flow.cc
+++ b/courgette/courgette_flow.cc
@@ -11,6 +11,7 @@
#include "base/memory/ptr_util.h"
#include "base/strings/stringprintf.h"
#include "courgette/assembly_program.h"
+#include "courgette/disassembler.h"
#include "courgette/encoded_program.h"
#include "courgette/program_detector.h"
@@ -86,16 +87,14 @@
}
}
-void CourgetteFlow::ReadAssemblyProgramFromBuffer(Group group,
- const BasicBuffer& buffer,
- bool annotate) {
+void CourgetteFlow::ReadDisassemblerFromBuffer(Group group,
+ const BasicBuffer& buffer) {
if (failed())
return;
Data* d = data(group);
- auto parser = annotate ? ParseDetectedExecutableWithAnnotation
- : ParseDetectedExecutable;
- if (!check(parser(buffer.data(), buffer.length(), &d->program)))
- setMessage("Cannot parse %s (code = %d).", name(group), status_);
+ d->disassembler = DetectDisassembler(buffer.data(), buffer.length());
+ if (!check(d->disassembler.get() != nullptr, C_INPUT_NOT_RECOGNIZED))
+ setMessage("Cannot detect program for %s.", name(group));
}
void CourgetteFlow::ReadEncodedProgramFromSourceStreamSet(
@@ -109,12 +108,27 @@
setMessage("Cannot read %s as encoded program.", name(group));
}
-void CourgetteFlow::CreateEncodedProgramFromAssemblyProgram(Group group) {
+void CourgetteFlow::CreateAssemblyProgramFromDisassembler(Group group,
+ bool annotate) {
if (failed())
return;
Data* d = data(group);
- if (!check(Encode(*d->program, &d->encoded)))
- setMessage("Cannot encode %s (code = %d).", name(group), status_);
+ d->program = d->disassembler->CreateProgram(annotate);
+ if (!check(d->program.get() != nullptr, C_DISASSEMBLY_FAILED))
+ setMessage("Cannot create AssemblyProgram for %s.", name(group));
+}
+
+void CourgetteFlow::CreateEncodedProgramFromDisassemblerAndAssemblyProgram(
+ Group group) {
+ if (failed())
+ return;
+ Data* d = data(group);
+ d->encoded.reset(new EncodedProgram());
+ if (!check(d->disassembler->DisassembleAndEncode(d->program.get(),
+ d->encoded.get()))) {
+ setMessage("Cannot disassemble to form EncodedProgram for %s.",
+ name(group));
+ }
}
void CourgetteFlow::WriteSinkStreamFromSinkStreamSet(Group group,
@@ -153,6 +167,12 @@
setMessage("Cannot adjust %s to match %s.", name(OLD), name(NEW));
}
+void CourgetteFlow::DestroyDisassembler(Group group) {
+ if (failed())
+ return;
+ data(group)->disassembler.reset();
+}
+
void CourgetteFlow::DestroyAssemblyProgram(Group group) {
if (failed())
return;
diff --git a/courgette/courgette_flow.h b/courgette/courgette_flow.h
index e651be7..8329742 100644
--- a/courgette/courgette_flow.h
+++ b/courgette/courgette_flow.h
@@ -16,6 +16,7 @@
namespace courgette {
class AssemblyProgram;
+class Disassembler;
class EncodedProgram;
// An adaptor for Region as BasicBuffer.
@@ -47,6 +48,7 @@
Data();
~Data();
+ std::unique_ptr<Disassembler> disassembler;
std::unique_ptr<AssemblyProgram> program;
std::unique_ptr<EncodedProgram> encoded;
SinkStreamSet sinks;
@@ -78,12 +80,8 @@
// Reads |buffer| to initialize |data(group)->sources|.
void ReadSourceStreamSetFromBuffer(Group group, const BasicBuffer& buffer);
- // Reads |buffer| to initialize |data(group)->program|, passing |annotate| as
- // initialization parameter (true if AdjustNewAssemblyProgramToMatchOld() gets
- // called later).
- void ReadAssemblyProgramFromBuffer(Group group,
- const BasicBuffer& buffer,
- bool annotate);
+ // Reads |buffer| to initialize |data(group)->disassembler|.
+ void ReadDisassemblerFromBuffer(Group group, const BasicBuffer& buffer);
// Reads |opt_sources| if given, or else |data(group)->sources| to initialize
// |data(group).encoded|.
@@ -91,8 +89,14 @@
Group group,
SourceStreamSet* opt_sources = nullptr);
- // Uses |data(group)->program| to initialize |data(group)->encoded|.
- void CreateEncodedProgramFromAssemblyProgram(Group group);
+ // Uses |data(group)->disassembler| to initialize |data(group)->program|,
+ // passing |annotate| as initialization parameter (should be true if
+ // AdjustNewAssemblyProgramToMatchOld() gets called later).
+ void CreateAssemblyProgramFromDisassembler(Group group, bool annotate);
+
+ // Uses |data(group)->disassembler| and |data(group)->program| to initialize
+ // |data(group)->encoded|.
+ void CreateEncodedProgramFromDisassemblerAndAssemblyProgram(Group group);
// Serializese |data(group)->sinks| to |sink|.
void WriteSinkStreamFromSinkStreamSet(Group group, SinkStream* sink);
@@ -111,6 +115,8 @@
// Destructor commands to reduce memory usage.
+ void DestroyDisassembler(Group group);
+
void DestroyAssemblyProgram(Group group);
void DestroyEncodedProgram(Group group);
diff --git a/courgette/courgette_tool.cc b/courgette/courgette_tool.cc
index 46bd578..7abcf6e 100644
--- a/courgette/courgette_tool.cc
+++ b/courgette/courgette_tool.cc
@@ -155,8 +155,10 @@
const base::FilePath& output_file) {
CourgetteFlow flow;
BufferedFileReader input_buffer(input_file, flow.name(flow.ONLY));
- flow.ReadAssemblyProgramFromBuffer(flow.ONLY, input_buffer, false);
- flow.CreateEncodedProgramFromAssemblyProgram(flow.ONLY);
+ flow.ReadDisassemblerFromBuffer(flow.ONLY, input_buffer);
+ flow.CreateAssemblyProgramFromDisassembler(flow.ONLY, false);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.ONLY);
+ flow.DestroyDisassembler(flow.ONLY);
flow.DestroyAssemblyProgram(flow.ONLY);
flow.WriteSinkStreamSetFromEncodedProgram(flow.ONLY);
flow.DestroyEncodedProgram(flow.ONLY);
@@ -171,16 +173,27 @@
void DisassembleAndAdjust(const base::FilePath& old_file,
const base::FilePath& new_file,
const base::FilePath& output_file) {
+ // Flow graph and process sequence (DA = Disassembler, AP = AssemblyProgram,
+ // EP = EncodedProgram, Adj = Adjusted):
+ //   [1 Old DA] --> [2 Old AP]    [4 New AP] <-- [3 New DA]
+ //                      |             |              |
+ //                      |             v (move)       v
+ //                      +---> [5 Adj New AP] --> [6 New EP]
+ //                                               (7 Write)
CourgetteFlow flow;
BufferedFileReader old_buffer(old_file, flow.name(flow.OLD));
BufferedFileReader new_buffer(new_file, flow.name(flow.NEW));
- flow.ReadAssemblyProgramFromBuffer(flow.OLD, old_buffer, true);
- flow.ReadAssemblyProgramFromBuffer(flow.NEW, new_buffer, true);
- flow.AdjustNewAssemblyProgramToMatchOld();
+ flow.ReadDisassemblerFromBuffer(flow.OLD, old_buffer); // 1
+ flow.CreateAssemblyProgramFromDisassembler(flow.OLD, true); // 2
+ flow.DestroyDisassembler(flow.OLD);
+ flow.ReadDisassemblerFromBuffer(flow.NEW, new_buffer); // 3
+ flow.CreateAssemblyProgramFromDisassembler(flow.NEW, true); // 4
+ flow.AdjustNewAssemblyProgramToMatchOld(); // 5
flow.DestroyAssemblyProgram(flow.OLD);
- flow.CreateEncodedProgramFromAssemblyProgram(flow.NEW);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.NEW); // 6
flow.DestroyAssemblyProgram(flow.NEW);
- flow.WriteSinkStreamSetFromEncodedProgram(flow.NEW);
+ flow.DestroyDisassembler(flow.NEW);
+ flow.WriteSinkStreamSetFromEncodedProgram(flow.NEW); // 7
flow.DestroyEncodedProgram(flow.NEW);
courgette::SinkStream sink;
flow.WriteSinkStreamFromSinkStreamSet(flow.NEW, &sink);
@@ -199,20 +212,33 @@
const base::FilePath& new_file,
const base::FilePath& output_file_root,
bool adjust) {
+ // Same as PatchGeneratorX86_32::Transform(), except Adjust is optional, and
+ // |flow|'s internal SinkStreamSet gets used.
+ // Flow graph and process sequence (DA = Disassembler, AP = AssemblyProgram,
+ // EP = EncodedProgram, Adj = Adjusted):
+ //   [1 Old DA] --> [2 Old AP]    [6 New AP] <-- [5 New DA]
+ //       |            |    |          |              |
+ //       v            |    |          v (move)       v
+ //   [3 Old EP] <-----+    +->[7 Adj New AP] --> [8 New EP]
+ //   (4 Write)                                   (9 Write)
CourgetteFlow flow;
BufferedFileReader old_buffer(old_file, flow.name(flow.OLD));
BufferedFileReader new_buffer(new_file, flow.name(flow.NEW));
- flow.ReadAssemblyProgramFromBuffer(flow.OLD, old_buffer, adjust);
- flow.ReadAssemblyProgramFromBuffer(flow.NEW, new_buffer, adjust);
- if (adjust)
- flow.AdjustNewAssemblyProgramToMatchOld();
- flow.CreateEncodedProgramFromAssemblyProgram(flow.OLD);
- flow.DestroyAssemblyProgram(flow.OLD);
- flow.CreateEncodedProgramFromAssemblyProgram(flow.NEW);
- flow.DestroyAssemblyProgram(flow.NEW);
- flow.WriteSinkStreamSetFromEncodedProgram(flow.OLD);
+ flow.ReadDisassemblerFromBuffer(flow.OLD, old_buffer); // 1
+ flow.CreateAssemblyProgramFromDisassembler(flow.OLD, adjust); // 2
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.OLD); // 3
+ flow.DestroyDisassembler(flow.OLD);
+ flow.WriteSinkStreamSetFromEncodedProgram(flow.OLD); // 4
flow.DestroyEncodedProgram(flow.OLD);
- flow.WriteSinkStreamSetFromEncodedProgram(flow.NEW);
+ flow.ReadDisassemblerFromBuffer(flow.NEW, new_buffer); // 5
+ flow.CreateAssemblyProgramFromDisassembler(flow.NEW, adjust); // 6
+ if (adjust)
+ flow.AdjustNewAssemblyProgramToMatchOld(); // 7, optional
+ flow.DestroyAssemblyProgram(flow.OLD);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.NEW); // 8
+ flow.DestroyAssemblyProgram(flow.NEW);
+ flow.DestroyDisassembler(flow.NEW);
+ flow.WriteSinkStreamSetFromEncodedProgram(flow.NEW); // 9
flow.DestroyEncodedProgram(flow.NEW);
if (flow.failed())
Problem(flow.message().c_str());
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc
index 64ca2740..e14ff32 100644
--- a/courgette/disassembler.cc
+++ b/courgette/disassembler.cc
@@ -7,6 +7,7 @@
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "courgette/assembly_program.h"
+#include "courgette/encoded_program.h"
namespace courgette {
@@ -54,8 +55,7 @@
return FileOffsetToPointer(file_offset);
}
-std::unique_ptr<AssemblyProgram> Disassembler::Disassemble(
- bool annotate_labels) {
+std::unique_ptr<AssemblyProgram> Disassembler::CreateProgram(bool annotate) {
if (!ok() || !ExtractAbs32Locations() || !ExtractRel32Locations())
return nullptr;
@@ -64,16 +64,25 @@
PrecomputeLabels(program.get());
RemoveUnusedRel32Locations(program.get());
+ program->DefaultAssignIndexes();
- if (!program->GenerateInstructions(GetInstructionGenerator(program.get()),
- annotate_labels)) {
- return nullptr;
+ if (annotate) {
+ if (!program->AnnotateLabels(GetInstructionGenerator(program.get())))
+ return nullptr;
}
- program->DefaultAssignIndexes();
return program;
}
+// Encodes the disassembled image into |encoded|:
+// - First lets |program| inject the data |encoded| needs, via
+//   PrepareEncodedProgram().
+// - Then hands this Disassembler's instruction generator (bound to |program|)
+//   to |encoded|, which receives and stores the emitted instructions.
+// Returns C_OK if GenerateInstructions() succeeds, and C_DISASSEMBLY_FAILED
+// otherwise.
+// NOTE(review): |program| and |encoded| are dereferenced without null checks;
+// presumably callers guarantee both are non-null -- confirm.
+Status Disassembler::DisassembleAndEncode(AssemblyProgram* program,
+ EncodedProgram* encoded) {
+ program->PrepareEncodedProgram(encoded);
+ return encoded->GenerateInstructions(program->kind(),
+ GetInstructionGenerator(program))
+ ? C_OK
+ : C_DISASSEMBLY_FAILED;
+}
+
bool Disassembler::Good() {
failure_reason_ = nullptr;
return true;
diff --git a/courgette/disassembler.h b/courgette/disassembler.h
index d3175d4..5b1bb8de 100644
--- a/courgette/disassembler.h
+++ b/courgette/disassembler.h
@@ -18,6 +18,7 @@
namespace courgette {
class AssemblyProgram;
+class EncodedProgram;
class Disassembler : public AddressTranslator {
public:
@@ -91,9 +92,13 @@
virtual bool ParseHeader() = 0;
// Extracts and stores references from the main image. Returns a new
- // AssemblyProgram initialized using data parsed from the main image and
- // |annotate_labels|, or null on failure.
- std::unique_ptr<AssemblyProgram> Disassemble(bool annotate_labels);
+ // AssemblyProgram with initialized Labels, or null on failure. If |annotate|
+ // is true, Label Annotations are generated as well.
+ std::unique_ptr<AssemblyProgram> CreateProgram(bool annotate);
+
+ // Goes through the entire program (with the help of |program|), computes all
+ // instructions, and stores them into |encoded|.
+ Status DisassembleAndEncode(AssemblyProgram* program,
+ EncodedProgram* encoded);
// ok() may always be called but returns true only after ParseHeader()
// succeeds.
diff --git a/courgette/disassembler_elf_32_x86_unittest.cc b/courgette/disassembler_elf_32_x86_unittest.cc
index d44defc..d6943b5e 100644
--- a/courgette/disassembler_elf_32_x86_unittest.cc
+++ b/courgette/disassembler_elf_32_x86_unittest.cc
@@ -91,7 +91,7 @@
EXPECT_EQ('L', offset_p[2]);
EXPECT_EQ('F', offset_p[3]);
- std::unique_ptr<AssemblyProgram> program = disassembler->Disassemble(false);
+ std::unique_ptr<AssemblyProgram> program = disassembler->CreateProgram(false);
EXPECT_TRUE(nullptr != program.get());
const std::vector<RVA>& abs32_list = disassembler->Abs32Locations();
diff --git a/courgette/encode_decode_unittest.cc b/courgette/encode_decode_unittest.cc
index 7b705f12..a07baf3 100644
--- a/courgette/encode_decode_unittest.cc
+++ b/courgette/encode_decode_unittest.cc
@@ -6,12 +6,9 @@
#include <memory>
-#include "courgette/assembly_program.h"
#include "courgette/base_test_unittest.h"
#include "courgette/courgette.h"
#include "courgette/courgette_flow.h"
-#include "courgette/encoded_program.h"
-#include "courgette/program_detector.h"
#include "courgette/streams.h"
namespace courgette {
@@ -31,11 +28,15 @@
// Convert executable to encoded assembly.
RegionBuffer original_buffer(Region(original_data, original_length));
- flow.ReadAssemblyProgramFromBuffer(flow.ONLY, original_buffer, false);
+ flow.ReadDisassemblerFromBuffer(flow.ONLY, original_buffer);
+ EXPECT_EQ(C_OK, flow.status());
+ EXPECT_TRUE(nullptr != flow.data(flow.ONLY)->disassembler.get());
+
+ flow.CreateAssemblyProgramFromDisassembler(flow.ONLY, false);
EXPECT_EQ(C_OK, flow.status());
EXPECT_TRUE(nullptr != flow.data(flow.ONLY)->program.get());
- flow.CreateEncodedProgramFromAssemblyProgram(flow.ONLY);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.ONLY);
EXPECT_EQ(C_OK, flow.status());
EXPECT_TRUE(nullptr != flow.data(flow.ONLY)->encoded.get());
@@ -43,6 +44,10 @@
EXPECT_EQ(C_OK, flow.status());
EXPECT_TRUE(nullptr == flow.data(flow.ONLY)->program.get());
+ flow.DestroyDisassembler(flow.ONLY);
+ EXPECT_EQ(C_OK, flow.status());
+ EXPECT_TRUE(nullptr == flow.data(flow.ONLY)->disassembler.get());
+
flow.WriteSinkStreamSetFromEncodedProgram(flow.ONLY);
EXPECT_EQ(C_OK, flow.status());
diff --git a/courgette/encoded_program.cc b/courgette/encoded_program.cc
index 5e84ec34..c502978 100644
--- a/courgette/encoded_program.cc
+++ b/courgette/encoded_program.cc
@@ -127,6 +127,56 @@
return ok;
}
+/******** InstructionStoreReceptor ********/
+
+// An InstructionReceptor that stores emitted instructions. Each Emit*()
+// override forwards to the corresponding Add*() method of the EncodedProgram
+// given at construction and returns that method's result.
+class InstructionStoreReceptor : public InstructionReceptor {
+ public:
+ // |exe_type| is later passed to AddPeMakeRelocs(). |encoded| is the
+ // destination program; it must be non-null (enforced by CHECK) and is held
+ // as a raw pointer that this class never deletes.
+ explicit InstructionStoreReceptor(ExecutableType exe_type,
+ EncodedProgram* encoded)
+ : exe_type_(exe_type), encoded_(encoded) {
+ CHECK(encoded_);
+ }
+
+ // Relocation markers: PE needs the executable type, the ELF variants do not.
+ CheckBool EmitPeRelocs() override {
+ return encoded_->AddPeMakeRelocs(exe_type_);
+ }
+ CheckBool EmitElfRelocation() override {
+ return encoded_->AddElfMakeRelocs();
+ }
+ CheckBool EmitElfARMRelocation() override {
+ return encoded_->AddElfARMMakeRelocs();
+ }
+ CheckBool EmitOrigin(RVA rva) override { return encoded_->AddOrigin(rva); }
+ // A single byte is stored as a copy of length 1.
+ CheckBool EmitSingleByte(uint8_t byte) override {
+ return encoded_->AddCopy(1, &byte);
+ }
+ CheckBool EmitMultipleBytes(const uint8_t* bytes, size_t len) override {
+ return encoded_->AddCopy(len, bytes);
+ }
+ // References are stored using the Label's |index_|.
+ CheckBool EmitRel32(Label* label) override {
+ return encoded_->AddRel32(label->index_);
+ }
+ // Only |op| and the Label's index are forwarded; |arm_op| and |op_size| are
+ // not used by this receptor.
+ CheckBool EmitRel32ARM(uint16_t op,
+ Label* label,
+ const uint8_t* arm_op,
+ uint16_t op_size) override {
+ return encoded_->AddRel32ARM(op, label->index_);
+ }
+ CheckBool EmitAbs32(Label* label) override {
+ return encoded_->AddAbs32(label->index_);
+ }
+ CheckBool EmitAbs64(Label* label) override {
+ return encoded_->AddAbs64(label->index_);
+ }
+
+ private:
+ // Executable type used when emitting PE relocations.
+ ExecutableType exe_type_;
+ // Destination of all emitted instructions; checked non-null at construction.
+ EncodedProgram* encoded_;
+
+ DISALLOW_COPY_AND_ASSIGN(InstructionStoreReceptor);
+};
+
} // namespace
////////////////////////////////////////////////////////////////////////////////
@@ -673,6 +723,13 @@
return true;
}
+// Runs |gen| with an InstructionStoreReceptor bound to |exe_type| and to this
+// EncodedProgram, so that every instruction |gen| emits is added to |this|.
+// Returns the result of running |gen|.
+CheckBool EncodedProgram::GenerateInstructions(
+ ExecutableType exe_type,
+ const InstructionGenerator& gen) {
+ InstructionStoreReceptor store_receptor(exe_type, this);
+ return gen.Run(&store_receptor);
+}
+
// RelocBlock has the layout of a block of relocations in the base relocation
// table file format.
struct RelocBlockPOD {
diff --git a/courgette/encoded_program.h b/courgette/encoded_program.h
index 1169331..39051460 100644
--- a/courgette/encoded_program.h
+++ b/courgette/encoded_program.h
@@ -14,6 +14,7 @@
#include "base/macros.h"
#include "courgette/courgette.h"
#include "courgette/image_utils.h"
+#include "courgette/instruction_utils.h"
#include "courgette/memory_allocator.h"
#include "courgette/types_elf.h"
@@ -80,6 +81,10 @@
// (5) Assembles the 'binary assembly language' into final file.
CheckBool AssembleTo(SinkStream* buffer) WARN_UNUSED_RESULT;
+ // Calls |gen| to extract all instructions, which are then encoded and stored.
+ CheckBool GenerateInstructions(ExecutableType exe_type,
+ const InstructionGenerator& gen);
+
private:
// Binary assembly language operations.
// These are part of the patch format. Reusing an existing value will
diff --git a/courgette/encoded_program_fuzz_unittest.cc b/courgette/encoded_program_fuzz_unittest.cc
index 108283516..620db81 100644
--- a/courgette/encoded_program_fuzz_unittest.cc
+++ b/courgette/encoded_program_fuzz_unittest.cc
@@ -46,11 +46,15 @@
courgette::RegionBuffer original_buffer(
courgette::Region(original_data, original_length));
- flow.ReadAssemblyProgramFromBuffer(flow.ONLY, original_buffer, false);
+ flow.ReadDisassemblerFromBuffer(flow.ONLY, original_buffer);
+ EXPECT_EQ(courgette::C_OK, flow.status());
+ EXPECT_TRUE(nullptr != flow.data(flow.ONLY)->disassembler.get());
+
+ flow.CreateAssemblyProgramFromDisassembler(flow.ONLY, false);
EXPECT_EQ(courgette::C_OK, flow.status());
EXPECT_TRUE(nullptr != flow.data(flow.ONLY)->program.get());
- flow.CreateEncodedProgramFromAssemblyProgram(flow.ONLY);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.ONLY);
EXPECT_EQ(courgette::C_OK, flow.status());
EXPECT_TRUE(nullptr != flow.data(flow.ONLY)->encoded.get());
@@ -58,6 +62,10 @@
EXPECT_EQ(courgette::C_OK, flow.status());
EXPECT_TRUE(nullptr == flow.data(flow.ONLY)->program.get());
+ flow.DestroyDisassembler(flow.ONLY);
+ EXPECT_EQ(courgette::C_OK, flow.status());
+ EXPECT_TRUE(nullptr == flow.data(flow.ONLY)->disassembler.get());
+
flow.WriteSinkStreamSetFromEncodedProgram(flow.ONLY);
EXPECT_EQ(courgette::C_OK, flow.status());
diff --git a/courgette/instruction_utils.h b/courgette/instruction_utils.h
index d7dfe97..df12aa6 100644
--- a/courgette/instruction_utils.h
+++ b/courgette/instruction_utils.h
@@ -61,6 +61,20 @@
// true on success, and false otherwise.
using InstructionGenerator = base::Callback<CheckBool(InstructionReceptor*)>;
+// A counter that increments via .push_back(), so it can be passed via template
+// to substitute std::vector<T>, to count elements instead of storing them.
+// Only push_back() and size() are provided; no element is ever retained.
+template <typename T>
+class CountingVector {
+ public:
+ CountingVector() {}
+
+ // Discards the element and increments the count by one.
+ void push_back(const T& /* unused */) { ++size_; }
+ // Returns the number of push_back() calls made so far.
+ size_t size() const { return size_; }
+
+ private:
+ // Number of elements "pushed"; starts at zero.
+ size_t size_ = 0;
+};
+
} // namespace courgette
#endif // COURGETTE_INSTRUCTION_UTILS_H_
diff --git a/courgette/patch_generator_x86_32.h b/courgette/patch_generator_x86_32.h
index b03d7c1..90ef5d4 100644
--- a/courgette/patch_generator_x86_32.h
+++ b/courgette/patch_generator_x86_32.h
@@ -9,11 +9,9 @@
#include "base/logging.h"
#include "base/macros.h"
-#include "courgette/assembly_program.h"
#include "courgette/courgette_flow.h"
#include "courgette/ensemble.h"
#include "courgette/patcher_x86_32.h"
-#include "courgette/program_detector.h"
namespace courgette {
@@ -51,11 +49,14 @@
return C_OK;
}
- // The format of a transformed_element is a serialized EncodedProgram. We
- // first disassemble the original old and new Elements into AssemblyPrograms.
- // Then we adjust the new AssemblyProgram to make it as much like the old one
- // as possible, before converting the AssemblyPrograms to EncodedPrograms and
- // serializing them.
+ // The format of a transformed_element is a serialized EncodedProgram. Steps:
+ // - Form Disassembler for the old and new Elements.
+ // - Extract AssemblyPrograms from old and new Disassemblers.
+ // - Adjust the new AssemblyProgram to make it as much like the old one as
+ // possible.
+ // - Serialize old and new Disassembler to EncodedProgram, using the old
+ // AssemblyProgram and the adjusted new AssemblyProgram.
+ // The steps are performed in an order that reduces peak memory.
Status Transform(SourceStreamSet* corrected_parameters,
SinkStreamSet* old_transformed_element,
SinkStreamSet* new_transformed_element) {
@@ -63,21 +64,32 @@
if (!corrected_parameters->Empty())
return C_GENERAL_ERROR;
+ // Flow graph and process sequence (DA = Disassembler, AP = AssemblyProgram,
+ // EP = EncodedProgram, Adj = Adjusted):
+ //   [1 Old DA] --> [2 Old AP]    [6 New AP] <-- [5 New DA]
+ //       |            |    |          |              |
+ //       v            |    |          v (move)       v
+ //   [3 Old EP] <-----+    +->[7 Adj New AP] --> [8 New EP]
+ //   (4 Write)                                   (9 Write)
CourgetteFlow flow;
RegionBuffer old_buffer(old_element_->region());
RegionBuffer new_buffer(new_element_->region());
- flow.ReadAssemblyProgramFromBuffer(flow.OLD, old_buffer, true);
- flow.CreateEncodedProgramFromAssemblyProgram(flow.OLD);
+ flow.ReadDisassemblerFromBuffer(flow.OLD, old_buffer); // 1
+ flow.CreateAssemblyProgramFromDisassembler(flow.OLD, true); // 2
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.OLD); // 3
+ flow.DestroyDisassembler(flow.OLD);
flow.WriteSinkStreamSetFromEncodedProgram(flow.OLD,
- old_transformed_element);
+ old_transformed_element); // 4
flow.DestroyEncodedProgram(flow.OLD);
- flow.ReadAssemblyProgramFromBuffer(flow.NEW, new_buffer, true);
- flow.AdjustNewAssemblyProgramToMatchOld();
+ flow.ReadDisassemblerFromBuffer(flow.NEW, new_buffer); // 5
+ flow.CreateAssemblyProgramFromDisassembler(flow.NEW, true); // 6
+ flow.AdjustNewAssemblyProgramToMatchOld(); // 7
flow.DestroyAssemblyProgram(flow.OLD);
- flow.CreateEncodedProgramFromAssemblyProgram(flow.NEW);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.NEW); // 8
flow.DestroyAssemblyProgram(flow.NEW);
+ flow.DestroyDisassembler(flow.NEW);
flow.WriteSinkStreamSetFromEncodedProgram(flow.NEW,
- new_transformed_element);
+ new_transformed_element); // 9
if (flow.failed()) {
LOG(ERROR) << flow.message() << " (" << old_element_->Name() << " => "
<< new_element_->Name() << ")";
diff --git a/courgette/patcher_x86_32.h b/courgette/patcher_x86_32.h
index fa822dc..3524437 100644
--- a/courgette/patcher_x86_32.h
+++ b/courgette/patcher_x86_32.h
@@ -7,14 +7,12 @@
#include <stdint.h>
-#include <memory>
-
+#include "base/logging.h"
#include "base/macros.h"
-#include "courgette/assembly_program.h"
#include "courgette/courgette_flow.h"
-#include "courgette/encoded_program.h"
#include "courgette/ensemble.h"
-#include "courgette/program_detector.h"
+#include "courgette/region.h"
+#include "courgette/streams.h"
namespace courgette {
@@ -49,12 +47,17 @@
Status Transform(SourceStreamSet* corrected_parameters,
SinkStreamSet* transformed_element) {
+ if (!corrected_parameters->Empty())
+ return C_GENERAL_ERROR; // Don't expect any corrected parameters.
+
CourgetteFlow flow;
RegionBuffer only_buffer(
Region(ensemble_region_.start() + base_offset_, base_length_));
- flow.ReadAssemblyProgramFromBuffer(flow.ONLY, only_buffer, false);
- flow.CreateEncodedProgramFromAssemblyProgram(flow.ONLY);
+ flow.ReadDisassemblerFromBuffer(flow.ONLY, only_buffer);
+ flow.CreateAssemblyProgramFromDisassembler(flow.ONLY, false);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.ONLY);
flow.DestroyAssemblyProgram(flow.ONLY);
+ flow.DestroyDisassembler(flow.ONLY);
flow.WriteSinkStreamSetFromEncodedProgram(flow.ONLY, transformed_element);
if (flow.failed())
LOG(ERROR) << flow.message();
diff --git a/courgette/program_detector.cc b/courgette/program_detector.cc
index cc0f9ce..12461f9 100644
--- a/courgette/program_detector.cc
+++ b/courgette/program_detector.cc
@@ -4,9 +4,6 @@
#include "courgette/program_detector.h"
-#include <utility>
-
-#include "courgette/assembly_program.h"
#include "courgette/disassembler.h"
#include "courgette/disassembler_elf_32_arm.h"
#include "courgette/disassembler_elf_32_x86.h"
@@ -15,10 +12,6 @@
namespace courgette {
-namespace {
-
-// Returns a new instance of Disassembler subclass if binary data given in
-// |buffer| and |length| matches a known binary format, otherwise null.
std::unique_ptr<Disassembler> DetectDisassembler(const uint8_t* buffer,
size_t length) {
std::unique_ptr<Disassembler> disassembler;
@@ -46,29 +39,6 @@
return nullptr;
}
-Status ParseDetectedExecutableInternal(
- const uint8_t* buffer,
- size_t length,
- bool annotate_labels,
- std::unique_ptr<AssemblyProgram>* output) {
- output->reset();
-
- std::unique_ptr<Disassembler> disassembler(
- DetectDisassembler(buffer, length));
- if (!disassembler)
- return C_INPUT_NOT_RECOGNIZED;
-
- std::unique_ptr<AssemblyProgram> program =
- disassembler->Disassemble(annotate_labels);
- if (!program.get())
- return C_DISASSEMBLY_FAILED;
-
- *output = std::move(program);
- return C_OK;
-}
-
-} // namespace
-
Status DetectExecutableType(const uint8_t* buffer,
size_t length,
ExecutableType* type,
@@ -87,17 +57,4 @@
return C_OK;
}
-Status ParseDetectedExecutable(const uint8_t* buffer,
- size_t length,
- std::unique_ptr<AssemblyProgram>* output) {
- return ParseDetectedExecutableInternal(buffer, length, false, output);
-}
-
-Status ParseDetectedExecutableWithAnnotation(
- const uint8_t* buffer,
- size_t length,
- std::unique_ptr<AssemblyProgram>* output) {
- return ParseDetectedExecutableInternal(buffer, length, true, output);
-}
-
} // namespace courgette
diff --git a/courgette/program_detector.h b/courgette/program_detector.h
index a869a3f..d44f376 100644
--- a/courgette/program_detector.h
+++ b/courgette/program_detector.h
@@ -14,7 +14,12 @@
namespace courgette {
-class AssemblyProgram;
+class Disassembler;
+
+// Returns a new instance of a Disassembler subclass if the binary data given
+// in |buffer| and |length| matches a known binary format, otherwise null.
+std::unique_ptr<Disassembler> DetectDisassembler(const uint8_t* buffer,
+ size_t length);
// Detects the type of an executable file, and it's length. The length may be
// slightly smaller than some executables (like ELF), but will include all bytes
@@ -29,24 +34,6 @@
ExecutableType* type,
size_t* detected_length);
-// Attempts to detect the type of executable by parsing it with the appropriate
-// tools.
-// On success:
-// Parses the executable into a new AssemblyProgram in |*output|, and returns
-// C_OK.
-// On failure:
-// Returns an error status and assigns |*output| to null.
-Status ParseDetectedExecutable(const uint8_t* buffer,
- size_t length,
- std::unique_ptr<AssemblyProgram>* output);
-
-// ParseDetectedExecutable(), with Label annotations generated and stored in
-// |output|.
-Status ParseDetectedExecutableWithAnnotation(
- const uint8_t* buffer,
- size_t length,
- std::unique_ptr<AssemblyProgram>* output);
-
} // namespace courgette
#endif // COURGETTE_PROGRAM_DETECTOR_H_
diff --git a/testing/libfuzzer/fuzzers/courgette_fuzzer.cc b/testing/libfuzzer/fuzzers/courgette_fuzzer.cc
index fffb5b7..ec61ee7 100644
--- a/testing/libfuzzer/fuzzers/courgette_fuzzer.cc
+++ b/testing/libfuzzer/fuzzers/courgette_fuzzer.cc
@@ -5,21 +5,18 @@
#include <stddef.h>
#include <stdint.h>
-#include <memory>
-
-#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
-#include "courgette/encoded_program.h"
-#include "courgette/program_detector.h"
+#include "courgette/courgette_flow.h"
+#include "courgette/region.h"
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- std::unique_ptr<courgette::AssemblyProgram> prog;
- courgette::Status status =
- courgette::ParseDetectedExecutable(data, size, &prog);
- if (status != courgette::C_OK) {
- return 0;
- }
- std::unique_ptr<courgette::EncodedProgram> enc_prog(prog->Encode());
+ // Wrap the raw fuzz input as a buffer and push it through the
+ // Disassembler -> AssemblyProgram -> EncodedProgram -> SinkStreamSet steps.
+ courgette::CourgetteFlow flow;
+ courgette::RegionBuffer buffer(courgette::Region(data, size));
+ flow.ReadDisassemblerFromBuffer(flow.ONLY, buffer);
+ // |annotate| is false: no AdjustNewAssemblyProgramToMatchOld() call follows.
+ flow.CreateAssemblyProgramFromDisassembler(flow.ONLY, false);
+ flow.CreateEncodedProgramFromDisassemblerAndAssemblyProgram(flow.ONLY);
+ flow.WriteSinkStreamSetFromEncodedProgram(flow.ONLY);
+ // Not bothering to check |flow.failed()|.
+ // NOTE(review): this relies on the later flow steps being safe to call after
+ // an earlier step has failed -- confirm against CourgetteFlow.
return 0;
}