Skip to content

Commit

Permalink
Basic pointer use heuristic (#402)
Browse files Browse the repository at this point in the history
* basic pointer fix

* add binary addrs

* spurious header

* add type names

* named types

* default for null ty

* fix typedef
  • Loading branch information
2over12 authored Dec 5, 2023
1 parent e8ca92c commit 70209a8
Show file tree
Hide file tree
Showing 14 changed files with 257 additions and 54 deletions.
17 changes: 17 additions & 0 deletions data_specifications/specification.proto
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,18 @@ message TypeHint {
Variable target_var = 2;
}

// An address expressed as a pair of an unsigned entry virtual address and a
// signed displacement.
// NOTE(review): presumably the denoted location is `entry_vaddr +
// displacement`; confirm against the code that consumes this message.
message RelativeAddress {
uint64 entry_vaddr = 1;
int64 displacement = 2;
}

// An address within the program. The `inner` oneof holds at most one of:
// a plain 64-bit internal address, or an external address described by a
// `RelativeAddress`.
message ProgramAddress {
oneof inner {
uint64 internal_address = 1;
RelativeAddress ext_address = 2;
}
}

message Function {
uint64 entry_address = 1;
uint64 entry_uid = 12;
Expand All @@ -334,11 +346,15 @@ message Function {
// to have this type after this instruction, these will be translated into
// a low lifting of that location with spec type metadata
repeated TypeHint type_hints = 11;
ProgramAddress binary_addr = 13;
}



// A global variable, described by its type, a 64-bit address, and a
// `ProgramAddress`.
message GlobalVariable {
TypeSpec type = 1;
uint64 address = 2;
// NOTE(review): how `binary_address` relates to `address` is not visible
// here; confirm with the producer of the specification.
ProgramAddress binary_address = 3;
}

message Symbol {
Expand Down Expand Up @@ -404,4 +420,5 @@ message Specification {
string image_name = 10;
uint64 image_base = 11;
repeated string required_globals = 12;
map<int64, string> type_names = 13;
}
30 changes: 19 additions & 11 deletions include/anvill/Declarations.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct Uid {
bool operator==(const Uid &) const = default;
};

}
} // namespace anvill

template <>
struct std::hash<anvill::Uid> {
Expand Down Expand Up @@ -88,6 +88,14 @@ struct LowLoc {
bool operator==(const LowLoc &loc) const = default;
};


// An address made of an unsigned 64-bit `vaddr` and a signed 64-bit `disp`.
// NOTE(review): presumably mirrors the `RelativeAddress` specification
// message (entry virtual address + displacement) -- confirm in the decoder.
//
// Both members are zero-initialized by default so that a default-constructed
// `RelAddr` never carries indeterminate values.
struct RelAddr {
  std::uint64_t vaddr{0};
  std::int64_t disp{0};
};

// A machine address: either a plain 64-bit address or a `RelAddr`. A
// value-initialized `MachineAddr` holds the first alternative, i.e. the
// plain address `0`.
using MachineAddr = std::variant<std::uint64_t, RelAddr>;

// A value, such as a parameter or a return value. Values are resident
// in one of two locations: either in a register, represented by a non-
// nullptr `reg` value, or in memory, at `[mem_reg + mem_offset]`.
Expand Down Expand Up @@ -140,6 +148,8 @@ struct VariableDecl {
// Address of this global variable.
std::uint64_t address{0};

MachineAddr binary_addr{};

// Declare this global variable in an LLVM module.
llvm::GlobalVariable *DeclareInModule(const std::string &name,
llvm::Module &) const;
Expand Down Expand Up @@ -427,17 +437,13 @@ struct FunctionDecl : public CallableDecl {

std::unordered_map<Uid, SpecStackOffsets> stack_offsets_at_exit;

std::unordered_map<Uid, std::vector<ParameterDecl>>
live_regs_at_entry;
std::unordered_map<Uid, std::vector<ParameterDecl>> live_regs_at_entry;

std::unordered_map<Uid, std::vector<ParameterDecl>>
live_regs_at_exit;
std::unordered_map<Uid, std::vector<ParameterDecl>> live_regs_at_exit;

std::unordered_map<Uid, std::vector<ConstantDomain>>
constant_values_at_entry;
std::unordered_map<Uid, std::vector<ConstantDomain>> constant_values_at_entry;

std::unordered_map<Uid, std::vector<ConstantDomain>>
constant_values_at_exit;
std::unordered_map<Uid, std::vector<ConstantDomain>> constant_values_at_exit;

// sorted vector of hints
std::vector<TypeHint> type_hints;
Expand All @@ -452,6 +458,9 @@ struct FunctionDecl : public CallableDecl {

std::size_t parameter_size{0};

MachineAddr binary_addr{};


std::vector<ParameterDecl> in_scope_variables;

// Declare this function in an LLVM module.
Expand All @@ -472,8 +481,7 @@ struct FunctionDecl : public CallableDecl {

SpecBlockContext GetBlockContext(Uid uid) const;

void
AddBBContexts(std::unordered_map<Uid, SpecBlockContext> &contexts) const;
void AddBBContexts(std::unordered_map<Uid, SpecBlockContext> &contexts) const;
};

// A call site decl, as represented at a "near ABI" level. This is like a
Expand Down
7 changes: 4 additions & 3 deletions include/anvill/Passes/ConvertAddressesToEntityUses.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <anvill/CrossReferenceFolder.h>
#include <llvm/IR/PassManager.h>

#include <optional>
#include <vector>

Expand Down Expand Up @@ -44,29 +45,29 @@ using EntityUsages = std::vector<EntityUse>;
// A function pass (it derives from `llvm::PassInfoMixin`) that works with a
// `CrossReferenceResolver` to relate addresses to entities.
class ConvertAddressesToEntityUses final
: public llvm::PassInfoMixin<ConvertAddressesToEntityUses> {
private:

// Resolve addresses to entities and vice versa.
const CrossReferenceResolver &xref_resolver;

// The optional metadata ID to annotate recovered entities with.
const std::optional<unsigned> pc_metadata_id;

public:

// Function pass entry point
llvm::PreservedAnalyses run(llvm::Function &function,
llvm::FunctionAnalysisManager &fam);

// Returns the pass name
static llvm::StringRef name(void);

// Heuristic that reports whether the value referenced by `use` looks like a
// pointer.
bool IsPointerLike(llvm::Use &use);

// Enumerates some of the possible entity usages that are isolated to
// specific instruction operand uses.
EntityUsages EnumeratePossibleEntityUsages(llvm::Function &function);

// Builds the pass from a cross-reference resolver and an optional metadata
// ID, which defaults to `std::nullopt`.
ConvertAddressesToEntityUses(
const CrossReferenceResolver &xref_resolver_,
std::optional<unsigned> pc_metadata_id_=std::nullopt);
std::optional<unsigned> pc_metadata_id_ = std::nullopt);
};

} // namespace anvill
10 changes: 10 additions & 0 deletions include/anvill/Providers.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#pragma once

#include <llvm/IR/DataLayout.h>
#include <llvm/IR/DerivedTypes.h>

#include <cstdint>
#include <functional>
Expand Down Expand Up @@ -71,6 +72,8 @@ class TypeProvider {

virtual const ::anvill::TypeDictionary &Dictionary(void) const = 0;

virtual std::vector<llvm::StructType *> NamedTypes(void) const = 0;

virtual ~TypeProvider() = default;
};

Expand Down Expand Up @@ -117,6 +120,9 @@ class NullTypeProvider : public BaseTypeProvider {
std::optional<VariableDecl>
TryGetVariableType(uint64_t,
llvm::Type *hinted_value_type = nullptr) const override;
// The null provider reports no named struct types: the result is always an
// empty vector.
std::vector<llvm::StructType *> NamedTypes(void) const override {
  std::vector<llvm::StructType *> no_named_types;
  return no_named_types;
}
};

// Delegates to an underlying type provider to provide the data. Derived from
Expand Down Expand Up @@ -149,6 +155,8 @@ class ProxyTypeProvider : public TypeProvider {
std::optional<uint64_t>)>
typed_reg_cb) const override;

std::vector<llvm::StructType *> NamedTypes(void) const override;

const ::anvill::TypeDictionary &Dictionary(void) const override;
};

Expand Down Expand Up @@ -199,6 +207,8 @@ class SpecificationTypeProvider : public BaseTypeProvider {
TryGetVariableType(uint64_t address,
llvm::Type *hinted_value_type = nullptr) const override;

std::vector<llvm::StructType *> NamedTypes(void) const override;

private:
SpecificationTypeProvider(void) = delete;
};
Expand Down
24 changes: 23 additions & 1 deletion include/anvill/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#pragma once

#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Metadata.h>

#include <memory>
#include <utility>
Expand Down Expand Up @@ -41,6 +42,10 @@ class Arch;
} // namespace remill
namespace anvill {

llvm::StructType *getOrCreateNamedStruct(llvm::LLVMContext &context,
llvm::StringRef Name);


struct TypeSpecificationError final {
enum class ErrorCode {
InvalidSpecFormat,
Expand Down Expand Up @@ -104,11 +109,21 @@ struct UnknownType {
bool operator==(const UnknownType &) const = default;
};


// Wraps the name of a type as a string. Two `TypeName`s compare equal when
// their `name` members compare equal.
class TypeName {
 public:
  // The wrapped type name.
  std::string name;

  bool operator==(const TypeName &) const = default;

  // Takes the name by value and moves it into the member, so passing an
  // rvalue costs no string copy and passing an lvalue costs exactly one.
  explicit TypeName(std::string name) : name(std::move(name)) {}
};

using TypeSpec =
std::variant<BaseType, std::shared_ptr<PointerType>,
std::shared_ptr<VectorType>, std::shared_ptr<ArrayType>,
std::shared_ptr<StructType>, std::shared_ptr<FunctionType>,
UnknownType>;
UnknownType, TypeName>;

bool operator==(std::shared_ptr<PointerType>, std::shared_ptr<PointerType>);
bool operator==(std::shared_ptr<VectorType>, std::shared_ptr<VectorType>);
Expand Down Expand Up @@ -285,6 +300,13 @@ class TypeTranslator {


namespace std {
template <>
struct hash<anvill::TypeName> {
size_t operator()(const anvill::TypeName &unk) const {
return std::hash<std::string>()(unk.name);
}
};

template <>
struct hash<anvill::UnknownType> {
size_t operator()(const anvill::UnknownType &unk) const {
Expand Down
3 changes: 2 additions & 1 deletion lib/Declarations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ CallableDecl::DecodeFromPB(const remill::Arch *arch, const std::string &pb) {
const TypeDictionary type_dictionary(*(arch->context));
const TypeTranslator type_translator(type_dictionary, arch);
std::unordered_map<std::int64_t, TypeSpec> type_map;
ProtobufTranslator translator(type_translator, arch, type_map);
std::unordered_map<std::int64_t, std::string> type_names;
ProtobufTranslator translator(type_translator, arch, type_map, type_names);

auto default_callable_decl_res =
translator.DecodeDefaultCallableDecl(function);
Expand Down
10 changes: 10 additions & 0 deletions lib/Lifters/EntityLifter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
#include <glog/logging.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/GlobalAlias.h>
#include <llvm/IR/GlobalVariable.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/TypeFinder.h>
#include <llvm/Transforms/Utils/ModuleUtils.h>
#include <remill/Arch/Arch.h>
#include <remill/BC/Util.h>
Expand All @@ -35,6 +37,14 @@ EntityLifterImpl::EntityLifterImpl(const LifterOptions &options_)
data_lifter(options) {
CHECK_EQ(options.arch->context, &(options.module->getContext()));
options.arch->PrepareModule(options.module);

// Lift named types
for (auto sty : this->type_provider->NamedTypes()) {
auto gv = new llvm::GlobalVariable(*options.module, sty, false,
llvm::GlobalValue::ExternalLinkage,
nullptr, sty->getName() + "_var_repr");
llvm::appendToUsed(*options.module, gv);
}
}

// Tells the entity lifter that `entity` is the lifted function/data at
Expand Down
18 changes: 17 additions & 1 deletion lib/Passes/ConvertAddressesToEntityUses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#include <anvill/Utils.h>
#include <glog/logging.h>
#include <llvm/IR/Constant.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Instruction.h>
#include <llvm/IR/Instructions.h>
#include <llvm/Support/Casting.h>
#include <remill/Arch/Arch.h>
#include <remill/BC/Util.h>

Expand All @@ -37,6 +41,16 @@ static llvm::MDNode *GetPCAnnotation(llvm::Module *module, uint64_t pc) {

} // namespace


// Heuristically decides whether the value referenced by `use` is treated as
// a pointer. The only case recognized is a constant expression whose opcode
// is `IntToPtr`; everything else is reported as not pointer-like.
bool ConvertAddressesToEntityUses::IsPointerLike(llvm::Use &use) {
  const auto *const_expr = llvm::dyn_cast<llvm::ConstantExpr>(use.get());

  // TODO(Ian): Add use of type annotations here
  return const_expr != nullptr &&
         const_expr->getOpcode() == llvm::Instruction::IntToPtr;
}

llvm::PreservedAnalyses
ConvertAddressesToEntityUses::run(llvm::Function &function,
llvm::FunctionAnalysisManager &fam) {
Expand Down Expand Up @@ -150,9 +164,11 @@ EntityUsages ConvertAddressesToEntityUses::EnumeratePossibleEntityUsages(
ra.is_valid && !ra.references_return_address &&
!ra.references_stack_pointer) {


if (ra.references_entity || // Related to an existing lifted entity.
ra.references_global_value || // Related to a global var/func.
ra.references_program_counter) { // Related to `__anvill_pc`.
ra.references_program_counter ||
IsPointerLike(use)) { // Related to `__anvill_pc`.
output.emplace_back(&use, ra);
}
}
Expand Down
Loading

0 comments on commit 70209a8

Please sign in to comment.