Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Very Basic Thumb2 Support #572

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/remill/Arch/Name.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ enum ArchName : uint32_t {
kArchAMD64_AVX,
kArchAMD64_AVX512,

kArchThumb2LittleEndian,
kArchAArch32LittleEndian,
kArchAArch64LittleEndian,

Expand Down
18 changes: 16 additions & 2 deletions lib/Arch/AArch32/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,23 @@ AArch32Arch::~AArch32Arch(void) {}

// Returns the minimum alignment, in bytes, of an instruction address for the
// sub-architecture selected by `arch_name`: 4 for `kArchAArch32LittleEndian`
// and 2 for `kArchThumb2LittleEndian`. Any other architecture name is reported
// at FATAL severity.
//
// TODO(pag): Eventually handle unaligned addresses.
uint64_t AArch32Arch::MinInstructionAlign(void) const {
  switch (arch_name) {
    case kArchAArch32LittleEndian: return 4;
    case kArchThumb2LittleEndian: return 2;
    default:
      LOG(FATAL) << "Cannot get minimum instruction alignment for non-aarch32 "
                    "architecture " << GetArchName(arch_name);
  }
}

// Returns the size, in bytes, of the smallest instruction encoding for the
// sub-architecture selected by `arch_name`: 4 for `kArchAArch32LittleEndian`
// and 2 for `kArchThumb2LittleEndian`. Any other architecture name is reported
// at FATAL severity.
uint64_t AArch32Arch::MinInstructionSize(void) const {
  switch (arch_name) {
    case kArchAArch32LittleEndian: return 4;
    case kArchThumb2LittleEndian: return 2;
    default:
      // The message names "size" so that it can be told apart from the
      // otherwise identical failure in `MinInstructionAlign`.
      LOG(FATAL) << "Cannot get minimum instruction size for non-aarch32 "
                    "architecture " << GetArchName(arch_name);
  }
}

// Maximum number of bytes in an instruction for this particular architecture.
Expand All @@ -77,6 +89,7 @@ llvm::Triple AArch32Arch::Triple(void) const {
auto triple = BasicTriple();
switch (arch_name) {
case kArchAArch32LittleEndian: triple.setArch(llvm::Triple::arm); break;
case kArchThumb2LittleEndian: triple.setArch(llvm::Triple::thumb); break;
default:
LOG(FATAL) << "Cannot get triple for non-aarch32 architecture "
<< GetArchName(arch_name);
Expand Down Expand Up @@ -159,6 +172,7 @@ void AArch32Arch::PopulateRegisterTable(void) const {
REG(C, sr.c, u8);
REG(Z, sr.z, u8);
REG(V, sr.v, u8);
REG(T, sr.t, u8);
}


Expand Down
3 changes: 2 additions & 1 deletion lib/Arch/AArch32/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ add_library(remill_arch_aarch32 STATIC

Arch.cpp
Decode.cpp
# Decode.h
DecodeThumb2.cpp
Decode.h
# Extract.cpp
)

Expand Down
53 changes: 25 additions & 28 deletions lib/Arch/AArch32/Decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
#include <optional>

#include "Arch.h"
#include "Decode.h"
#include "remill/BC/ABI.h"
#include "remill/Arch/Name.h"

namespace remill {

namespace {
namespace aarch32 {

// Integer Data Processing (three register, register shift)
union IntDataProcessingRRRR {
Expand Down Expand Up @@ -562,17 +564,8 @@ union SpecialRegsAndHints {
static_assert(sizeof(SpecialRegsAndHints) == 4, " ");

static constexpr auto kAddressSize = 32u;
static constexpr auto kPCRegNum = 15u;
static constexpr auto kLRRegNum = 14u;

static const char *const kIntRegName[] = {
"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"};

typedef bool(TryDecode)(Instruction &, uint32_t);
typedef std::optional<uint32_t>(InstEval)(uint32_t, uint32_t);

static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
Operand::Action action) {
Operand::Register reg;
reg.size = size;
Expand All @@ -581,7 +574,7 @@ static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
op.action = action;
}

static void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size,
void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size,
Operand::Action action) {
Operand::Register reg;
reg.size = size;
Expand All @@ -601,8 +594,8 @@ static void AddExprOp(Instruction &inst, OperandExpression *op_expr,
op.action = action;
}

static void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32,
bool is_signed = false) {
void AddImmOp(Instruction &inst, uint64_t value, unsigned size,
bool is_signed) {
Operand::Immediate imm;
imm.val = value;
imm.is_signed = is_signed;
Expand All @@ -611,9 +604,9 @@ static void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32,
op.size = size;
}

static void AddAddrRegOp(Instruction &inst, const char *reg_name,
unsigned mem_size, Operand::Action mem_action,
unsigned disp, unsigned scale = 0) {
void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size,
Operand::Action mem_action,
unsigned disp, unsigned scale) {
Operand::Address addr;
addr.address_size = 32;
addr.base_reg.name = reg_name;
Expand Down Expand Up @@ -910,7 +903,7 @@ static void AddShiftImmCarryOperand(Instruction &inst, uint32_t reg_num,
// (shift_t, shift_n) = DecodeImmShift(type, imm5);
// (shifted, carry) = Shift_C(R[m], shift_t, shift_n, PSTATE.C);
// See an instruction in Integer Data Processing (three register, immediate shift) set for an example
static void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num,
void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num,
uint32_t shift_type, uint32_t shift_size,
bool carry_out, bool can_shift_right_by_32) {
auto is_rrx = false;
Expand Down Expand Up @@ -1193,7 +1186,7 @@ static bool EvalPCDest(Instruction &inst, const bool s, const unsigned int rd,
auto src2 = EvalOperand(inst, inst.operands[4], uses_linkreg);

AddAddrRegOp(inst, kNextPCVariableName.data(), kAddressSize,
Operand::kActionWrite, 0);
Operand::kActionWrite, 0u);

if (uses_linkreg) {

Expand Down Expand Up @@ -3556,14 +3549,14 @@ static TryDecode *TryDataProcessingAndMisc(uint32_t bits) {
// This is the top level of the instruction encoding schema for AArch32.
// Instructions are grouped into subsets based on this top level and then
// into smaller sets.
// cond op0 op1
// cond op0 op1
// != 1111 00x Data-processing and miscellaneous instructions
// != 1111 010 Load/Store Word, Unsigned Byte (immediate, literal)
// != 1111 011 0 Load/Store Word, Unsigned Byte (register)
// != 1111 011 1 Media instructions
// 10x Branch, branch with link, and block data transfer
// 11x System register access, Advanced SIMD, floating-point, and Supervisor call
// 1111 0xx Unconditional instructions
// 10x Branch, branch with link, and block data transfer
// 11x System register access, Advanced SIMD, floating-point, and Supervisor call
// 1111 0xx Unconditional instructions
static TryDecode *TryDecodeTopLevelEncodings(uint32_t bits) {
const TopLevelEncodings enc = {bits};

Expand Down Expand Up @@ -3628,7 +3621,7 @@ static uint32_t BytesToBits(const uint8_t *bytes) {
bits = (bits << 8) | static_cast<uint32_t>(bytes[0]);
return bits;
}
} // namespace
} // namespace aarch32

// Decode an instruction
bool AArch32Arch::DecodeInstruction(uint64_t address,
Expand All @@ -3642,7 +3635,7 @@ bool AArch32Arch::DecodeInstruction(uint64_t address,
inst.has_branch_taken_delay_slot = false;
inst.has_branch_not_taken_delay_slot = false;
inst.arch_name = arch_name;
inst.sub_arch_name = arch_name; // TODO(pag): Thumb.
inst.sub_arch_name = arch_name;
inst.arch = this;
inst.category = Instruction::kCategoryInvalid;
inst.operands.clear();
Expand All @@ -3662,17 +3655,21 @@ bool AArch32Arch::DecodeInstruction(uint64_t address,
}

const auto bytes = reinterpret_cast<const uint8_t *>(inst.bytes.data());
const auto bits = BytesToBits(bytes);
const auto bits = aarch32::BytesToBits(bytes);

if (arch_name == kArchThumb2LittleEndian) {
return aarch32::DecodeThumb2Instruction(inst, bits);
}

auto decoder = TryDecodeTopLevelEncodings(bits);
auto decoder = aarch32::TryDecodeTopLevelEncodings(bits);
if (!decoder) {
LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec;
return false;
}

auto ret = decoder(inst, bits);

// LOG(ERROR) << inst.Serialize();
LOG(ERROR) << inst.Serialize();
return ret;
}

Expand Down
55 changes: 55 additions & 0 deletions lib/Arch/AArch32/Decode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Decode.h
*
* Created on: Feb 15, 2022
* Author: sonyaschriner
*/

#pragma once

#include <cstdint>
#include <optional>

namespace remill {

class Instruction;

// Declarations shared between the AArch32 decoder translation units. The
// `Add*Op` helpers declared here are defined in `Decode.cpp`.
//
// NOTE(review): the signatures below name `Operand::Action`, which needs the
// full definition of `remill::Operand` to be visible before this header is
// included. This header only forward-declares `Instruction` -- presumably the
// definition arrives through an earlier include in each `.cpp`; confirm.
namespace aarch32 {

// Entry point of the Thumb2 decoder. `AArch32Arch::DecodeInstruction` calls it
// with the instruction bits when the architecture is
// `kArchThumb2LittleEndian` and returns its result as the decode status.
bool DecodeThumb2Instruction(Instruction &inst, uint32_t bits);

// Signature of a decoder that fills in `Instruction` from a 32-bit encoding
// and reports whether decoding succeeded.
using TryDecode = bool(Instruction &, uint32_t);

// Same as `TryDecode`, but taking a 16-bit encoding.
using TryDecode16 = bool(Instruction &, uint16_t);

// Register numbers of the program counter, link register and stack pointer
// (going by the names; they correspond to "R15", "R14" and "R13" in
// `kIntRegName`). `inline` gives one definition shared by every translation
// unit that includes this header.
inline constexpr auto kPCRegNum = 15u;
inline constexpr auto kLRRegNum = 14u;
inline constexpr auto kSPRegNum = 13u;

// Names of the sixteen integer registers, indexed by register number.
inline constexpr const char *kIntRegName[] = {
    "R0", "R1", "R2",  "R3",  "R4",  "R5",  "R6",  "R7",
    "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"};

// Signature of an evaluator over two 32-bit values that may yield no result.
using InstEval = std::optional<uint32_t>(uint32_t, uint32_t);

//bool DecodeCondition(Instruction &inst, uint32_t cond);

// Adds a register operand of `size` bits with the given `action` to `inst`,
// selecting the register by number.
void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
                 Operand::Action action);

// Adds a register operand of `size` bits with the given `action` to `inst`,
// selecting the register by name.
void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size,
                 Operand::Action action);

// Adds an address operand to `inst` whose base register is `reg_name`.
void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size,
                  Operand::Action mem_action,
                  unsigned disp, unsigned scale = 0);

// Adds an immediate operand holding `value` to `inst`; `size` defaults to 32
// and the immediate is unsigned unless `is_signed` is set.
void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32,
              bool is_signed = false);

// Adds an operand for register `reg_num` shifted by an immediate amount; see
// the definition in `Decode.cpp` for how `shift_type` and `shift_size` are
// interpreted.
void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num,
                           uint32_t shift_type, uint32_t shift_size,
                           bool carry_out, bool can_shift_right_by_32);


}  // namespace aarch32
}  // namespace remill

Loading