lifting-bits · sschriner · Jan 4, 2022 · Jan 7, 2022 · Feb 23, 2022 · Mar 7, 2022
diff --git a/include/remill/Arch/Name.h b/include/remill/Arch/Name.h
@@ -81,6 +81,7 @@ enum ArchName : uint32_t {
   kArchAMD64_AVX,
   kArchAMD64_AVX512,
 
+  kArchThumb2LittleEndian,
   kArchAArch32LittleEndian,
   kArchAArch64LittleEndian,
 

diff --git a/lib/Arch/AArch32/Arch.cpp b/lib/Arch/AArch32/Arch.cpp
@@ -55,11 +55,23 @@ AArch32Arch::~AArch32Arch(void) {}
 
-// TODO(pag): Eventually handle Thumb2 and unaligned addresses.
+// TODO(pag): Eventually handle unaligned addresses.
 uint64_t AArch32Arch::MinInstructionAlign(void) const {
-  return 4;
+  switch (arch_name) {
+    case kArchAArch32LittleEndian: return 4;  // A32: 4-byte alignment.
+    case kArchThumb2LittleEndian: return 2;  // Thumb2: 2-byte alignment.
+    default:  // Any other arch_name is a programming error.
+      LOG(FATAL) << "Cannot get minimum instruction alignment for non-aarch32 "
+          "architecture " << GetArchName(arch_name);
+  }
 }
 
 uint64_t AArch32Arch::MinInstructionSize(void) const {
-  return 4;
+  switch (arch_name) {
+    case kArchAArch32LittleEndian: return 4;  // A32: 4-byte encodings.
+    case kArchThumb2LittleEndian: return 2;  // Thumb2: 2-byte minimum.
+    default:  // Any other arch_name is a programming error.
+      LOG(FATAL) << "Cannot get minimum instruction size for non-aarch32 "
+          "architecture " << GetArchName(arch_name);
+  }
 }
 
 // Maximum number of bytes in an instruction for this particular architecture.
@@ -77,6 +89,7 @@ llvm::Triple AArch32Arch::Triple(void) const {
   auto triple = BasicTriple();
   switch (arch_name) {
     case kArchAArch32LittleEndian: triple.setArch(llvm::Triple::arm); break;
+    case kArchThumb2LittleEndian: triple.setArch(llvm::Triple::thumb); break;
     default:
       LOG(FATAL) << "Cannot get triple for non-aarch32 architecture "
                  << GetArchName(arch_name);
@@ -159,6 +172,7 @@ void AArch32Arch::PopulateRegisterTable(void) const {
   REG(C, sr.c, u8);
   REG(Z, sr.z, u8);
   REG(V, sr.v, u8);
+  REG(T, sr.t, u8);
 }
 
 

diff --git a/lib/Arch/AArch32/CMakeLists.txt b/lib/Arch/AArch32/CMakeLists.txt
@@ -27,7 +27,8 @@ add_library(remill_arch_aarch32 STATIC
 
   Arch.cpp
   Decode.cpp
-# Decode.h
+  DecodeThumb2.cpp
+  Decode.h
 # Extract.cpp
 )
 

diff --git a/lib/Arch/AArch32/Decode.cpp b/lib/Arch/AArch32/Decode.cpp
@@ -19,11 +19,13 @@
 #include <optional>
 
 #include "Arch.h"
+#include "Decode.h"
 #include "remill/BC/ABI.h"
+#include "remill/Arch/Name.h"
 
 namespace remill {
 
-namespace {
+namespace aarch32 {
 
 // Integer Data Processing (three register, register shift)
 union IntDataProcessingRRRR {
@@ -562,17 +564,8 @@ union SpecialRegsAndHints {
 static_assert(sizeof(SpecialRegsAndHints) == 4, " ");
 
 static constexpr auto kAddressSize = 32u;
-static constexpr auto kPCRegNum = 15u;
-static constexpr auto kLRRegNum = 14u;
 
-static const char *const kIntRegName[] = {
-    "R0", "R1", "R2",  "R3",  "R4",  "R5",  "R6",  "R7",
-    "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"};
-
-typedef bool(TryDecode)(Instruction &, uint32_t);
-typedef std::optional<uint32_t>(InstEval)(uint32_t, uint32_t);
-
-static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
+void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
                         Operand::Action action) {
   Operand::Register reg;
   reg.size = size;
@@ -581,7 +574,7 @@ static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
   op.action = action;
 }
 
-static void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size,
+void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size,
                         Operand::Action action) {
   Operand::Register reg;
   reg.size = size;
@@ -601,8 +594,8 @@ static void AddExprOp(Instruction &inst, OperandExpression *op_expr,
   op.action = action;
 }
 
-static void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32,
-                     bool is_signed = false) {
+void AddImmOp(Instruction &inst, uint64_t value, unsigned size,
+                     bool is_signed) {
   Operand::Immediate imm;
   imm.val = value;
   imm.is_signed = is_signed;
@@ -611,9 +604,9 @@ static void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32,
   op.size = size;
 }
 
-static void AddAddrRegOp(Instruction &inst, const char *reg_name,
-                         unsigned mem_size, Operand::Action mem_action,
-                         unsigned disp, unsigned scale = 0) {
+void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size,
+                         Operand::Action mem_action,
+                         unsigned disp, unsigned scale) {
   Operand::Address addr;
   addr.address_size = 32;
   addr.base_reg.name = reg_name;
@@ -910,7 +903,7 @@ static void AddShiftImmCarryOperand(Instruction &inst, uint32_t reg_num,
 // (shift_t, shift_n) = DecodeImmShift(type, imm5);
 // (shifted, carry) = Shift_C(R[m], shift_t, shift_n, PSTATE.C);
 // See an instruction in Integer Data Processing (three register, immediate shift) set for an example
-static void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num,
+void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num,
                                   uint32_t shift_type, uint32_t shift_size,
                                   bool carry_out, bool can_shift_right_by_32) {
   auto is_rrx = false;
@@ -1193,7 +1186,7 @@ static bool EvalPCDest(Instruction &inst, const bool s, const unsigned int rd,
       auto src2 = EvalOperand(inst, inst.operands[4], uses_linkreg);
 
       AddAddrRegOp(inst, kNextPCVariableName.data(), kAddressSize,
-                   Operand::kActionWrite, 0);
+                   Operand::kActionWrite, 0u);
 
       if (uses_linkreg) {
 
@@ -3556,14 +3549,14 @@ static TryDecode *TryDataProcessingAndMisc(uint32_t bits) {
 // This is the top level of the instruction encoding schema for AArch32.
 // Instructions are grouped into subsets based on this the top level and then
 // into smaller sets.
-//   cond op0 op1
+//   cond  op0 op1
 // != 1111 00x     Data-processing and miscellaneous instructions
 // != 1111 010     Load/Store Word, Unsigned Byte (immediate, literal)
 // != 1111 011 0   Load/Store Word, Unsigned Byte (register)
 // != 1111 011 1   Media instructions
-//        10x     Branch, branch with link, and block data transfer
-//        11x     System register access, Advanced SIMD, floating-point, and Supervisor call
-//   1111 0xx     Unconditional instructions
+//         10x     Branch, branch with link, and block data transfer
+//         11x     System register access, Advanced SIMD, floating-point, and Supervisor call
+//   1111  0xx     Unconditional instructions
 static TryDecode *TryDecodeTopLevelEncodings(uint32_t bits) {
   const TopLevelEncodings enc = {bits};
 
@@ -3628,7 +3621,7 @@ static uint32_t BytesToBits(const uint8_t *bytes) {
   bits = (bits << 8) | static_cast<uint32_t>(bytes[0]);
   return bits;
 }
-}  // namespace
+}  // namespace aarch32
 
 // Decode an instruction
 bool AArch32Arch::DecodeInstruction(uint64_t address,
@@ -3642,7 +3635,7 @@ bool AArch32Arch::DecodeInstruction(uint64_t address,
   inst.has_branch_taken_delay_slot = false;
   inst.has_branch_not_taken_delay_slot = false;
   inst.arch_name = arch_name;
-  inst.sub_arch_name = arch_name;  // TODO(pag): Thumb.
+  inst.sub_arch_name = arch_name;
   inst.arch = this;
   inst.category = Instruction::kCategoryInvalid;
   inst.operands.clear();
@@ -3662,17 +3655,21 @@ bool AArch32Arch::DecodeInstruction(uint64_t address,
   }
 
   const auto bytes = reinterpret_cast<const uint8_t *>(inst.bytes.data());
-  const auto bits = BytesToBits(bytes);
+  const auto bits = aarch32::BytesToBits(bytes);
+
+  if (arch_name == kArchThumb2LittleEndian) {  // Thumb2 has its own decoder.
+    return aarch32::DecodeThumb2Instruction(inst, bits);
+  }
 
-  auto decoder = TryDecodeTopLevelEncodings(bits);
+  auto decoder = aarch32::TryDecodeTopLevelEncodings(bits);
   if (!decoder) {
     LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec;
     return false;
   }
 
   auto ret = decoder(inst, bits);
 
   //  LOG(ERROR) << inst.Serialize();
   return ret;
 }
 

diff --git a/lib/Arch/AArch32/Decode.h b/lib/Arch/AArch32/Decode.h
@@ -0,0 +1,64 @@
+/*
+ * Decode.h
+ *
+ *  Created on: Feb 15, 2022
+ *      Author: sonyaschriner
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+
+#include "remill/Arch/Instruction.h"
+
+namespace remill {
+namespace aarch32 {
+
+// Decodes `bits` as a Thumb2 instruction into `inst`; returns success.
+bool DecodeThumb2Instruction(Instruction &inst, uint32_t bits);
+
+// Decoder signatures for 32-bit and 16-bit instruction words.
+typedef bool(TryDecode)(Instruction &, uint32_t);
+typedef bool(TryDecode16)(Instruction &, uint16_t);
+
+// Register numbers used for the PC, LR and SP (R15, R14 and R13).
+static constexpr auto kPCRegNum = 15u;
+static constexpr auto kLRRegNum = 14u;
+static constexpr auto kSPRegNum = 13u;
+
+// Integer register names, indexed by register number.
+static const char *const kIntRegName[] = {
+    "R0", "R1", "R2",  "R3",  "R4",  "R5",  "R6",  "R7",
+    "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"};
+
+// Signature of a two-input evaluator that may produce no value.
+typedef std::optional<uint32_t>(InstEval)(uint32_t, uint32_t);
+
+//bool DecodeCondition(Instruction &inst, uint32_t cond);
+
+// Creates a register operand on `inst` for integer register `index`.
+void AddIntRegOp(Instruction &inst, unsigned index, unsigned size,
+                 Operand::Action action);
+
+// Creates a register operand on `inst` for the register named `reg_name`.
+void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size,
+                 Operand::Action action);
+
+// Creates a 32-bit address operand on `inst` with base register `reg_name`.
+void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size,
+                  Operand::Action mem_action, unsigned disp,
+                  unsigned scale = 0);
+
+// Creates an immediate operand on `inst` holding `value`.
+void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32,
+              bool is_signed = false);
+
+// Creates the operand for register `reg_num` shifted by an immediate amount.
+void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num,
+                           uint32_t shift_type, uint32_t shift_size,
+                           bool carry_out, bool can_shift_right_by_32);
+
+}  // namespace aarch32
+}  // namespace remill
+