From 07e5ccf77459e3f6da8822ccf57cd2ec37b2f841 Mon Sep 17 00:00:00 2001 From: ivg Date: Mon, 5 Oct 2020 19:13:23 -0400 Subject: [PATCH] [Draft] initial scaffolding for the AVR target Current State ------------- 1) Works with only the latest versions of LLVM (11 and above) 2) the instructions so far implemented: ADC, ADD, LDI Design Issues ------------- A unique feature of AVR is that it maps registers directly to RAM, thus we theoretically can't represent registers as variables, but instead shall model them as memory addresses[^1]. Treating this AVR feature fairly, will render quite hard to read and to analyze code. After digging through the forums, datasheets, compilers source code, and grepping avr-objdump outputs, we can presume that C compilers are not leveraging this feature and use normal reads and writes to access registers, instead of loads and stores. And the memory-mapping of registers is mostly reserved RAM-less AVR boards, where 32 register plays the role of minimal RAM. Of course, it doesn't mean that the malicious code can't exploit this. With this in mind, we still decided to model AVR as normal register-based machine but add a command-line option later, that will enable a conservative model, that lacks registers. Formally, our current lifter implementation (the one with registers) just assumes that all addresses in load and stores operation are greater than 32. [^1]: And yes, `*0 = 'B'` is a perfectly valid code on AVR that writes `0x42` into the `R0` register. --- lib/bap_avr/bap_avr_target.ml | 47 ++++++++++ lib/bap_avr/bap_avr_target.mli | 18 ++++ oasis/avr | 22 +++++ plugins/avr/.merlin | 2 + plugins/avr/avr_lifter.ml | 156 +++++++++++++++++++++++++++++++++ plugins/avr/avr_lifter.mli | 1 + plugins/avr/avr_main.ml | 53 +++++++++++ 7 files changed, 299 insertions(+) create mode 100644 lib/bap_avr/bap_avr_target.ml create mode 100644 lib/bap_avr/bap_avr_target.mli create mode 100644 oasis/avr create mode 100644 plugins/avr/.merlin create mode 100644 plugins/avr/avr_lifter.ml create mode 100644 plugins/avr/avr_lifter.mli create mode 100644 plugins/avr/avr_main.ml diff --git a/lib/bap_avr/bap_avr_target.ml b/lib/bap_avr/bap_avr_target.ml new file mode 100644 index 0000000000..4e2009654b --- /dev/null +++ b/lib/bap_avr/bap_avr_target.ml @@ -0,0 +1,47 @@ +open Core_kernel +open Bap_core_theory + +let package = "bap" + +type r16 and r8 + +type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort + +let r16 : r16 bitv = Theory.Bitv.define 16 +let r8 : r8 bitv = Theory.Bitv.define 8 +let bool = Theory.Bool.t + +let reg t n = Theory.Var.define t n + +let array ?(index=string_of_int) t pref size = + List.init size ~f:(fun i -> reg t (pref ^ index i)) + +let untyped = List.map ~f:Theory.Var.forget +let (@<) xs ys = untyped xs @ untyped ys + +let gpr = array r8 "R" 32 +let sp = reg r16 "SP" +let flags = List.map ~f:(reg bool) [ + "CF"; "ZF"; "NF"; "VF"; "SF"; "HF"; "TF"; "IF" + ] + +let datas = Theory.Mem.define r16 r8 +let codes = Theory.Mem.define r16 r16 + +let data = reg datas "data" +let code = reg codes "code" + +let parent = Theory.Target.declare ~package "avr" + ~bits:8 + ~byte:8 + ~endianness:Theory.Endianness.le + + +let atmega328 = Theory.Target.declare ~package "ATmega328" + ~parent + ~data + ~code + ~vars:(gpr @< [sp] @< flags @< [data] @< [code]) + + +let llvm_avr16 = Theory.Language.declare ~package "llvm-avr16" diff --git a/lib/bap_avr/bap_avr_target.mli b/lib/bap_avr/bap_avr_target.mli new file mode 100644 index 0000000000..b255d24cb6 --- /dev/null +++ b/lib/bap_avr/bap_avr_target.mli @@ -0,0 +1,18 @@ +open Bap_core_theory + +val parent : Theory.target +val atmega328 : Theory.target +val llvm_avr16 : Theory.language + +type r16 and r8 + +type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort + +val r16 : r16 bitv +val r8 : r8 bitv + +val code : (r16, r16) Theory.Mem.t Theory.var +val data : (r16, r8) Theory.Mem.t Theory.var +val gpr : r8 Theory.Bitv.t Theory.var list +val sp : r16 Theory.Bitv.t Theory.var +val flags : Theory.Bool.t Theory.var list diff --git a/oasis/avr b/oasis/avr new file mode 100644 index 0000000000..6a4542bda0 --- /dev/null +++ b/oasis/avr @@ -0,0 +1,22 @@ +Flag avr + Description: Build Avr lifter + Default: false + +Library "bap-avr" + Build$: flag(everything) || flag(avr) + XMETADescription: common definitions for Avr targets + Path: lib/bap_avr + BuildDepends: core_kernel, bap-knowledge, bap-core-theory + FindlibName: bap-avr + Modules: Bap_avr_target + +Library avr_plugin + XMETADescription: provide Avr lifter + Path: plugins/avr + Build$: flag(everything) || flag(avr) + BuildDepends: core_kernel, ppx_jane, ogre, + bap-core-theory, bap-knowledge, bap-main, + bap, bap-avr, bitvec + FindlibName: bap-plugin-avr + InternalModules: Avr_main, Avr_lifter + XMETAExtraLines: tags="avr, lifter, atmega" diff --git a/plugins/avr/.merlin b/plugins/avr/.merlin new file mode 100644 index 0000000000..ed4f494f07 --- /dev/null +++ b/plugins/avr/.merlin @@ -0,0 +1,2 @@ +B ../../lib/bap_avr +REC \ No newline at end of file diff --git a/plugins/avr/avr_lifter.ml b/plugins/avr/avr_lifter.ml new file mode 100644 index 0000000000..740baf3837 --- /dev/null +++ b/plugins/avr/avr_lifter.ml @@ -0,0 +1,156 @@ +open Core_kernel +open Bap_core_theory +open Bap.Std + +open KB.Syntax +include Bap_main.Loggers() + +module Target = Bap_avr_target +module MC = Disasm_expert.Basic + +type r1 +type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort + +let r1 : r1 bitv = Theory.Bitv.define 1 + +let make_regs regs = + let regs = + List.mapi regs ~f:(fun i r -> (i,r)) |> + Map.of_alist_exn (module Int) in + Map.find_exn regs + +let gpr = make_regs Target.gpr +let regnum s = Scanf.sscanf s "R%d" ident + +let require_gpr insn pos f = + match (MC.Insn.ops insn).(pos) with + | Op.Reg r -> f (gpr (regnum (Reg.name r))) + | _ -> KB.return Insn.empty + +let require_imm insn pos f = + match (MC.Insn.ops insn).(pos) with + | Op.Imm x -> f (Option.value_exn (Imm.to_int x)) + | _ -> KB.return Insn.empty + +let hf = Theory.Var.define r1 "HF" +let cf = Theory.Var.define r1 "CF" +let nf = Theory.Var.define r1 "NF" +let vf = Theory.Var.define r1 "VF" +let sf = Theory.Var.define r1 "SF" +let zf = Theory.Var.define r1 "ZF" + +module M8 = Bitvec.M8 + +module Avr(CT : Theory.Core) = struct + open Target + let rec seq = function + | [] -> CT.perform Theory.Effect.Sort.bot + | [x] -> x + | x :: xs -> CT.seq x @@ seq xs + + let const x = CT.int r8 (M8.int x) + + let bit0 = CT.int r1 (Bitvec.M1.bool false) + let bit1 = CT.int r1 (Bitvec.M1.bool true) + let flag x = CT.ite x bit1 bit0 + + let nth x n = + CT.(extract r1 (const n) (const n) x) + + let data xs = + KB.Object.create Theory.Program.cls >>= fun lbl -> + CT.blk lbl (seq xs) (seq []) + + let (&&) = CT.logand + let (||) = CT.logor + let not = CT.not + + let bit reg n body = + nth CT.(var reg) n >>= fun expr -> + Theory.Var.scoped r1 @@ fun v -> + CT.let_ v !!expr (body (CT.var v)) + + let halfcarry ~r ~rd ~rr = + bit r 3 @@ fun r3 -> + bit rd 3 @@ fun rd3 -> + bit rr 3 @@ fun rr3 -> + rd3 && rr3 || rr3 && not r3 || not r3 && rd3 + + let fullcarry ~r ~rd ~rr = + bit r 7 @@ fun r7 -> + bit rd 7 @@ fun rd7 -> + bit rr 7 @@ fun rr7 -> + rd7 && rr7 || rr7 && not r7 || r7 && not rd7 + + let overflow ~r ~rd ~rr = + bit r 7 @@ fun r7 -> + bit rd 7 @@ fun rd7 -> + bit rr 7 @@ fun rr7 -> + rd7 && rr7 && not r7 || not rd7 && not rr7 && r7 + + + let with_result rd f = + Theory.Var.fresh (Theory.Var.sort rd) >>= fun v -> + f v >>= fun effs -> + data (effs @ CT.[set rd (var v)]) + + let (:=) = CT.set + let (+) = CT.add + + + let adc insn = + require_gpr insn 1 @@ fun rd -> + require_gpr insn 2 @@ fun rr -> + with_result rd @@ fun r -> + KB.return @@ + CT.[ + r := var rd + var rr + unsigned r8 (var cf); + hf := halfcarry ~r ~rd ~rr; + nf := nth (var r) 7; + vf := overflow ~r ~rd ~rr; + sf := var nf || var vf; + zf := flag (is_zero (var r)); + cf := fullcarry ~r ~rd ~rr; + ] + + + let add insn = + require_gpr insn 1 @@ fun rd -> + require_gpr insn 2 @@ fun rr -> + with_result rd @@ fun r -> + KB.return @@ + CT.[ + r := var rd + var rr; + hf := halfcarry ~r ~rd ~rr; + nf := nth (var r) 7; + vf := overflow ~r ~rd ~rr; + sf := var nf || var vf; + zf := flag (is_zero (var r)); + cf := fullcarry ~r ~rd ~rr; + ] + + let ldi insn = + require_gpr insn 0 @@ fun rd -> + require_imm insn 1 @@ fun k -> + data [ + rd := const k + ] +end + +let lifter label : unit Theory.eff = + KB.collect MC.Insn.slot label >>= function + | None -> KB.return Insn.empty + | Some insn -> + Theory.instance () >>= Theory.require >>= fun (module Core) -> + let module Avr = Avr(Core) in + let open Avr in + insn |> match MC.Insn.name insn with + | "ADCRdRr" -> adc + | "ADDRdRr" -> add + | "LDIRdK" -> ldi + | code -> + info "unsupported opcode: %s" code; + fun _ -> KB.return Insn.empty + +let load () = + KB.promise Theory.Semantics.slot lifter diff --git a/plugins/avr/avr_lifter.mli b/plugins/avr/avr_lifter.mli new file mode 100644 index 0000000000..a58d3a6dae --- /dev/null +++ b/plugins/avr/avr_lifter.mli @@ -0,0 +1 @@ +val load : unit -> unit diff --git a/plugins/avr/avr_main.ml b/plugins/avr/avr_main.ml new file mode 100644 index 0000000000..c6397d02ab --- /dev/null +++ b/plugins/avr/avr_main.ml @@ -0,0 +1,53 @@ +open Bap_main +open Bap.Std +open Bap_core_theory +open KB.Syntax +module CT = Theory + +include Bap_main.Loggers() + +module Target = Bap_avr_target +module Dis = Disasm_expert.Basic + +let provide_decoding () = + KB.promise CT.Label.encoding @@ fun label -> + CT.Label.target label >>| fun t -> + if CT.Target.belongs Target.parent t + then Target.llvm_avr16 + else CT.Language.unknown + +let enable_llvm () = + Dis.register Target.llvm_avr16 @@ fun _target -> + Dis.create ~backend:"llvm" "avr" + +let enable_loader () = + let request_arch doc = + let open Ogre.Syntax in + match Ogre.eval (Ogre.request Image.Scheme.arch) doc with + | Error _ -> assert false (* nothing could go wrong here! *) + | Ok arch -> arch in + KB.promise CT.Unit.target @@ fun unit -> + KB.collect Image.Spec.slot unit >>| request_arch >>| function + | Some "avr" -> Target.atmega328 + | _ -> CT.Target.unknown + + +let main _ctxt = + enable_llvm (); + enable_loader (); + provide_decoding (); + Avr_lifter.load (); + Ok () + +(* semantic tags that describe what our plugin is providing, + setting them is important not only for introspection but + for the proper function of the cache subsystem. +*) +let provides = [ + "avr"; + "lifter"; +] + +(* finally, let's register our extension and call the main function *) +let () = Bap_main.Extension.declare main + ~provides