Skip to content

Commit

Permalink
implements better support for cross memory disassembling
Browse files Browse the repository at this point in the history
TL;DR; jumps that cross segments and section boundaries are now
treated more thoroughly so that if a jump instruction leads to an
invalid execution chain in some other segment, the we will cancel both
the chain in the other segment and the chain that led to that jump in
the current segment (before it was canceled up to the boundaries of
its own segment).

Partially fixes BinaryAnalysisPlatform#1133, however since it is Thumb 2.0 binary for BAP it
is still mostly random data than something meaningful.

Problem
-------

Since 2.0 we have the incremental disassembler that supports
cross-sectional/cross-segmential jumps. As BinaryAnalysisPlatform#1133 shows sometimes they
can go wrong as they were treated specially and had some preferences
that regular intersectional jumps didn't have. One of the invariants
of our disassembler is that there is no valid chain of execution that
will hit the end of segment or data. In other words, that will force
the CPU into the invalid instruction state. We allow conservative
chains, so that the CPU can still hit an invalid instruction because
of a conditional branch (in other words, we allow conditional branches
to hit data). To preserve this invariant we maintain a tree of
disassembling tasks, so that once we hit data, we can unroll the chain
up to the root that started it (or the first conditional branch) and
cancel everything in between marking it also as data.

This invariant doesn't hold for jumps between sections as when we
see a jump instruction that goes out of the current memory region we
just assume that once we will get this other region of memory, it will
be disassembled nicely. However, later when we actually get access to
the memory region that contains the destination (our disassembler is
incremental and applied per each chunk of memory as it is discovered)
we may figure out that the chain starting from this address is
invalid and cancel this chain. However, since we no longer have access
to the disassembler state of the original memory region, we can't
cancel the chain that led to that jump in the original memory
region. Therefore later, when we build the whole program CFG we will
start that chain and eventually hit data and end up with an
exception.

Solution
--------

The solution is instead of discarding the task that breaches the
segment boundaries we will accumulate it in a debt list, and every
time we are handled with a new memory region we first try to payoff
the debts. And if the task is now in the boundaries and we can prove
that it hits data, then we cancel the whole chain that can now cross
section boundaries.

Caveats
-------

The debt is a list of task and each task references its parent tasks,
so in fact it is a tree of instructions covering the whole program. We
are storing the debt list in the disassembler state which is saved on
the hard drive and if the debt list is large (and since in binary
format we can't preserve sharing) it can be quite large to store and
to load. So far the assumption is that the debt list is either empty
or very small after the project is fully disassembled. If this
hypothesis will not turn true, we can either cancel all unpayed debt
at the end of disassembling or just ignore it and do not store on the
disk.
  • Loading branch information
ivg committed Jun 17, 2020
1 parent 0eebcc9 commit 3921672
Showing 1 changed file with 25 additions and 13 deletions.
38 changes: 25 additions & 13 deletions lib/bap_disasm/bap_disasm_driver.ml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ module Machine : sig
and slot = private
| Ready of task option
| Delay
[@@deriving bin_io]

type state = private {
stop : bool;
work : task list; (* work list*)
debt : task list;
curr : task;
addr : addr; (* current address *)
begs : Set.M(Addr).t; (* begins of basic blocks *)
Expand All @@ -43,6 +45,7 @@ module Machine : sig

val start :
mem ->
debt:task list ->
code:Set.M(Addr).t ->
data:Set.M(Addr).t ->
init:Set.M(Addr).t ->
Expand All @@ -67,16 +70,18 @@ end = struct
and slot =
| Ready of task option
| Delay
[@@deriving bin_io]


let init_work roots =
let init_work init roots =
Set.to_sequence ~order:`Decreasing roots |>
Seq.fold ~init:[] ~f:(fun work root ->
Seq.fold ~init ~f:(fun work root ->
Dest {dst=root; parent=None} :: work)

type state = {
stop : bool;
work : task list; (* work list*)
debt : task list;
curr : task;
addr : addr; (* current address *)
begs : Set.M(Addr).t; (* begins of basic blocks *)
Expand Down Expand Up @@ -229,20 +234,21 @@ end = struct
let s = match s.curr with
| Fall _ as task ->
cancel task s
| _ -> s in
| t -> {s with debt = t :: s.debt} in
match s.work with
| [] -> empty (step s)
| _ -> view (step s) base ~empty ~ready
| _ -> view (step s) base ~empty ~ready

let start mem ~code ~data ~init =
let start mem ~debt ~code ~data ~init =
let init = if Set.is_empty init
then Set.singleton (module Addr) (Memory.min_addr mem)
else init in
let work = init_work init in
let work = init_work debt init in
let start = Set.min_elt_exn init in
view {
work; data; usat=code;
addr = start;
debt = [];
curr = Dest {dst = start; parent = None};
stop = false;
begs = Set.empty (module Addr);
Expand Down Expand Up @@ -314,17 +320,18 @@ let classify_mem mem =
| Some true -> (code,data,Set.add root addr)
| _ -> (code,data,root))

let scan_mem arch disasm base : Machine.state KB.t =
let scan_mem arch disasm debt base : Machine.state KB.t =
classify_mem base >>= fun (code,data,init) ->
let step d s =
if Machine.is_ready s then KB.return s
else Machine.view s base ~ready:(fun s mem -> Dis.jump d mem s)
~empty:KB.return in
Machine.start base ~code ~data ~init
Machine.start base ~debt ~code ~data ~init
~ready:(fun init mem ->
Dis.run disasm mem ~stop_on:[`Valid]
~return:KB.return ~init
~stopped:(fun d s -> step d (Machine.stopped s))
~stopped:(fun d s ->
step d (Machine.stopped s))
~hit:(fun d mem insn s ->
collect_dests arch mem insn >>= fun dests ->
if Set.is_empty dests.resolved &&
Expand All @@ -333,7 +340,8 @@ let scan_mem arch disasm base : Machine.state KB.t =
else
delay arch mem insn >>= fun delay ->
step d @@ Machine.jumped s mem dests delay)
~invalid:(fun d _ s -> step d (Machine.failed s s.addr)))
~invalid:(fun d _ s ->
step d (Machine.failed s s.addr)))
~empty:KB.return

type insns = Theory.Label.t list
Expand All @@ -343,13 +351,15 @@ type state = {
jmps : dsts Addr.Map.t;
data : Addr.Set.t;
mems : mem list;
debt : Machine.task list;
} [@@deriving bin_io]

let init = {
begs = Set.empty (module Addr);
jmps = Map.empty (module Addr);
data = Set.empty (module Addr);
mems = []
mems = [];
debt = [];
}

let query_arch addr =
Expand All @@ -368,6 +378,7 @@ let commit_calls {jmps} =
Set.to_sequence dsts.resolved |>
KB.Seq.iter ~f:(fun addr ->
Theory.Label.for_addr (Word.to_bitvec addr) >>= fun dst ->
KB.provide Theory.Label.is_valid dst (Some true) >>= fun () ->
KB.provide Theory.Label.is_subroutine dst (Some true))
else KB.return ())

Expand All @@ -381,19 +392,20 @@ let scan mem s =
match Dis.create (Arch.to_string arch) with
| Error _ -> KB.return s
| Ok dis ->
scan_mem arch dis mem >>= fun {Machine.begs; jmps; data} ->
scan_mem arch dis s.debt mem >>= fun {Machine.begs; jmps; data; debt} ->
let jmps = Map.merge s.jmps jmps ~f:(fun ~key:_ -> function
| `Left dsts | `Right dsts | `Both (_,dsts) -> Some dsts) in
let begs = Set.union s.begs begs in
let data = Set.union s.data data in
let s = {begs; data; jmps; mems = mem :: s.mems} in
let s = {begs; data; jmps; mems = mem :: s.mems; debt} in
commit_calls s >>| fun () ->
s

let merge t1 t2 = {
begs = Set.union t1.begs t2.begs;
data = Set.union t1.data t2.data;
mems = List.rev_append t2.mems t1.mems;
debt = List.rev_append t2.debt t1.debt;
jmps = Map.merge t1.jmps t2.jmps ~f:(fun ~key:_ -> function
| `Left dsts | `Right dsts -> Some dsts
| `Both (d1,d2) -> Some {
Expand Down

0 comments on commit 3921672

Please sign in to comment.