Skip to content

Commit

Permalink
improves arm-family support (#1353)
Browse files Browse the repository at this point in the history
* tweaks symbolization and function start identification facilities

We remove the dependency on the Bap.Std.Image and instead use the
image specification directly. These gives us strictly more symbols,
as image imposes extra constraints, which my hide functions starts
and their names.

More information is not always better, as we now have more chances to
get the conflicting knowledge. To ensure that we're able to preserve
as much information as possible without compromising correctness we
leverage our agent-based conflict resolution system. We push all names
in which we're not completely sure into possible aliases and use a new
agent, `bap:gossiper` to propse names from that set.

To make everything work fine, we pushed down the reliability of the
objdump symbolizer (as we want bap to have the final word).

The improved symbolization facility uncovered a small bug in the way
how the x86 lock intrinsic was implemented, it was named just
`"lock"`, which obviously may conflict with a normal function with the
same name (which was uncovered by our testsuite). This commit adds
the `x86` prefix to the intrinsic, e.g., `x86:lock` as well as
properly delimits the locked code with the corresponding `x86:unlock`
intrinsic.

* adds 32-bit variants of armv8 and armv9, specifies alignments

* fixes `blx pc` semantics

It should be `call arm:unpredictable` instead of an interworking
branch (which essentially breaks the disassembler)

* assumes that all non word-aligned addresses have the T32 encoding

* fixes the test case with a non-word-aligned base
  • Loading branch information
ivg authored Oct 14, 2021
1 parent 07e4fa9 commit e91bd87
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ testsuite:
git clone https://github.com/BinaryAnalysisPlatform/bap-testsuite.git testsuite

check: testsuite
make REVISION=f8af868e4f61 -C testsuite
make REVISION=81d9159 -C testsuite

.PHONY: indent check-style status-clean

Expand Down
73 changes: 62 additions & 11 deletions lib/arm/arm_target.ml
Original file line number Diff line number Diff line change
Expand Up @@ -188,19 +188,23 @@ module Family (Order : Endianness) = struct
let order = CT.Endianness.name endianness in
name ^ "+" ^ KB.Name.unqualified order

let (<:) parent name =
let def ?code_alignment ~parent name =
if CT.Target.is_unknown parent
then CT.Target.unknown
else CT.Target.declare ~package (ordered name) ~parent
?code_alignment
~nicknames:[name]

let (<:) parent name = def ~parent name

let is_bi_endian = CT.Endianness.(equal bi) endianness

let v4 =
if is_bi_endian
then CT.Target.unknown
else CT.Target.declare ~package (ordered "armv4")
~parent
~code_alignment:32
~nicknames:["armv4"]
~bits:32
~byte:8
Expand All @@ -210,7 +214,7 @@ module Family (Order : Endianness) = struct
~vars:vars32
~regs:regs32

let v4t = v4 <: "armv4t"
let v4t = def "v4t" ~parent:v4 ~code_alignment:16
let v5 = v4 <: "armv5"
let v5t = v5 <: "armv5t"
let v5te = v5t <: "armv5te"
Expand All @@ -225,6 +229,7 @@ module Family (Order : Endianness) = struct
else CT.Target.declare ~package (ordered "armv7")
~parent
~nicknames:["armv7"]
~code_alignment:16
~bits:32
~byte:8
~endianness
Expand All @@ -247,24 +252,44 @@ module Family (Order : Endianness) = struct
~vars:vars32_fp
~regs:(regs32@vfp3regs)


let v8a =
CT.Target.declare ~package (ordered "armv8-a") ~parent:v7
~nicknames:["armv8-a"]
~nicknames:["armv8-a"; "aarch64"]
~aliasing
~code_alignment:32
~bits:64
~code:datav8
~data:datav8
~vars:varsv8
~regs:regsv8

let v8a32 =
Theory.Target.declare ~package (ordered "armv8-a+aarch32")
~nicknames:["armv8-a+aarch32"]
~parent:v7

let v8m32 =
Theory.Target.declare ~package (ordered "armv8-m+aarch32")
~nicknames:["armv8-m+aarch32"]
~parent:v7m

let v8r32 =
Theory.Target.declare ~package (ordered "armv8-r+aarch32")
~nicknames:["armv8-r+aarch32"]
~parent:v7

let v81a = v8a <: "armv8.1-a"
let v82a = v81a <: "armv8.2-a"
let v83a = v82a <: "armv8.3-a"
let v84a = v83a <: "armv8.4-a"
let v85a = v84a <: "armv8.5-a"
let v86a = v85a <: "armv8.6-a"

let v9a = v86a <: "armv9-a"

let parent = if is_bi_endian then v7 else v4

end

module LE = Family(struct let endianness = CT.Endianness.le end)
Expand Down Expand Up @@ -376,7 +401,12 @@ let arms : arms Map.M(CT.Target).t =
LE.v6t2, `armv6;
LE.v7, `armv7;
LE.v7a, `armv7;
LE.v7m, `thumbv7;
LE.v7afp, `armv7;
Bi.v7, `armv7;
Bi.v7a, `armv7;
Bi.v7m, `thumbv7;
Bi.v7afp, `armv7;
LE.v8a, `aarch64;
LE.v81a, `aarch64;
LE.v82a, `aarch64;
Expand All @@ -397,6 +427,7 @@ let arms : arms Map.M(CT.Target).t =
EB.v6t2,`armv6eb;
EB.v7, `armv7eb;
EB.v7a, `armv7eb;
EB.v7m, `thumbv7eb;
EB.v7afp, `armv7eb;
EB.v8a, `aarch64_be;
EB.v81a, `aarch64_be;
Expand Down Expand Up @@ -435,8 +466,18 @@ let register ?attrs encoding triple =
Dis.create ?attrs ~backend:"llvm" triple

let symbol_values doc =
let field = Ogre.Query.(select (from Image.Scheme.symbol_value)) in
match Ogre.eval (Ogre.collect field) doc with
let open Ogre.Let in
let open Image.Scheme in
let symbols =
let* symtab =
Ogre.(collect Query.(select (from symbol_value))) in
let+ entry = Ogre.request entry_point in
match entry with
| None -> symtab
| Some entry ->
let mask = Int64.(-1L lsl 1) in
Seq.cons Int64.(entry land mask, entry) symtab in
match Ogre.eval symbols doc with
| Ok syms -> syms
| Error err ->
failwithf "Arm_target: broken file specification: %s"
Expand All @@ -448,6 +489,7 @@ module Encodings = struct
let lsb x = Int64.(x land 1L)
let is_thumb x = Int64.equal (lsb x) 1L


let symbols_encoding spec =
symbol_values spec |>
Seq.fold ~init:empty ~f:(fun symbols (addr,value) ->
Expand Down Expand Up @@ -479,26 +521,35 @@ let has_t32 label =
Map.exists ~f:(Theory.Language.equal llvm_t32)


let is_word_aligned x = Bitvec.(M32.(int 3 land x) = zero)

let compute_encoding_from_symbol_table label =
let (>>=?) x f = x >>= function
| None -> !!Theory.Language.unknown
| Some x -> f x in
KB.collect CT.Label.unit label >>=? fun unit ->
KB.collect CT.Label.addr label >>=? fun addr ->
KB.collect Encodings.slot unit >>= fun encodings ->
KB.return @@ match Map.find encodings addr with
| Some x -> x
| None -> CT.Language.unknown
KB.collect Encodings.slot unit >>| fun encodings ->
if not (is_word_aligned addr) then llvm_t32
else match Map.find encodings addr with
| Some x -> x
| None -> CT.Language.unknown

(* here t < p means that t was introduced before p *)
let (>=) t p = CT.Target.belongs t p
let (<) t p = t >= p && not (p >= t)
let (<=) t p = t = p || t < p
let is_arm = CT.Target.belongs parent

let before_thumb2 t = t < LE.v6t2 || t < EB.v6t2
let is_64bit t = LE.v8a <= t || EB.v8a <= t || Bi.v8a <= t
let is_thumb_only t = LE.v7m <= t || EB.v7m <= t || Bi.v7m <= t

let m_profiles = [
LE.v7m; EB.v7m; Bi.v7m;
LE.v8m32; EB.v8m32; Bi.v8m32;
]
let is_thumb_only t =
List.exists m_profiles ~f:(fun p -> p <= t)


let is_big t = Theory.Target.endianness t = Theory.Endianness.eb
let is_little t = Theory.Target.endianness t = Theory.Endianness.le
Expand Down
12 changes: 12 additions & 0 deletions lib/arm/arm_target.mli
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,16 @@ module LE : sig
val v7a : Theory.Target.t
val v7afp : Theory.Target.t
val v8a : Theory.Target.t
val v8a32 : Theory.Target.t
val v8m32 : Theory.Target.t
val v8r32 : Theory.Target.t
val v81a : Theory.Target.t
val v82a : Theory.Target.t
val v83a : Theory.Target.t
val v84a : Theory.Target.t
val v85a : Theory.Target.t
val v86a : Theory.Target.t
val v9a : Theory.Target.t
end


Expand All @@ -69,12 +73,16 @@ module EB : sig
val v7a : Theory.Target.t
val v7afp : Theory.Target.t
val v8a : Theory.Target.t
val v8a32 : Theory.Target.t
val v8m32 : Theory.Target.t
val v8r32 : Theory.Target.t
val v81a : Theory.Target.t
val v82a : Theory.Target.t
val v83a : Theory.Target.t
val v84a : Theory.Target.t
val v85a : Theory.Target.t
val v86a : Theory.Target.t
val v9a : Theory.Target.t
end


Expand All @@ -90,12 +98,16 @@ module Bi : sig
val v7a : Theory.Target.t
val v7afp : Theory.Target.t
val v8a : Theory.Target.t
val v8a32 : Theory.Target.t
val v8m32 : Theory.Target.t
val v8r32 : Theory.Target.t
val v81a : Theory.Target.t
val v82a : Theory.Target.t
val v83a : Theory.Target.t
val v84a : Theory.Target.t
val v85a : Theory.Target.t
val v86a : Theory.Target.t
val v9a : Theory.Target.t
end

val llvm_a32 : Theory.language
Expand Down
2 changes: 2 additions & 0 deletions plugins/arm/arm_main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ let backend =
type arms = [
| Arch.arm
| Arch.armeb
| Arch.thumb
| Arch.thumbeb
] [@@deriving enumerate]

let () = Bap_main.Extension.declare ~doc @@ fun ctxt ->
Expand Down
25 changes: 25 additions & 0 deletions plugins/arm/semantics/thumb.lisp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
(require bits)
(require arm-bits)


;; Note: page references are from ARM DDI 0403E.b

(declare (context (target arm)))

(defpackage thumb (:use core target arm))
Expand Down Expand Up @@ -63,3 +66,25 @@
(set$ rd (* rn rm))
(set ZF (is-zero rd))
(set NF (msb rd)))


(defun t2STRDi8 (rt1 rt2 rn imm pre _)
"strd rt1, rt2, [rn, off]"
(when (condition-holds pre)
(store-word (+ rn imm) rt1)
(store-word (+ rn imm (sizeof word-width)) rt2)))

(defun t2ADDri12 (rd rn imm pre _)
"addw rd, rn, imm; A7-189, T4 "
(when (condition-holds pre)
(set$ rd (+ rn imm))))

(defun t2STRHi12 (rt rn imm pre _)
"strh.w rt, [rn, imm]; A7-442; T2"
(when (condition-holds pre)
(store-word (+ rn imm) (cast-low 16 rt))))

(defun t2B (off pre _)
"b.w imm; A7-207, T3"
(when (condition-holds pre)
(exec-addr (+ (get-program-counter) off 4))))
10 changes: 9 additions & 1 deletion plugins/thumb/thumb_main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ let (>>=?) x f = x >>= function

module Thumb(CT : Theory.Core) = struct

module Thumb = Thumb_core.Make(CT)

let reg r = Theory.Var.define s32 (Reg.name r)
let imm x = Option.value_exn (Imm.to_int x)
let regs rs = List.map rs ~f:(function
Expand Down Expand Up @@ -182,6 +184,10 @@ module Thumb(CT : Theory.Core) = struct
info "unhandled bit-wise instruction: %a" pp_insn insn;
!!Insn.empty

let unpredictable =
Theory.Label.for_name "arm:unpredictable" >>= CT.goto



(* these are not entirely complete *)
let lift_branch pc opcode insn =
Expand All @@ -192,7 +198,9 @@ module Thumb(CT : Theory.Core) = struct
| `tBcc, [|Imm dst; Imm c; _|] -> bcc pc (cnd c) (imm dst)
| `tBL, [|_; _; Imm dst; _|]
| `tBLXi, [|_; _; Imm dst|] -> bli pc (imm dst)
| `tBLXr, [|_; _; Reg dst|]when is_pc (reg dst) -> blxi pc 0
| `tBLXr, [|_; _; Reg dst|]when is_pc (reg dst) ->
(* blx pc is unpredictable in all versions of ARM *)
Thumb.ctrl unpredictable
| `tBLXr, [|_; _; Reg dst|]-> blxr pc (reg dst)
| `tBX, [|Reg dst; _; _|]when is_pc (reg dst) -> bxi pc 0
| `tBX, [|Reg dst;_;_|] -> bxr (reg dst)
Expand Down

0 comments on commit e91bd87

Please sign in to comment.