From e91bd87aef1d41aca8f1398893466387d7c27f0d Mon Sep 17 00:00:00 2001 From: Ivan Gotovchits Date: Thu, 14 Oct 2021 16:48:37 -0400 Subject: [PATCH] improves arm-family support (#1353) * tweaks symbolization and function start identification facilities We remove the dependency on the Bap.Std.Image and instead use the image specification directly. These gives us strictly more symbols, as image imposes extra constraints, which my hide functions starts and their names. More information is not always better, as we now have more chances to get the conflicting knowledge. To ensure that we're able to preserve as much information as possible without compromising correctness we leverage our agent-based conflict resolution system. We push all names in which we're not completely sure into possible aliases and use a new agent, `bap:gossiper` to propse names from that set. To make everything work fine, we pushed down the reliability of the objdump symbolizer (as we want bap to have the final word). The improved symbolization facility uncovered a small bug in the way how the x86 lock intrinsic was implemented, it was named just `"lock"`, which obviously may conflict with a normal function with the same name (which was uncovered by our testsuite). This commit adds the `x86` prefix to the intrinsic, e.g., `x86:lock` as well as properly delimits the locked code with the corresponding `x86:unlock` intrinsic. * adds 32-bit variants of armv8 and armv9, specifies alignments * fixes `blx pc` semantics It should be `call arm:unpredictable` instead of an interworking branch (which essentially breaks the disassembler) * assumes that all non word-aligned addresses have the T32 encoding * fixes the test case with a non-word-aligned base --- Makefile | 2 +- lib/arm/arm_target.ml | 73 +++++++++++++++++++++++++++----- lib/arm/arm_target.mli | 12 ++++++ plugins/arm/arm_main.ml | 2 + plugins/arm/semantics/thumb.lisp | 25 +++++++++++ plugins/thumb/thumb_main.ml | 10 ++++- 6 files changed, 111 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index be56557b8..2f3aad254 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ testsuite: git clone https://github.com/BinaryAnalysisPlatform/bap-testsuite.git testsuite check: testsuite - make REVISION=f8af868e4f61 -C testsuite + make REVISION=81d9159 -C testsuite .PHONY: indent check-style status-clean diff --git a/lib/arm/arm_target.ml b/lib/arm/arm_target.ml index dcdcfcbb0..a5136794d 100644 --- a/lib/arm/arm_target.ml +++ b/lib/arm/arm_target.ml @@ -188,12 +188,15 @@ module Family (Order : Endianness) = struct let order = CT.Endianness.name endianness in name ^ "+" ^ KB.Name.unqualified order - let (<:) parent name = + let def ?code_alignment ~parent name = if CT.Target.is_unknown parent then CT.Target.unknown else CT.Target.declare ~package (ordered name) ~parent + ?code_alignment ~nicknames:[name] + let (<:) parent name = def ~parent name + let is_bi_endian = CT.Endianness.(equal bi) endianness let v4 = @@ -201,6 +204,7 @@ module Family (Order : Endianness) = struct then CT.Target.unknown else CT.Target.declare ~package (ordered "armv4") ~parent + ~code_alignment:32 ~nicknames:["armv4"] ~bits:32 ~byte:8 @@ -210,7 +214,7 @@ module Family (Order : Endianness) = struct ~vars:vars32 ~regs:regs32 - let v4t = v4 <: "armv4t" + let v4t = def "v4t" ~parent:v4 ~code_alignment:16 let v5 = v4 <: "armv5" let v5t = v5 <: "armv5t" let v5te = v5t <: "armv5te" @@ -225,6 +229,7 @@ module Family (Order : Endianness) = struct else CT.Target.declare ~package (ordered "armv7") ~parent ~nicknames:["armv7"] + ~code_alignment:16 ~bits:32 ~byte:8 ~endianness @@ -247,16 +252,33 @@ module Family (Order : Endianness) = struct ~vars:vars32_fp ~regs:(regs32@vfp3regs) + let v8a = CT.Target.declare ~package (ordered "armv8-a") ~parent:v7 - ~nicknames:["armv8-a"] + ~nicknames:["armv8-a"; "aarch64"] ~aliasing + ~code_alignment:32 ~bits:64 ~code:datav8 ~data:datav8 ~vars:varsv8 ~regs:regsv8 + let v8a32 = + Theory.Target.declare ~package (ordered "armv8-a+aarch32") + ~nicknames:["armv8-a+aarch32"] + ~parent:v7 + + let v8m32 = + Theory.Target.declare ~package (ordered "armv8-m+aarch32") + ~nicknames:["armv8-m+aarch32"] + ~parent:v7m + + let v8r32 = + Theory.Target.declare ~package (ordered "armv8-r+aarch32") + ~nicknames:["armv8-r+aarch32"] + ~parent:v7 + let v81a = v8a <: "armv8.1-a" let v82a = v81a <: "armv8.2-a" let v83a = v82a <: "armv8.3-a" @@ -264,7 +286,10 @@ module Family (Order : Endianness) = struct let v85a = v84a <: "armv8.5-a" let v86a = v85a <: "armv8.6-a" + let v9a = v86a <: "armv9-a" + let parent = if is_bi_endian then v7 else v4 + end module LE = Family(struct let endianness = CT.Endianness.le end) @@ -376,7 +401,12 @@ let arms : arms Map.M(CT.Target).t = LE.v6t2, `armv6; LE.v7, `armv7; LE.v7a, `armv7; + LE.v7m, `thumbv7; LE.v7afp, `armv7; + Bi.v7, `armv7; + Bi.v7a, `armv7; + Bi.v7m, `thumbv7; + Bi.v7afp, `armv7; LE.v8a, `aarch64; LE.v81a, `aarch64; LE.v82a, `aarch64; @@ -397,6 +427,7 @@ let arms : arms Map.M(CT.Target).t = EB.v6t2,`armv6eb; EB.v7, `armv7eb; EB.v7a, `armv7eb; + EB.v7m, `thumbv7eb; EB.v7afp, `armv7eb; EB.v8a, `aarch64_be; EB.v81a, `aarch64_be; @@ -435,8 +466,18 @@ let register ?attrs encoding triple = Dis.create ?attrs ~backend:"llvm" triple let symbol_values doc = - let field = Ogre.Query.(select (from Image.Scheme.symbol_value)) in - match Ogre.eval (Ogre.collect field) doc with + let open Ogre.Let in + let open Image.Scheme in + let symbols = + let* symtab = + Ogre.(collect Query.(select (from symbol_value))) in + let+ entry = Ogre.request entry_point in + match entry with + | None -> symtab + | Some entry -> + let mask = Int64.(-1L lsl 1) in + Seq.cons Int64.(entry land mask, entry) symtab in + match Ogre.eval symbols doc with | Ok syms -> syms | Error err -> failwithf "Arm_target: broken file specification: %s" @@ -448,6 +489,7 @@ module Encodings = struct let lsb x = Int64.(x land 1L) let is_thumb x = Int64.equal (lsb x) 1L + let symbols_encoding spec = symbol_values spec |> Seq.fold ~init:empty ~f:(fun symbols (addr,value) -> @@ -479,16 +521,19 @@ let has_t32 label = Map.exists ~f:(Theory.Language.equal llvm_t32) +let is_word_aligned x = Bitvec.(M32.(int 3 land x) = zero) + let compute_encoding_from_symbol_table label = let (>>=?) x f = x >>= function | None -> !!Theory.Language.unknown | Some x -> f x in KB.collect CT.Label.unit label >>=? fun unit -> KB.collect CT.Label.addr label >>=? fun addr -> - KB.collect Encodings.slot unit >>= fun encodings -> - KB.return @@ match Map.find encodings addr with - | Some x -> x - | None -> CT.Language.unknown + KB.collect Encodings.slot unit >>| fun encodings -> + if not (is_word_aligned addr) then llvm_t32 + else match Map.find encodings addr with + | Some x -> x + | None -> CT.Language.unknown (* here t < p means that t was introduced before p *) let (>=) t p = CT.Target.belongs t p @@ -496,9 +541,15 @@ let (<) t p = t >= p && not (p >= t) let (<=) t p = t = p || t < p let is_arm = CT.Target.belongs parent -let before_thumb2 t = t < LE.v6t2 || t < EB.v6t2 let is_64bit t = LE.v8a <= t || EB.v8a <= t || Bi.v8a <= t -let is_thumb_only t = LE.v7m <= t || EB.v7m <= t || Bi.v7m <= t + +let m_profiles = [ + LE.v7m; EB.v7m; Bi.v7m; + LE.v8m32; EB.v8m32; Bi.v8m32; +] +let is_thumb_only t = + List.exists m_profiles ~f:(fun p -> p <= t) + let is_big t = Theory.Target.endianness t = Theory.Endianness.eb let is_little t = Theory.Target.endianness t = Theory.Endianness.le diff --git a/lib/arm/arm_target.mli b/lib/arm/arm_target.mli index fb3126d88..3140b95ea 100644 --- a/lib/arm/arm_target.mli +++ b/lib/arm/arm_target.mli @@ -38,12 +38,16 @@ module LE : sig val v7a : Theory.Target.t val v7afp : Theory.Target.t val v8a : Theory.Target.t + val v8a32 : Theory.Target.t + val v8m32 : Theory.Target.t + val v8r32 : Theory.Target.t val v81a : Theory.Target.t val v82a : Theory.Target.t val v83a : Theory.Target.t val v84a : Theory.Target.t val v85a : Theory.Target.t val v86a : Theory.Target.t + val v9a : Theory.Target.t end @@ -69,12 +73,16 @@ module EB : sig val v7a : Theory.Target.t val v7afp : Theory.Target.t val v8a : Theory.Target.t + val v8a32 : Theory.Target.t + val v8m32 : Theory.Target.t + val v8r32 : Theory.Target.t val v81a : Theory.Target.t val v82a : Theory.Target.t val v83a : Theory.Target.t val v84a : Theory.Target.t val v85a : Theory.Target.t val v86a : Theory.Target.t + val v9a : Theory.Target.t end @@ -90,12 +98,16 @@ module Bi : sig val v7a : Theory.Target.t val v7afp : Theory.Target.t val v8a : Theory.Target.t + val v8a32 : Theory.Target.t + val v8m32 : Theory.Target.t + val v8r32 : Theory.Target.t val v81a : Theory.Target.t val v82a : Theory.Target.t val v83a : Theory.Target.t val v84a : Theory.Target.t val v85a : Theory.Target.t val v86a : Theory.Target.t + val v9a : Theory.Target.t end val llvm_a32 : Theory.language diff --git a/plugins/arm/arm_main.ml b/plugins/arm/arm_main.ml index 3b58078fd..8939f54b0 100644 --- a/plugins/arm/arm_main.ml +++ b/plugins/arm/arm_main.ml @@ -25,6 +25,8 @@ let backend = type arms = [ | Arch.arm | Arch.armeb + | Arch.thumb + | Arch.thumbeb ] [@@deriving enumerate] let () = Bap_main.Extension.declare ~doc @@ fun ctxt -> diff --git a/plugins/arm/semantics/thumb.lisp b/plugins/arm/semantics/thumb.lisp index c87259e43..76c847e5b 100644 --- a/plugins/arm/semantics/thumb.lisp +++ b/plugins/arm/semantics/thumb.lisp @@ -1,6 +1,9 @@ (require bits) (require arm-bits) + +;; Note: page references are from ARM DDI 0403E.b + (declare (context (target arm))) (defpackage thumb (:use core target arm)) @@ -63,3 +66,25 @@ (set$ rd (* rn rm)) (set ZF (is-zero rd)) (set NF (msb rd))) + + +(defun t2STRDi8 (rt1 rt2 rn imm pre _) + "strd rt1, rt2, [rn, off]" + (when (condition-holds pre) + (store-word (+ rn imm) rt1) + (store-word (+ rn imm (sizeof word-width)) rt2))) + +(defun t2ADDri12 (rd rn imm pre _) + "addw rd, rn, imm; A7-189, T4 " + (when (condition-holds pre) + (set$ rd (+ rn imm)))) + +(defun t2STRHi12 (rt rn imm pre _) + "strh.w rt, [rn, imm]; A7-442; T2" + (when (condition-holds pre) + (store-word (+ rn imm) (cast-low 16 rt)))) + +(defun t2B (off pre _) + "b.w imm; A7-207, T3" + (when (condition-holds pre) + (exec-addr (+ (get-program-counter) off 4)))) diff --git a/plugins/thumb/thumb_main.ml b/plugins/thumb/thumb_main.ml index e4179d66d..0f3af408a 100644 --- a/plugins/thumb/thumb_main.ml +++ b/plugins/thumb/thumb_main.ml @@ -35,6 +35,8 @@ let (>>=?) x f = x >>= function module Thumb(CT : Theory.Core) = struct + module Thumb = Thumb_core.Make(CT) + let reg r = Theory.Var.define s32 (Reg.name r) let imm x = Option.value_exn (Imm.to_int x) let regs rs = List.map rs ~f:(function @@ -182,6 +184,10 @@ module Thumb(CT : Theory.Core) = struct info "unhandled bit-wise instruction: %a" pp_insn insn; !!Insn.empty + let unpredictable = + Theory.Label.for_name "arm:unpredictable" >>= CT.goto + + (* these are not entirely complete *) let lift_branch pc opcode insn = @@ -192,7 +198,9 @@ module Thumb(CT : Theory.Core) = struct | `tBcc, [|Imm dst; Imm c; _|] -> bcc pc (cnd c) (imm dst) | `tBL, [|_; _; Imm dst; _|] | `tBLXi, [|_; _; Imm dst|] -> bli pc (imm dst) - | `tBLXr, [|_; _; Reg dst|]when is_pc (reg dst) -> blxi pc 0 + | `tBLXr, [|_; _; Reg dst|]when is_pc (reg dst) -> + (* blx pc is unpredictable in all versions of ARM *) + Thumb.ctrl unpredictable | `tBLXr, [|_; _; Reg dst|]-> blxr pc (reg dst) | `tBX, [|Reg dst; _; _|]when is_pc (reg dst) -> bxi pc 0 | `tBX, [|Reg dst;_;_|] -> bxr (reg dst)