Skip to content

Commit

Permalink
[WIP] overhauls the target/architecture abstraction (3/n)
Browse files Browse the repository at this point in the history
In this episode, we liberate `bap mc` and `bap objdump` from the bonds
of the `Arch.t` representation. We also add the systemz lifter for
demonstration purposes. Of course, the lifter is minimal and far from
being usable, but that serves well its didactic purposes.

The interface of the `bap mc` command is preserved but is extended
with a few more command-line options that provide a great deal of
flexibility. Not only it is now possible to specify the target and
encoding, but it is now possible to pass options directly to the
backend, which is useful for disassembling targets that are not yet
known to BAP. Below is an excerpt from the bap-mc man page
(see bap mc --help)

```
       SETTING ARCHITECHTURE

       The target architecture is controlled by several groups of options that
       can not be used together:

       - arch;
       - target and encoding;
       - triple, backend, cpu, bits, and order.

       The arch option provides the least control but is easiest to use. It
       relies on the dependency-injection mechanism and lets the target
       support packages (plugins that implement support for the given
       architecture) do their best to guess the target and encoding that
       matches the provided name. Use the common names for the architecture
       and it should work. You can use the bits and order options to give more
       hints to the target support packages. They default to 32 and little
       correspondingly.

       The target and encoding provides precise control over the selection of
       the target and the encoding that is used to represent machine
       instructions. The encoding field can be omitted and will be deduced
       from the target. Use  bap list targets and  bap list encodings to get
       the list of supported targets and encodings respectivly.

       Finally, the triple, backend, cpu,... group of options provides the
       full control over the disassembler backend and bypasses the
       dependency-injection mechanism to pass the specified options directly
       to the corresponding backends. This enables disassembling of targets
       and encodings that are not yet supported by BAP. The meanings of the
       options totally depend on the selected backend and they are passed as
       is to the corresponding arguments of the Disasm_expert.Basic.create
       function. The bits and order defaults to 32 and little corresondingly
       and are used to specify the number of bits in the target's addresses
       and the order of bytes in the word. This group of options is useful
       during the implementation and debugging of new targets and thus is
       reserved for experts. Note, when this group is used the semantics of
       the instructions will not be provided as it commonly requires the
       target specification.
```
  • Loading branch information
ivg committed Oct 2, 2020
1 parent aae5c10 commit 80ace08
Show file tree
Hide file tree
Showing 22 changed files with 568 additions and 42 deletions.
22 changes: 18 additions & 4 deletions lib/arm/arm_target.ml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ let enable_loader () =
| "arm","v86a" -> Family.v86a
| "thumb", "v4" -> Family.v4t
| "thumb", "v5" -> Family.v5t
| "thumb",_ -> Family.v5t
| "aarch64",_ -> Family.v86a
| _ -> Family.v7

Expand Down Expand Up @@ -367,11 +368,11 @@ module Encodings = struct
symbols_encoding
end

let (>>=?) x f = x >>= function
| None -> !!CT.Language.unknown
| Some x -> f x

let compute_encoding_from_symbol_table default label =
let (>>=?) x f = x >>= function
| None -> !!default
| Some x -> f x in
KB.collect CT.Label.unit label >>=? fun unit ->
KB.collect CT.Label.addr label >>=? fun addr ->
KB.collect Encodings.slot unit >>= fun encodings ->
Expand All @@ -395,10 +396,22 @@ let is_64bit t =
t >= EB.v8a ||
t >= Bi.v8a

(* we should represent it with the target options *)
let has_tsuffix =
let thumbs = Set.of_list (module Theory.Target) [
LE.v4t; LE.v5t; LE.v5te; LE.v5tej;
Bi.v4t; Bi.v5t; Bi.v5te; Bi.v5tej;
EB.v4t; EB.v5t; EB.v5te; EB.v5tej;
] in
Set.mem thumbs

let guess_encoding label target =
if is_arm target then
if before_thumb2 target
then compute_encoding_from_symbol_table llvm_a32 label
then
let default = if has_tsuffix target
then llvm_t32 else llvm_a32 in
compute_encoding_from_symbol_table default label
else KB.return @@
if is_64bit target then llvm_a64 else llvm_a32
else KB.return CT.Language.unknown
Expand All @@ -412,6 +425,7 @@ let enable_decoder () =
KB.promise CT.Label.encoding @@ fun label ->
CT.Label.target label >>= guess_encoding label


let load () =
enable_loader ();
enable_arch ();
Expand Down
6 changes: 6 additions & 0 deletions lib/bap_core_theory/bap_core_theory.mli
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,12 @@ module Theory : sig
*)
val get : ?package:string -> string -> t

(** [read ?package name] is a synonym for [get ?package name].
Introduces for the consistency with the [Enum.S] interface.
*)
val read : ?package:string -> string -> t

(** [declared ()] is the list of declared targets.
The order is unspecified, see also {!families}. The list
doesn't include the [unknown] target. *)
Expand Down
2 changes: 2 additions & 0 deletions lib/bap_core_theory/bap_core_theory_target.ml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ let get ?package name =
then invalid_argf "Unknown target %s" (Name.to_string name) ();
name

let read = get

let info name = match Hashtbl.find targets name with
| None -> unknown
| Some t -> t
Expand Down
1 change: 1 addition & 0 deletions lib/bap_core_theory/bap_core_theory_target.mli
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ val declare :
string -> t

val get : ?package:string -> string -> t
val read : ?package:string -> string -> t
val lookup : ?package:string -> string -> t option
val unknown : t
val is_unknown : t -> bool
Expand Down
13 changes: 13 additions & 0 deletions lib/bap_image/bap_memory.ml
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,16 @@ let slot = KB.Class.property ~package:"bap"
Theory.Program.cls "mem" domain
~public:true
~desc:"a memory region occupied by the program"

let () =
let open KB.Syntax in
KB.promise Theory.Label.addr @@ fun label ->
KB.collect slot label >>|? fun mem ->
Some (Addr.to_bitvec (min_addr mem))

let () =
let open KB.Rule in
declare ~package:"bap" "addr-of-mem" |>
require slot |>
provide Theory.Label.addr |>
comment "addr of the first byte"
40 changes: 40 additions & 0 deletions lib/bap_systemz/bap_systemz_target.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
open Core_kernel
open Bap_core_theory

let package = "bap"

type r64 and r32 and r16 and r8

type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort

let r64 : r64 bitv = Theory.Bitv.define 64
let r32 : r32 bitv = Theory.Bitv.define 32
let r16 : r16 bitv = Theory.Bitv.define 16
let r8 : r8 bitv = Theory.Bitv.define 8
let bool = Theory.Bool.t

let reg t n = Theory.Var.define t n

let array ?(index=string_of_int) t pref size =
List.init size ~f:(fun i -> reg t (pref ^ index i))

let untyped = List.map ~f:Theory.Var.forget
let (@<) xs ys = untyped xs @ untyped ys

let mems = Theory.Mem.define r64 r8

let gpr = array r64 "R" 16
let fpr = array r64 "F" 16
let mem = reg mems "mem"

let vars = gpr @< fpr @< [mem]

let parent = Theory.Target.declare ~package "systemz"

let z9 = Theory.Target.declare ~package "systemz9" ~parent
~bits:64
~code:mem
~data:mem
~vars

let llvm_encoding = Theory.Language.declare ~package "llvm-systemz"
21 changes: 21 additions & 0 deletions lib/bap_systemz/bap_systemz_target.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
open Bap_core_theory


type r64 and r32 and r16 and r8

type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort

val r64 : r64 bitv
val r32 : r32 bitv
val r16 : r16 bitv
val r8 : r8 bitv

val mem : (r64, r8) Theory.Mem.t Theory.var
val gpr : r64 Theory.Bitv.t Theory.var list
val fpr : r64 Theory.Bitv.t Theory.var list

val parent : Theory.Target.t

val z9 : Theory.Target.t

val llvm_encoding : Theory.Language.t
1 change: 1 addition & 0 deletions lib/bap_types/bap_arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ module T = struct
let persistent = KB.Persistent.of_binable (module struct
type t = arch [@@deriving bin_io]
end)

let slot = KB.Class.property ~package:"bap"
Theory.Program.cls "arch" domain
~persistent
Expand Down
4 changes: 2 additions & 2 deletions lib/bap_types/bap_ir.ml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ module Tid = struct
then intern str
else match str.[0] with
| '%' -> parse @@ sprintf "#<%s 0x%s>"
(KB.Name.show (KB.Class.name Theory.Program.Semantics.cls))
(KB.Name.show (KB.Class.name Theory.Semantics.cls))
(String.subo ~pos:1 str)
| '@' -> intern (String.subo ~pos:1 str)
| _ -> intern str
Expand Down Expand Up @@ -1251,7 +1251,7 @@ module Term = struct
end)

let slot = Knowledge.Class.property
Theory.Program.Semantics.cls "bir" domain
Theory.Semantics.cls "bir" domain
~package ~persistent
~public:true
~desc:"BIL semantics in a graphical IR"
Expand Down
2 changes: 1 addition & 1 deletion lib/bap_types/bap_ir.mli
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ end
module Term : sig
type 'a t = 'a term

val slot : (Theory.Program.Semantics.cls, blk term list) KB.slot
val slot : (Theory.Semantics.cls, blk term list) KB.slot

val clone : 'a t -> 'a t
val same : 'a t -> 'a t -> bool
Expand Down
2 changes: 1 addition & 1 deletion lib/bap_types/bap_stmt.ml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,6 @@ let persistent = Knowledge.Persistent.of_binable (module struct
end)

let slot = Knowledge.Class.property ~package:"bap"
~persistent Theory.Program.Semantics.cls "bil" domain
~persistent Theory.Semantics.cls "bil" domain
~public:true
~desc:"semantics of statements in BIL"
2 changes: 1 addition & 1 deletion lib/bap_types/bap_stmt.mli
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ end
module Stmts_pp : Printable.S with type t = stmt list
module Stmts_data : Data.S with type t = stmt list

val slot : (Theory.Program.Semantics.cls, stmt list) Knowledge.slot
val slot : (Theory.Semantics.cls, stmt list) Knowledge.slot
val domain : stmt list Knowledge.domain
val persistent : stmt list Knowledge.persistent
2 changes: 1 addition & 1 deletion lib/x86_cpu/x86_target.ml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ let i686 = Theory.Target.declare ~package "i686"
let amd64 = Theory.Target.declare ~package "amd64"
~parent:i686
~nicknames:["x64"; "x86_64"; "x86-64"; ]
~bits:32
~bits:64
~data:M64.data
~code:M64.data
~vars:M64.vars
Expand Down
3 changes: 2 additions & 1 deletion oasis/mc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ Library mc_plugin
CompiledObject: best
Modules: Mc_main
BuildDepends: core_kernel, bap-main, bap, regular, bap-plugins,
bap-core-theory, bap-knowledge, ppx_jane, bitvec
bap-core-theory, bap-knowledge, ppx_jane, bitvec,
ogre
XMETAExtraLines: tags="command, disassemble"


Expand Down
22 changes: 22 additions & 0 deletions oasis/systemz
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Flag systemz
Description: Build Systemz lifter
Default: false

Library "bap-systemz"
Build$: flag(everything) || flag(systemz)
XMETADescription: common definitions for Systemz targets
Path: lib/bap_systemz
BuildDepends: core_kernel, bap-knowledge, bap-core-theory
FindlibName: bap-systemz
Modules: Bap_systemz_target

Library systemz_plugin
XMETADescription: provide Systemz lifter
Path: plugins/systemz
Build$: flag(everything) || flag(systemz)
BuildDepends: core_kernel, ppx_jane, ogre,
bap-core-theory, bap-knowledge, bap-main,
bap, bap-systemz
FindlibName: bap-plugin-systemz
InternalModules: Systemz_main, Systemz_lifter
XMETAExtraLines: tags="systemz, lifter, z9"
3 changes: 2 additions & 1 deletion plugins/bil/bil_lifter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,8 @@ let provide_lifter ~enable_intrinsics ~with_fp () =
Knowledge.collect Memory.slot obj >>? fun mem ->
Knowledge.collect Disasm_expert.Basic.Insn.slot obj >>? fun insn ->
match lift ~enable_intrinsics arch mem insn with
| Error _ ->
| Error err ->
info "BIL: the BIL lifter failed with %a" Error.pp err;
Knowledge.return (Insn.of_basic insn)
| Ok bil ->
Bil_semantics.context >>= fun ctxt ->
Expand Down
Loading

0 comments on commit 80ace08

Please sign in to comment.