diff --git a/lib/arm/arm_target.ml b/lib/arm/arm_target.ml index 4acc020d4f..9457834ac2 100644 --- a/lib/arm/arm_target.ml +++ b/lib/arm/arm_target.ml @@ -252,6 +252,7 @@ let enable_loader () = | "arm","v86a" -> Family.v86a | "thumb", "v4" -> Family.v4t | "thumb", "v5" -> Family.v5t + | "thumb",_ -> Family.v5t | "aarch64",_ -> Family.v86a | _ -> Family.v7 @@ -367,11 +368,11 @@ module Encodings = struct symbols_encoding end -let (>>=?) x f = x >>= function - | None -> !!CT.Language.unknown - | Some x -> f x let compute_encoding_from_symbol_table default label = + let (>>=?) x f = x >>= function + | None -> !!default + | Some x -> f x in KB.collect CT.Label.unit label >>=? fun unit -> KB.collect CT.Label.addr label >>=? fun addr -> KB.collect Encodings.slot unit >>= fun encodings -> @@ -395,10 +396,22 @@ let is_64bit t = t >= EB.v8a || t >= Bi.v8a +(* we should represent it with the target options *) +let has_tsuffix = + let thumbs = Set.of_list (module Theory.Target) [ + LE.v4t; LE.v5t; LE.v5te; LE.v5tej; + Bi.v4t; Bi.v5t; Bi.v5te; Bi.v5tej; + EB.v4t; EB.v5t; EB.v5te; EB.v5tej; + ] in + Set.mem thumbs + let guess_encoding label target = if is_arm target then if before_thumb2 target - then compute_encoding_from_symbol_table llvm_a32 label + then + let default = if has_tsuffix target + then llvm_t32 else llvm_a32 in + compute_encoding_from_symbol_table default label else KB.return @@ if is_64bit target then llvm_a64 else llvm_a32 else KB.return CT.Language.unknown @@ -412,6 +425,7 @@ let enable_decoder () = KB.promise CT.Label.encoding @@ fun label -> CT.Label.target label >>= guess_encoding label + let load () = enable_loader (); enable_arch (); diff --git a/lib/bap_core_theory/bap_core_theory.mli b/lib/bap_core_theory/bap_core_theory.mli index a94afc904d..2b141f4590 100644 --- a/lib/bap_core_theory/bap_core_theory.mli +++ b/lib/bap_core_theory/bap_core_theory.mli @@ -1255,6 +1255,12 @@ module Theory : sig *) val get : ?package:string -> string -> t + (** [read ?package name] is a synonym for [get ?package name]. + + Introduces for the consistency with the [Enum.S] interface. + *) + val read : ?package:string -> string -> t + (** [declared ()] is the list of declared targets. The order is unspecified, see also {!families}. The list doesn't include the [unknown] target. *) diff --git a/lib/bap_core_theory/bap_core_theory_target.ml b/lib/bap_core_theory/bap_core_theory_target.ml index a3608d76b0..eb0c660d42 100644 --- a/lib/bap_core_theory/bap_core_theory_target.ml +++ b/lib/bap_core_theory/bap_core_theory_target.ml @@ -203,6 +203,8 @@ let get ?package name = then invalid_argf "Unknown target %s" (Name.to_string name) (); name +let read = get + let info name = match Hashtbl.find targets name with | None -> unknown | Some t -> t diff --git a/lib/bap_core_theory/bap_core_theory_target.mli b/lib/bap_core_theory/bap_core_theory_target.mli index a52dcf72f4..76752bf9dd 100644 --- a/lib/bap_core_theory/bap_core_theory_target.mli +++ b/lib/bap_core_theory/bap_core_theory_target.mli @@ -31,6 +31,7 @@ val declare : string -> t val get : ?package:string -> string -> t +val read : ?package:string -> string -> t val lookup : ?package:string -> string -> t option val unknown : t val is_unknown : t -> bool diff --git a/lib/bap_image/bap_memory.ml b/lib/bap_image/bap_memory.ml index 9a91d93241..091e205442 100644 --- a/lib/bap_image/bap_memory.ml +++ b/lib/bap_image/bap_memory.ml @@ -461,3 +461,16 @@ let slot = KB.Class.property ~package:"bap" Theory.Program.cls "mem" domain ~public:true ~desc:"a memory region occupied by the program" + +let () = + let open KB.Syntax in + KB.promise Theory.Label.addr @@ fun label -> + KB.collect slot label >>|? fun mem -> + Some (Addr.to_bitvec (min_addr mem)) + +let () = + let open KB.Rule in + declare ~package:"bap" "addr-of-mem" |> + require slot |> + provide Theory.Label.addr |> + comment "addr of the first byte" diff --git a/lib/bap_systemz/bap_systemz_target.ml b/lib/bap_systemz/bap_systemz_target.ml new file mode 100644 index 0000000000..dfa878355e --- /dev/null +++ b/lib/bap_systemz/bap_systemz_target.ml @@ -0,0 +1,40 @@ +open Core_kernel +open Bap_core_theory + +let package = "bap" + +type r64 and r32 and r16 and r8 + +type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort + +let r64 : r64 bitv = Theory.Bitv.define 64 +let r32 : r32 bitv = Theory.Bitv.define 32 +let r16 : r16 bitv = Theory.Bitv.define 16 +let r8 : r8 bitv = Theory.Bitv.define 8 +let bool = Theory.Bool.t + +let reg t n = Theory.Var.define t n + +let array ?(index=string_of_int) t pref size = + List.init size ~f:(fun i -> reg t (pref ^ index i)) + +let untyped = List.map ~f:Theory.Var.forget +let (@<) xs ys = untyped xs @ untyped ys + +let mems = Theory.Mem.define r64 r8 + +let gpr = array r64 "R" 16 +let fpr = array r64 "F" 16 +let mem = reg mems "mem" + +let vars = gpr @< fpr @< [mem] + +let parent = Theory.Target.declare ~package "systemz" + +let z9 = Theory.Target.declare ~package "systemz9" ~parent + ~bits:64 + ~code:mem + ~data:mem + ~vars + +let llvm_encoding = Theory.Language.declare ~package "llvm-systemz" diff --git a/lib/bap_systemz/bap_systemz_target.mli b/lib/bap_systemz/bap_systemz_target.mli new file mode 100644 index 0000000000..882826f61e --- /dev/null +++ b/lib/bap_systemz/bap_systemz_target.mli @@ -0,0 +1,21 @@ +open Bap_core_theory + + +type r64 and r32 and r16 and r8 + +type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort + +val r64 : r64 bitv +val r32 : r32 bitv +val r16 : r16 bitv +val r8 : r8 bitv + +val mem : (r64, r8) Theory.Mem.t Theory.var +val gpr : r64 Theory.Bitv.t Theory.var list +val fpr : r64 Theory.Bitv.t Theory.var list + +val parent : Theory.Target.t + +val z9 : Theory.Target.t + +val llvm_encoding : Theory.Language.t diff --git a/lib/bap_types/bap_arch.ml b/lib/bap_types/bap_arch.ml index 3ee98b8f5f..64471275bb 100644 --- a/lib/bap_types/bap_arch.ml +++ b/lib/bap_types/bap_arch.ml @@ -75,6 +75,7 @@ module T = struct let persistent = KB.Persistent.of_binable (module struct type t = arch [@@deriving bin_io] end) + let slot = KB.Class.property ~package:"bap" Theory.Program.cls "arch" domain ~persistent diff --git a/lib/bap_types/bap_ir.ml b/lib/bap_types/bap_ir.ml index d8eb1db54e..a78b78d7c2 100644 --- a/lib/bap_types/bap_ir.ml +++ b/lib/bap_types/bap_ir.ml @@ -115,7 +115,7 @@ module Tid = struct then intern str else match str.[0] with | '%' -> parse @@ sprintf "#<%s 0x%s>" - (KB.Name.show (KB.Class.name Theory.Program.Semantics.cls)) + (KB.Name.show (KB.Class.name Theory.Semantics.cls)) (String.subo ~pos:1 str) | '@' -> intern (String.subo ~pos:1 str) | _ -> intern str @@ -1251,7 +1251,7 @@ module Term = struct end) let slot = Knowledge.Class.property - Theory.Program.Semantics.cls "bir" domain + Theory.Semantics.cls "bir" domain ~package ~persistent ~public:true ~desc:"BIL semantics in a graphical IR" diff --git a/lib/bap_types/bap_ir.mli b/lib/bap_types/bap_ir.mli index 39a00967e8..a85902ca57 100644 --- a/lib/bap_types/bap_ir.mli +++ b/lib/bap_types/bap_ir.mli @@ -69,7 +69,7 @@ end module Term : sig type 'a t = 'a term - val slot : (Theory.Program.Semantics.cls, blk term list) KB.slot + val slot : (Theory.Semantics.cls, blk term list) KB.slot val clone : 'a t -> 'a t val same : 'a t -> 'a t -> bool diff --git a/lib/bap_types/bap_stmt.ml b/lib/bap_types/bap_stmt.ml index 86858ab879..45e74f6a45 100644 --- a/lib/bap_types/bap_stmt.ml +++ b/lib/bap_types/bap_stmt.ml @@ -110,6 +110,6 @@ let persistent = Knowledge.Persistent.of_binable (module struct end) let slot = Knowledge.Class.property ~package:"bap" - ~persistent Theory.Program.Semantics.cls "bil" domain + ~persistent Theory.Semantics.cls "bil" domain ~public:true ~desc:"semantics of statements in BIL" diff --git a/lib/bap_types/bap_stmt.mli b/lib/bap_types/bap_stmt.mli index d5048c4c7c..758f4d040b 100644 --- a/lib/bap_types/bap_stmt.mli +++ b/lib/bap_types/bap_stmt.mli @@ -26,6 +26,6 @@ end module Stmts_pp : Printable.S with type t = stmt list module Stmts_data : Data.S with type t = stmt list -val slot : (Theory.Program.Semantics.cls, stmt list) Knowledge.slot +val slot : (Theory.Semantics.cls, stmt list) Knowledge.slot val domain : stmt list Knowledge.domain val persistent : stmt list Knowledge.persistent diff --git a/lib/x86_cpu/x86_target.ml b/lib/x86_cpu/x86_target.ml index c9abbd94b0..0f02552c83 100644 --- a/lib/x86_cpu/x86_target.ml +++ b/lib/x86_cpu/x86_target.ml @@ -186,7 +186,7 @@ let i686 = Theory.Target.declare ~package "i686" let amd64 = Theory.Target.declare ~package "amd64" ~parent:i686 ~nicknames:["x64"; "x86_64"; "x86-64"; ] - ~bits:32 + ~bits:64 ~data:M64.data ~code:M64.data ~vars:M64.vars diff --git a/oasis/mc b/oasis/mc index 55e8bda50f..fc691ee4a9 100644 --- a/oasis/mc +++ b/oasis/mc @@ -9,7 +9,8 @@ Library mc_plugin CompiledObject: best Modules: Mc_main BuildDepends: core_kernel, bap-main, bap, regular, bap-plugins, - bap-core-theory, bap-knowledge, ppx_jane, bitvec + bap-core-theory, bap-knowledge, ppx_jane, bitvec, + ogre XMETAExtraLines: tags="command, disassemble" diff --git a/oasis/systemz b/oasis/systemz new file mode 100644 index 0000000000..22957f54ae --- /dev/null +++ b/oasis/systemz @@ -0,0 +1,22 @@ +Flag systemz + Description: Build Systemz lifter + Default: false + +Library "bap-systemz" + Build$: flag(everything) || flag(systemz) + XMETADescription: common definitions for Systemz targets + Path: lib/bap_systemz + BuildDepends: core_kernel, bap-knowledge, bap-core-theory + FindlibName: bap-systemz + Modules: Bap_systemz_target + +Library systemz_plugin + XMETADescription: provide Systemz lifter + Path: plugins/systemz + Build$: flag(everything) || flag(systemz) + BuildDepends: core_kernel, ppx_jane, ogre, + bap-core-theory, bap-knowledge, bap-main, + bap, bap-systemz + FindlibName: bap-plugin-systemz + InternalModules: Systemz_main, Systemz_lifter + XMETAExtraLines: tags="systemz, lifter, z9" diff --git a/plugins/bil/bil_lifter.ml b/plugins/bil/bil_lifter.ml index 64f85d4ad1..c3c7ff1424 100644 --- a/plugins/bil/bil_lifter.ml +++ b/plugins/bil/bil_lifter.ml @@ -479,7 +479,8 @@ let provide_lifter ~enable_intrinsics ~with_fp () = Knowledge.collect Memory.slot obj >>? fun mem -> Knowledge.collect Disasm_expert.Basic.Insn.slot obj >>? fun insn -> match lift ~enable_intrinsics arch mem insn with - | Error _ -> + | Error err -> + info "BIL: the BIL lifter failed with %a" Error.pp err; Knowledge.return (Insn.of_basic insn) | Ok bil -> Bil_semantics.context >>= fun ctxt -> diff --git a/plugins/mc/mc_main.ml b/plugins/mc/mc_main.ml index 27972f4d4c..64948dd308 100644 --- a/plugins/mc/mc_main.ml +++ b/plugins/mc/mc_main.ml @@ -18,6 +18,45 @@ The following input formats are supported: 31d248f7f3"; ``` +# SETTING ARCHITECHTURE + +The target architecture is controlled by several groups of options +that can not be used together: + +- $(b,arch); +- $(b,target) and $(b,encoding); +- $(b,triple), $(b,backend), $(b,cpu), $(b,bits), and $(b,order). + +The $(b,arch) option provides the least control but is easiest to +use. It relies on the dependency-injection mechanism and lets the +target support packages (plugins that implement support for the given +architecture) do their best to guess the target and encoding that +matches the provided name. Use the common names for the architecture +and it should work. You can use the $(b,bits) and $(b,order) options +to give more hints to the target support packages. They default to +$(b,32) and $(b,little) correspondingly. + +The $(b,target) and $(b,encoding) provides precise control over the +selection of the target and the encoding that is used to represent +machine instructions. The $(b,encoding) field can be omitted and will +be deduced from the target. Use $(b, bap list targets) and +$(b, bap list encodings) to get the list of supported targets and +encodings respectivly. + +Finally, the $(b,triple), $(b,backend), $(b,cpu),... group of options +provides the full control over the disassembler backend and bypasses +the dependency-injection mechanism to pass the specified options +directly to the corresponding backends. This enables disassembling of +targets and encodings that are not yet supported by BAP. The meanings +of the options totally depend on the selected $(b,backend) and they +are passed as is to the corresponding arguments of the +$(b,Disasm_expert.Basic.create) function. The $(b,bits) and $(b,order) +defaults to $(b,32) and $(b,little) corresondingly and are used to +specify the number of bits in the target's addresses and the order of +bytes in the word. This group of options is useful during the +implementation and debugging of new targets and thus is reserved for +experts. Note, when this group is used the semantics of the instructions +will not be provided as it commonly requires the target specification. |} let objdump_man = {| @@ -59,6 +98,14 @@ type error = | No_formats_expected of string | Disassembler_failed of Error.t | Loader_failed of Error.t + | Target_must_be_unknown + | Encoding_must_be_unknown + | Triple_must_not_be_set + | Arch_must_not_be_set + | Backend_must_not_be_set + | Cpu_must_not_be_set + | Bits_must_not_be_set + | Order_must_not_be_set type Extension.Error.t += Fail of error @@ -72,6 +119,19 @@ type output = [ | `invalid ] [@@deriving compare] +type target = + | Target of { + name : Theory.Target.t; + encoding : Theory.Language.t; + } + | Triple of { + name : string; + backend : string option; + cpu: string option; + order : endian; + bits : int; + } + let outputs = [ `insn; `bil; @@ -92,16 +152,46 @@ module Spec = struct let input = Command.arguments Type.string let file = Command.argument Type.file - let arch_type = Type.define - ~parse:(fun s -> match Arch.of_string s with - | None -> invalid_arg "unknown architecture" - | Some arch -> arch) - ~print:Arch.to_string - `x86_64 + let language = Type.define + ~parse:(Theory.Language.read ~package:"bap") + ~print:Theory.Language.to_string + Theory.Language.unknown + + let target = Type.define + ~parse:(Theory.Target.get ~package:"bap") + ~print:Theory.Target.to_string + Theory.Target.unknown + + let order = Type.enum [ + "big", BigEndian; + "little", LittleEndian; + ] + + let arch = Command.parameter Type.(some string) "arch" + ~aliases:["a"] + ~doc:"The target architecture." + + let target = Command.parameter target "target" + ~aliases:["t"] + ~doc:"The target name." + + let encoding = Command.parameter language "encoding" + ~aliases:["e"] + ~doc:"The target encoding." + + let triple = Command.parameter Type.(some string) "triple" + ~doc:"The target triple." + + let cpu = Command.parameter Type.(some string) "cpu" + ~doc:"The target CPU (used with triple)." + + let bits = Command.parameter Type.(some int) "bits" + ~doc:"The number of bits in the address \ + (used with triple or arch)" - let arch = - let doc = "Target architecture" in - Command.parameter ~doc arch_type "arch" + let order = Command.parameter Type.(some order) "order" + ~doc: "The order of bytes in the target's word \ + (used with triple or arch)." let outputs = let name_of_output = function @@ -143,7 +233,7 @@ module Spec = struct ~doc:"Stop disassembling on the first error and report it" let backend = - let doc = "Specify the disassembler backend" in + let doc = "The disassembling backend (used with triple)." in Command.parameter ~doc Type.(some string) "backend" let loader = @@ -193,9 +283,15 @@ let read_input input = | "0x" -> to_binary ~map:escape_0x input | _ -> to_binary ~map:prepend_slash_x input +let endian = function + | Triple {order} -> order + | Target {name=t} -> + if Theory.Endianness.(equal le) (Theory.Target.endianness t) + then LittleEndian + else BigEndian + let create_memory arch data base = - let endian = Arch.endian arch in - Memory.create endian base @@ + Memory.create (endian arch) base @@ Bigstring.of_string data |> function | Ok r -> Ok r | Error e -> fail (Create_mem e) @@ -208,8 +304,17 @@ let print_kinds formats insn = let new_insn arch mem insn = let open KB.Syntax in + let provide_target unit label = function + | Triple _ -> KB.return () + | Target {name=target; encoding} -> + KB.provide Theory.Unit.target unit target >>= fun () -> + if Theory.Language.is_unknown encoding + then KB.return () + else KB.provide Theory.Label.encoding label encoding in KB.Object.create Theory.Program.cls >>= fun code -> - KB.provide Arch.slot code arch >>= fun () -> + KB.Symbol.intern "unit" Theory.Unit.cls >>= fun unit -> + provide_target unit code arch >>= fun () -> + KB.provide Theory.Label.unit code (Some unit) >>= fun () -> KB.provide Memory.slot code (Some mem) >>= fun () -> KB.provide Dis.Insn.slot code (Some insn) >>| fun () -> code @@ -266,26 +371,28 @@ let print arch mem code formats = print_sema (formats `sema) insn; print_kinds (formats `kinds) code +let bits = function + | Target {name=t} -> Theory.Target.bits t + | Triple {bits} -> bits + let parse_base arch base = Result.map_error ~f:(function | Invalid_argument str -> Fail (Invalid_base str) | exn -> Fail (Invalid_base (Exn.to_string exn))) @@ Result.try_with @@ fun () -> - Word.create (Bitvec.of_string base) - (Size.in_bits (Arch.addr_size arch)) - - -let create_disassembler ?(backend="llvm") arch = - Dis.create ~backend (Arch.to_string arch) |> - Result.map_error ~f:(fun err -> Fail (Disassembler_failed err)) + Word.create (Bitvec.of_string base) (bits arch) +let create_disassembler spec = + Result.map_error ~f:(fun err -> Fail (Disassembler_failed err)) @@ + match spec with + | Target {name; encoding} -> Dis.lookup name encoding + | Triple {name; cpu; backend} -> Dis.create ?backend ?cpu name let module_of_kind = function | `insn -> "Bap.Std.Insn" | `bil -> "Bap.Std.Bil" | `bir -> "Bap.Std.Blk" - let validate_module kind formats = let name = module_of_kind kind in Data.all_writers () |> @@ -339,15 +446,144 @@ let run ?(only_one=false) ?(stop_on_error=false) dis arch mem formats = if only_one then Dis.stop state bytes else Dis.step state (bytes + Memory.length mem)) +let check_invariants xs = + List.concat_map xs ~f:(fun (pred,props) -> + if pred then props else []) |> + Result.all_unit + +let check check t error = + if not (check t) then fail error else Ok () + +let target_must_be_unknown t = + check Theory.Target.is_unknown t Target_must_be_unknown + +let encoding_must_be_unknown t = + check Theory.Language.is_unknown t Encoding_must_be_unknown + +let triple_must_not_be_set x = + check Option.is_none x Triple_must_not_be_set + +let arch_must_not_be_set x = + check Option.is_none x Arch_must_not_be_set + +let backend_must_not_be_set x = + check Option.is_none x Backend_must_not_be_set + +let cpu_must_not_be_set x = + check Option.is_none x Cpu_must_not_be_set + +let bits_must_not_be_set x = + check Option.is_none x Bits_must_not_be_set + +let order_must_not_be_set x = + check Option.is_none x Order_must_not_be_set + + + +let compute_target provide = + let extract_target = + let open KB.Syntax in + KB.Object.scoped Theory.Unit.cls @@ fun unit -> + KB.Object.scoped Theory.Program.cls @@ fun label -> + provide unit >>= fun () -> + KB.provide Theory.Label.unit label (Some unit) >>= fun () -> + Theory.Label.target label >>= fun name -> + KB.collect Theory.Label.encoding label >>| fun encoding -> + Target {name; encoding} in + let result = Toplevel.var "target-and-encoding" in + Toplevel.put result extract_target; + Toplevel.get result + +let target_of_arch arch bits order = + let bits = match bits with + | None -> 32L + | Some bits -> Int64.of_int bits in + let is_little = match order with + | Some BigEndian -> false + | _ -> true in + let make_spec = + let open Ogre.Syntax in + Ogre.sequence [ + Ogre.provide Image.Scheme.arch arch; + Ogre.provide Image.Scheme.bits bits; + Ogre.provide Image.Scheme.is_little_endian is_little; + ] in + let spec = match Ogre.exec make_spec Ogre.Doc.empty with + | Error err -> + failwithf "compute_target: failed to build a spec: %s" + (Error.to_string_hum err) () + | Ok doc -> doc in + compute_target @@ fun unit -> + KB.provide Image.Spec.slot unit spec + +let make_triple ?(bits=32) ?(order=BigEndian) ?backend ?cpu name = + Triple {name; backend; cpu; bits; order} + +let make_target target encoding = + if Theory.Language.is_unknown encoding + then compute_target @@ fun unit -> + KB.provide Theory.Unit.target unit target + else Target {name=target; encoding} + +let parse_arch + arch + target encoding + triple cpu backend + bits order = + check_invariants [ + Option.is_some arch, [ + target_must_be_unknown target; + triple_must_not_be_set triple; + ]; + not (Theory.Target.is_unknown target), [ + arch_must_not_be_set arch; + triple_must_not_be_set triple; + ]; + Option.is_some triple, [ + target_must_be_unknown target; + encoding_must_be_unknown encoding; + arch_must_not_be_set arch; + ]; + Theory.Target.is_unknown target, [ + encoding_must_be_unknown encoding; + ]; + Option.is_none triple, [ + cpu_must_not_be_set cpu; + backend_must_not_be_set backend; + ]; + Option.is_none triple && Option.is_none arch, [ + bits_must_not_be_set bits; + order_must_not_be_set order; + ] + ] >>| fun () -> match arch,triple with + | None,None -> + if Theory.Target.is_unknown target + then target_of_arch "x86-64" None None + else make_target target encoding + | Some arch,None -> target_of_arch arch bits order + | None,Some triple -> make_triple ?bits ?order ?backend ?cpu triple + | Some _, Some _ -> + failwith "parse_arch: unchecked invariant" + let () = Extension.Command.(begin declare ~doc:mc_man "mc" - Spec.(args $arch $base $backend $only_one $stop_on_error $input $outputs) - end) @@ fun arch base backend only_one stop_on_error input outputs _ctxt -> + Spec.(args + $arch + $target $encoding + $triple $cpu $backend + $bits $order + $base $only_one $stop_on_error $input $outputs) + end) @@ fun arch + target encoding + triple cpu backend + bits order + base only_one stop_on_error input outputs _ctxt -> validate_formats outputs >>= fun () -> - parse_base arch base >>= fun base -> + parse_arch arch target encoding triple cpu backend bits order >>= fun arch -> read_input input >>= fun data -> + parse_base arch base >>= fun base -> create_memory arch data base >>= fun mem -> - create_disassembler ?backend arch >>= fun dis -> + create_disassembler arch >>= fun dis -> let formats = formats outputs in run ~only_one ~stop_on_error dis arch mem formats >>= fun bytes -> Dis.close dis; @@ -358,22 +594,23 @@ let () = Extension.Command.(begin let () = Extension.Command.(begin declare ~doc:objdump_man "objdump" - Spec.(args $backend $loader $stop_on_error $file $outputs) - end) @@ fun backend loader stop_on_error input outputs _ctxt -> + Spec.(args $loader $stop_on_error $file $outputs) + end) @@ fun loader stop_on_error input outputs _ctxt -> validate_formats outputs >>= fun () -> let formats = formats outputs in match Image.create ~backend:loader input with | Error err -> Error (Fail (Loader_failed err)) | Ok (img,_warns) -> - let arch = Image.arch img in - create_disassembler ?backend arch >>= fun dis -> + let target = compute_target @@ fun unit -> + KB.provide Image.Spec.slot unit (Image.spec img) in + create_disassembler target >>= fun dis -> Image.memory img |> Memmap.to_sequence |> Seq.filter_map ~f:(fun (mem,data) -> Option.some_if (Value.is Image.code_region data) mem) |> Seq.map ~f:(fun mem -> - run ~stop_on_error dis arch mem formats >>= fun _bytes -> + run ~stop_on_error dis target mem formats >>= fun _bytes -> Ok ()) |> Seq.to_list |> Result.all_unit @@ -418,6 +655,19 @@ let string_of_failure = function sprintf "--show-%s doesn't expect any formats yet" name | Loader_failed err -> Format.asprintf "Failed to unpack the file: %a" Error.pp err + | Target_must_be_unknown + | Triple_must_not_be_set + | Arch_must_not_be_set -> + "The target, triple, and arch options could not be used together" + | Encoding_must_be_unknown -> + "The encoding option requires the target option" + | Backend_must_not_be_set -> + "The backend option requires the triple option" + | Cpu_must_not_be_set -> + "The CPU option requires the triple option" + | Bits_must_not_be_set | Order_must_not_be_set -> + "The bits and order parameters are only accepted with arch or \ + triple and are not allowed when the target is specified" let () = Extension.Error.register_printer @@ function diff --git a/plugins/systemz/.merlin b/plugins/systemz/.merlin new file mode 100644 index 0000000000..033cc1473c --- /dev/null +++ b/plugins/systemz/.merlin @@ -0,0 +1,2 @@ +B ../../lib/bap_systemz +REC diff --git a/plugins/systemz/systemz_lifter.ml b/plugins/systemz/systemz_lifter.ml new file mode 100644 index 0000000000..75472c70e2 --- /dev/null +++ b/plugins/systemz/systemz_lifter.ml @@ -0,0 +1,64 @@ +open Core_kernel +open Bap_core_theory +open Bap.Std + +open KB.Syntax +include Bap_main.Loggers() + +module Target = Bap_systemz_target +module MC = Disasm_expert.Basic + +let make_regs regs = + let regs = + List.mapi regs ~f:(fun i r -> (i,r)) |> + Map.of_alist_exn (module Int) in + Map.find_exn regs + +let gpr = make_regs Target.gpr +let regnum s = Scanf.sscanf s "R%d" ident + +let require_gpr insn pos f = + match (MC.Insn.ops insn).(pos) with + | Op.Reg r -> f (gpr (regnum (Reg.name r))) + | _ -> KB.return Insn.empty + +module Semantics(CT : Theory.Core) = struct + open Target + + let pass = Theory.Effect.empty Theory.Effect.Sort.bot + + let rec seq = function + | [] -> CT.perform Theory.Effect.Sort.bot + | [x] -> x + | x :: xs -> CT.seq x @@ seq xs + + let data xs = + KB.Object.create Theory.Program.cls >>= fun lbl -> + CT.blk lbl (seq xs) (seq []) + + let lr insn = + require_gpr insn 0 @@ fun rd -> + require_gpr insn 1 @@ fun rs -> + let x = CT.(unsigned r64 @@ low r32 (var rs)) in + let rhs = CT.(logor (var rd) x) in + data [ + CT.(set rd rhs) + ] +end + +let lifter label = + KB.collect MC.Insn.slot label >>= function + | None -> KB.return Insn.empty + | Some insn -> + Theory.instance () >>= Theory.require >>= fun (module Core) -> + let module Lifter = Semantics(Core) in + let open Lifter in + insn |> match MC.Insn.name insn with + | "LR" -> lr + | code -> + info "unsupported opcode: %s" code; + fun _ -> KB.return Insn.empty + + +let load () = + KB.promise Theory.Semantics.slot lifter diff --git a/plugins/systemz/systemz_lifter.mli b/plugins/systemz/systemz_lifter.mli new file mode 100644 index 0000000000..a58d3a6dae --- /dev/null +++ b/plugins/systemz/systemz_lifter.mli @@ -0,0 +1 @@ +val load : unit -> unit diff --git a/plugins/systemz/systemz_main.ml b/plugins/systemz/systemz_main.ml new file mode 100644 index 0000000000..8d1658ee6b --- /dev/null +++ b/plugins/systemz/systemz_main.ml @@ -0,0 +1,87 @@ +open Bap_main +open Bap.Std +open Bap_core_theory +open KB.Syntax +module CT = Theory + +include Bap_main.Loggers() + +module Target = Bap_systemz_target +module Dis = Disasm_expert.Basic + +(* to enable backward compatiblity with Arch.t + we map all systemz targets to `systemz *) +let map_arch () = + KB.promise Arch.unit_slot @@ fun unit -> + KB.collect Theory.Unit.target unit >>| fun t -> + if Theory.Target.belongs Target.parent t + then `systemz + else `unknown + +(* the same target may have different encodings + or share encodings with other targets, in addition, + the encoding may differ per each instruction and even + be context-dependent, therefore target and encodings + are separate properties of different classes. In our + case, everything is trivial. +*) +let provide_decoding () = + KB.promise CT.Label.encoding @@ fun label -> + CT.Label.target label >>| fun t -> + if CT.Target.belongs Target.parent t + then Target.llvm_encoding + else CT.Language.unknown + + +(* following the dependency injection principal, we have to provide + the disassembler instance for our encoding. + + The _target parameter may further refine the target system, which + could be reflected in the disassembler parameters. Not used in our + case.*) +let enable_llvm () = + Dis.register Target.llvm_encoding @@ fun _target -> + Dis.create ~backend:"llvm" "systemz" + +(* The file loader parses the file and provides its specification + in the OGRE format. Our task is to analyze the specification and + figure out if it is systemz (and if yes, then what version). For + starters, we just assume that the loader will say "systemz" in + the arch field. +*) +let enable_loader () = + let request_arch doc = + let open Ogre.Syntax in + match Ogre.eval (Ogre.request Image.Scheme.arch) doc with + | Error _ -> assert false (* nothing could go wrong here! *) + | Ok arch -> arch in + KB.promise CT.Unit.target @@ fun unit -> + KB.collect Image.Spec.slot unit >>| request_arch >>| function + | Some "systemz" -> Target.z9 + | _ -> CT.Target.unknown + + +(* the main function registers all our providers right now, + we may later add some command line options whose values + will be avaialble via the _ctxt parameter, which is now + unused. *) +let main _ctxt = + enable_llvm (); + enable_loader (); + provide_decoding (); + map_arch (); + Systemz_lifter.load (); + Ok () + +(* semantic tags that describe what our plugin is providing, + setting them is important not only for introspection but + for the proper function of the cache subsystem. +*) +let provides = [ + "systemz"; + "lifter"; +] + +(* finally, let's register our extension and call the main function *) +let () = Bap_main.Extension.declare main + ~provides diff --git a/plugins/systemz/systemz_main.mli b/plugins/systemz/systemz_main.mli new file mode 100644 index 0000000000..e69de29bb2