From 3fe9155dcf0a50f189b76fed934373a80206ae2b Mon Sep 17 00:00:00 2001 From: ivg Date: Fri, 27 May 2022 15:49:32 -0400 Subject: [PATCH] rewrites x86 abi using the new infrastructure Rewrites all x86 and x86_64 ABI using the new infrastructure. The ABI comprise calling conventions and name demangling schemes. We now support (at various levels of detail) 12 ABI. The most popular ABI, such as cdecl, stdcall, ms, and sysv are quite complete, with sysv lacking only the ability to pass extra wide vectors (a feature that is rarely used in libraries). The improved name demangling scheme now handles x86 and darwin files, so now we have a much better chance to be able to execute them. This commit also includes slight extensions to the bap-c library, such as a new `rebase` operator in the calling convention DSL, an ability to reverse registers when a value is passed via several registers (x86 passes values in the reversed order) and the `is_void` predicate in the `C.Type` module. --- Makefile | 2 +- lib/bap_c/bap_c_abi.ml | 31 +- lib/bap_c/bap_c_abi.mli | 25 +- lib/bap_c/bap_c_type.ml | 2 + lib/bap_c/bap_c_type.mli | 4 + lib/x86_cpu/.merlin | 2 + lib/x86_cpu/x86_target.ml | 428 ++++++++++++++++++- lib/x86_cpu/x86_target.mli | 39 +- oasis/x86 | 5 +- plugins/bil/bil_lifter.ml | 20 +- plugins/cxxfilt/cxxfilt_main.ml | 3 +- plugins/primus_lisp/site-lisp/libc-init.lisp | 3 +- plugins/relocatable/rel_symbolizer.ml | 4 - plugins/x86/.merlin | 1 + plugins/x86/x86_abi.ml | 267 ------------ plugins/x86/x86_abi.mli | 10 - plugins/x86/x86_main.ml | 26 +- 17 files changed, 526 insertions(+), 346 deletions(-) create mode 100644 lib/x86_cpu/.merlin delete mode 100644 plugins/x86/x86_abi.ml delete mode 100644 plugins/x86/x86_abi.mli diff --git a/Makefile b/Makefile index 12f342c52..d18ea3dcd 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ testsuite: git clone https://github.com/BinaryAnalysisPlatform/bap-testsuite.git testsuite check: testsuite - make REVISION=c40b332290bab -C testsuite + make REVISION=f0043aaa2cc -C testsuite .PHONY: indent check-style status-clean diff --git a/lib/bap_c/bap_c_abi.ml b/lib/bap_c/bap_c_abi.ml index 2ce62078d..6d15a6c35 100644 --- a/lib/bap_c/bap_c_abi.ml +++ b/lib/bap_c/bap_c_abi.ml @@ -527,14 +527,14 @@ module Arg = struct let concat = List.reduce_exn ~f:Bil.concat - let registers ?limit file t = + let registers ?(rev=false) ?limit file t = let* s = Arg.get () in let* bits = size t in let* regs_needed = registers_needed file bits in let limit = Option.value limit ~default:regs_needed in require (regs_needed <= limit) >>= fun () -> let* args = Arena.popn ~n:regs_needed s file in - push_arg t @@ concat args + push_arg t @@ concat (if rev then List.rev args else args) let align_even file = let* s = Arg.get () in @@ -556,15 +556,7 @@ module Arg = struct let reference file t = with_hidden @@ fun () -> - register file (`Pointer C.Type.Spec.{ - t; - attrs = []; - qualifier = C.Type.Qualifier.{ - const = false; - volatile = false; - restrict = false; - } - }) + register file (C.Type.pointer t) let update_stack f = let* s = Arg.get () in @@ -583,8 +575,16 @@ module Arg = struct let* bits = size t in update_stack @@ Stack.add t (data s.ruler t) bits + let hidden t = + with_hidden @@ fun () -> + memory (C.Type.pointer t) + let skip_memory bits = update_stack @@ Stack.skip bits + let rebase slots = + let* {target} = Arg.get () in + skip_memory (slots * Theory.Target.data_addr_size target) + let load t bits sp base = let mem = Var.reify (Theory.Target.data t) in let width = Theory.Target.data_addr_size t in @@ -698,12 +698,9 @@ end let define target ruler pass = let open Bap_core_theory in - let target_name = Theory.Target.name target in - let abi_name = - let abi = Theory.Target.abi target in - if Theory.Abi.(abi = unknown) - then Format.asprintf "%a-unknown" KB.Name.pp target_name - else Format.asprintf "%a" KB.Name.pp target_name in + let abi = Theory.Target.abi target in + let abi_name = Format.asprintf "%s" + (KB.Name.unqualified (Theory.Abi.name abi)) in let abi_processor = { apply_attrs = (fun _ x -> x); insert_args = fun _ attrs proto -> diff --git a/lib/bap_c/bap_c_abi.mli b/lib/bap_c/bap_c_abi.mli index ec3c571ee..ba6ec9d55 100644 --- a/lib/bap_c/bap_c_abi.mli +++ b/lib/bap_c/bap_c_abi.mli @@ -270,8 +270,13 @@ module Arg : sig Rejects the computation if [arena] doesn't have the necessary number of registers; the number of required registers is greater than [limit]; or if the size of [t] is unknown. + + If [rev] is true, then the allocated registers will be used in + the reversed order. + + @since 2.5.0 accepts the optional [rev] parameter. *) - val registers : ?limit:int -> arena -> ctype -> unit t + val registers : ?rev:bool -> ?limit:int -> arena -> ctype -> unit t (** [align_even arena] ensures that the first available register in @@ -312,6 +317,15 @@ module Arg : sig pointer role. The size of [t] is not required. *) val reference : arena -> ctype -> unit t + + (** [hidden t] passes the argument of type [t] as a pointer + to [t] via the first available stack slot. + + The computation is rejected if the target doesn't have a stack. + + @since 2.5.0 *) + val hidden : ctype -> unit t + (** [memory t] passes the argument of type [t] in the next available stack slot. @@ -333,6 +347,11 @@ module Arg : sig *) val memory : ctype -> unit t + (** [rebase off] rebases the stack position by [n] words. + + @since 2.5.0 + *) + val rebase : int -> unit t (** [split a1 a2 t] passes the lower half of the value via registers in the arena [a1] and the higher via the registers @@ -387,12 +406,12 @@ module Arg : sig (** [size t] is the size in bits of an object of type [t]. - The computation is rejected if the size is unknown. + The computation is rejected if the size is unknown, i.e., the + type is incomplete. @since 2.5.0 *) val size : ctype -> int t - (** [require cnd] rejects the computation if [cnd] doesn't hold. @since 2.5.0 *) diff --git a/lib/bap_c/bap_c_type.ml b/lib/bap_c/bap_c_type.ml index 2961d2a2e..09bec9ff0 100644 --- a/lib/bap_c/bap_c_type.ml +++ b/lib/bap_c/bap_c_type.ml @@ -174,6 +174,8 @@ let is_restrict : t -> Bool.t = function | `Pointer {qualifier={restrict}} -> restrict +let is_void = function `Void -> true | _ -> false + let qualifier ?(const=false) ?(volatile=false) restrict = Qualifier.{const; volatile; restrict} diff --git a/lib/bap_c/bap_c_type.mli b/lib/bap_c/bap_c_type.mli index ad7b6cd0d..84df9b564 100644 --- a/lib/bap_c/bap_c_type.mli +++ b/lib/bap_c/bap_c_type.mli @@ -161,6 +161,10 @@ val is_volatile : t -> Bool.t val is_restrict : t -> Bool.t +(** [is_void t] true iff [t] is [`Void] *) +val is_void : t -> Bool.t + + (** {2 Basic Types} *) (** [basic x] constructs a basic type. diff --git a/lib/x86_cpu/.merlin b/lib/x86_cpu/.merlin new file mode 100644 index 000000000..237f6b11c --- /dev/null +++ b/lib/x86_cpu/.merlin @@ -0,0 +1,2 @@ +REC +B ../bap_demangle diff --git a/lib/x86_cpu/x86_target.ml b/lib/x86_cpu/x86_target.ml index 80ecae138..ea201be6c 100644 --- a/lib/x86_cpu/x86_target.ml +++ b/lib/x86_cpu/x86_target.ml @@ -1,9 +1,12 @@ open Bap_core_theory open Core_kernel[@@warning "-D"] +open Bap_demangle.Std open Bap.Std let package = "bap" + + type r256 and r128 and r80 and r64 and r32 and r16 and r8 type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort @@ -236,10 +239,12 @@ let i186 = Theory.Target.declare ~package "i186" ~parent:i86 ~nicknames:["80186"; "186"] + let i286 = Theory.Target.declare ~package "i286" ~parent:i186 ~nicknames:["80286"; "286"] + let i386 = Theory.Target.declare ~package "i386" ~parent:i286 ~nicknames:["386"; "80386"] @@ -250,6 +255,8 @@ let i386 = Theory.Target.declare ~package "i386" ~regs:M32.i386regs ~aliasing:M32.aliasing + + let i486 = Theory.Target.declare ~package "i486" ~parent:i386 ~nicknames:["486"; "80486"] @@ -282,24 +289,421 @@ let amd64 = Theory.Target.declare ~package "amd64" ~aliasing:M64.aliasing -let family = [amd64; i686; i586; i486; i386; i86] +let family = [amd64; i686; i586; i486; i386; i186; i86] + + +module Abi = struct + open Bap_c.Std + + module Arg = C.Abi.Arg + open Arg.Let + open Arg.Syntax + + module Abi = struct + let abi = Theory.Abi.declare ~package + let cdecl = abi "cdecl" + let pascal = abi "pascal" + let fortran = abi "fortran" + let fastcall = abi "fastcall" + let stdcall = abi "stdcall" + let thiscall = abi "thiscall" + let vectorcall = abi "vectorcall" + let watcomstack = abi "watcom-stack" + let watcomregs = abi "watcom-regs" + let ms = abi "ms" + let sysv = abi "sysv" + let darwin = abi "darwin" + end + + let either preds thing = List.exists preds ~f:(fun is -> is thing) + let choice options arg = + Arg.choice (List.map options ~f:(fun f -> f arg)) + + let otherwise = Fn.const true + + let is_integer = + either C.Type.[is_integer; is_pointer; is_function] + + let is_compound = + either C.Type.[is_structure; is_union] + + let is_sse : C.Type.t -> bool = function + | `Basic {t=(`float|`double)} -> true + | _ -> false + + let is_csse : C.Type.t -> bool = function + | `Basic {t=(`cfloat|`cdouble)} -> true + | _ -> false + + let is_x87 : C.Type.t -> bool = function + | `Basic {t=`long_double} -> true + | _ -> false + + + let select arg options = + List.find_map options ~f:(fun (cnd,action) -> + if cnd arg then Some (action arg) else None) |> function + | Some action -> action + | None -> Arg.reject () + + let seq xs arg = Arg.List.iter xs ~f:(fun x -> x arg) + + let skip _ = Arg.return () + + let make_return t k = match t with + | `Void -> Arg.return () + | t -> + let* size = Arg.size t in + select t (k size) + + + let arena ?low t names = Arg.Arena.of_exps @@ + List.map names ~f:(fun name -> + match Theory.Target.var t name with + | None -> failwithf "unknown register: %s" name () + | Some reg -> + let reg = Var.reify reg in + match low with + | None -> Bil.var reg + | Some bits -> Bil.(cast low bits (var reg))) + + let ia16 memory t = + let data = new C.Size.base `LP32 in + C.Abi.define t data @@ fun _ {C.Type.Proto.return=r; args} -> + let* irets = arena t ["AX"; "DX"] in + let return = match r with + | `Void -> Arg.return () + | r -> Arg.choice [ + Arg.registers irets r; + memory r; + ] in + Arg.define ~return @@ + Arg.List.iter args ~f:(fun (_,arg) -> memory arg) + + let cdecl16 = ia16 Arg.memory + + (* pascal or fortran *) + let pascal16 = ia16 Arg.push + + + let ia32 t k = + let data = new C.Size.base `ILP32 in + let is_big size _ = size > 64 in + C.Abi.define t data @@ fun _ {C.Type.Proto.return=r; args} -> + let* irets = arena t ["EAX"; "EDX"] in + let* frets = arena t ["ST0"] in + let pass = Arg.memory in + let return r = make_return r @@ fun size -> [ + C.Type.is_real, Arg.register frets; + is_big size, seq [ + Arg.reference irets; + Arg.hidden; + ]; + otherwise, Arg.registers irets; + ] in + k @@ fun ?(return=return) ?(pass=pass) () -> + Arg.define ~return:(return r) @@ Arg.sequence [ + Arg.rebase 1; + Arg.List.iter args ~f:(fun (_,arg) -> + pass arg) + ] + + (* stdcall, cdecl, watcom-stack, or ms32 *) + let cdecl t = ia32 t @@ fun accept -> accept () + + let fastcall t = ia32 t @@ fun override -> + let is_big size _ = size > 32 in + let* iregs = arena t ["ECX"; "EDX"] in + let pass arg = + let* size = Arg.size arg in + select arg [ + either [ + is_big size; + C.Type.is_floating; + ], Arg.memory; + otherwise, choice [ + Arg.register iregs; + Arg.memory; + ] + ] in + override ~pass () + + let watcomregs t = ia32 t @@ fun override -> + let* iregs = arena t ["eax"; "edx"; "ebx"; "ecx"] in + let pass arg = Arg.choice [ + Arg.register iregs arg; + Arg.memory arg; + ] in + override ~pass () + + (* aka borland register *) + let pascal t = ia32 t @@ fun override -> + let* iregs = arena t ["eax"; "edx"; "ecx"] in + let pass arg = select arg [ + either C.Type.[is_cint; is_char; is_pointer;], + choice Arg.[register iregs; memory]; + ] in + override ~pass () + + + let ms64 t = + let data = new C.Size.base `LP64 in + let is_big size _ = size > 64 in + C.Abi.define t data @@ fun _ {C.Type.Proto.return=r; args} -> + let* iregs = arena t ["rcx"; "rdx"; "r8"; "r9"] in + let* irets = arena t ["rax"; "rdx"] in + let* vregs = arena t ~low:64 @@ List.init 4 ~f:(sprintf "ymm%d") in + let* vrets = arena t ~low:128 ["ymm0"] in + let iregs = (iregs,[vregs]) and vregs = (vregs,[iregs]) + and irets = (irets,[]) and vrets = (vrets,[]) in + let use pass (arena,coarena) arg = Arg.sequence [ + pass arena arg; + Arg.List.iter coarena ~f:Arg.discard + ] in + let pass how arena = choice [ + use how arena; + Arg.memory; + ] in + let args = Arg.List.iter args ~f:(fun (_,t) -> + let* size = Arg.size t in + select t [ + is_big size, pass Arg.reference iregs; + C.Type.is_floating, pass Arg.register vregs; + otherwise, pass Arg.register iregs; + ]) in + let return = make_return r @@ fun size -> [ + is_x87, pass Arg.reference iregs; + C.Type.is_floating, pass Arg.register vrets; + is_big size, pass Arg.reference iregs; + otherwise, pass Arg.register irets; + ] in + Arg.define ~return args + + + let merge_kinds k1 k2 = match k1,k2 with + | `Nil, t | t, `Nil -> t + | `Int, _ | _, `Int -> `Int + | `Sse, `Sse -> `Sse + + let partition fields = + List.fold fields ~init:(`Nil,0,[]) + ~f:(fun (k,s,words) (k',s') -> + if s + s' <= 64 + then merge_kinds k k',s+s',words + else k',s',k :: words) |> + fun (k,_,words) -> List.rev (k::words) + + let rec classify : C.Type.t -> [`Nil | `Int | `Sse] = function + | t when is_integer t -> `Int + | t when is_sse t || is_csse t -> `Sse + | `Structure {t={fields}} + | `Union {t={fields}} -> classify_fields fields + | `Array {t={element}} -> classify element + | _ -> `Nil + and classify_fields fields = + List.fold ~init:`Nil fields ~f:(fun k (_,t) -> + merge_kinds k (classify t)) + + let sysv t = + let data = new C.Size.base `LP64 in + let is_large bits _ = bits > 128 in + C.Abi.define t data @@ fun _ {C.Type.Proto.return=r; args} -> + + let* iregs = arena t ["rdi"; "rsi"; "rdx"; "rcx"; "r8"; "r9"] in + let* vregs = arena t ~low:64 @@ List.init 8 ~f:(sprintf "ymm%d") in + let* irets = arena t ["rax"; "rdx"] in + let* vrets = arena t ~low:64 ["ymm0"; "ymm1"] in + + let union_fields size fields = + Arg.return [classify_fields fields,size] in + let rec fields {C.Type.Compound.fields=xs} = + Arg.List.fold ~init:(0,[]) xs ~f:(fun (off,acc) (_,t) -> + let alignment = data#alignment t in + let* size = Arg.size t in + let size = size + C.Size.padding alignment off in + let off = off + size in + match t with + | t when is_integer t -> + Arg.return (off, [`Int,size] :: acc) + | t when is_sse t || is_csse t -> + Arg.return (off, [`Sse,size] :: acc) + | `Structure {t} -> + let+ fields = fields t in + off, fields :: acc + | `Union {t={fields}} -> + let+ fields = union_fields size fields in + off,fields :: acc + | `Array {t={C.Type.Array.element; size=Some n}} -> + let+ size = Arg.size element in + let kind = classify element in + off,List.init n ~f:(fun _ -> kind,size) :: acc + | _ -> Arg.reject ()) >>| fun (_,acc) -> + List.rev acc |> List.concat in + + let compound_fields : C.Type.t -> _ list Arg.t = function + | `Structure {t} -> fields t >>| partition + | `Union {t={fields}} as s -> + let* size = Arg.size s in + union_fields size fields >>| partition + | _ -> Arg.return [] in + + let registers = Arg.registers ~rev:true ~limit:2 in + + let pass_compound memory iregs vregs t = + Arg.choice [ + compound_fields t >>= begin function + | [`Int] -> Arg.register iregs t + | [`Sse] -> Arg.register vregs t + | [`Int; `Int] -> registers iregs t + | [`Int; `Sse] -> Arg.split iregs vregs t + | [`Sse; `Int] -> Arg.split vregs iregs t + | [`Sse; `Sse] -> registers vregs t + | _ -> Arg.reject () + end; + memory t; + ] in + + let args = Arg.List.iter args ~f:(fun (_,arg) -> + let* bits = Arg.size arg in + select arg [ + is_large bits, Arg.memory; + is_integer, Arg.register iregs; + is_sse, Arg.register vregs; + is_csse, registers vregs; + is_compound, pass_compound Arg.memory iregs vregs + ]) in + + let return = make_return r @@ fun bits -> [ + is_large bits, Arg.reference iregs; + is_integer, Arg.register irets; + is_sse, Arg.register vrets; + is_csse, registers vrets; + is_compound, pass_compound (Arg.reference iregs) irets vrets; + ] in + Arg.define ~return args + + + let calling_conventions = [ + (* 16-bit ABI *) + [i86; i186; i286], [ + Abi.cdecl, cdecl16; + Abi.pascal, pascal16; + Abi.fortran, pascal16; + ]; + + (* 32-bit ABI *) + [i386; i486; i586; i686], [ + Abi.sysv, cdecl; + Abi.darwin, cdecl; + Abi.cdecl, cdecl; + Abi.pascal, pascal; + Abi.fastcall, fastcall; + Abi.stdcall, cdecl; + Abi.watcomstack, cdecl; + Abi.ms, cdecl; + ]; + + (* 64-bit ABI *) + [amd64], [ + Abi.ms, ms64; + Abi.sysv, sysv; + Abi.darwin, sysv; + ] + ] + + let demanglers = Demangler.[ + [amd64], Abi.darwin, strip_leading_underscore; + [i386; i486; i586; i686; amd64], Abi.ms, strip_leading_underscore; + ] + + let name_with_abi target abi = + Format.asprintf "%s-%s" + (KB.Name.unqualified (Theory.Target.name target)) + (KB.Name.unqualified (Theory.Abi.name abi)) -let enable_loader () = + let register_target parent abi install = + install @@ Theory.Target.declare ~package:"bap" + (name_with_abi parent abi) + ~parent ~abi + + + let default_calling_conventions = [ + [i86], cdecl16; + [i386; i486; i586; i686], cdecl; + [amd64], sysv; + ] + + let install_calling_conventions () = + List.iter calling_conventions ~f:(fun (targets,args) -> + List.cartesian_product targets args |> + List.iter ~f:(fun (target,(abi,install)) -> + register_target target abi install)); + List.iter default_calling_conventions ~f:(fun (targets,install) -> + List.iter targets ~f:install) + + let install_demanglers () = + List.iter demanglers ~f:(fun (targets,abi,demangler) -> + List.iter targets ~f:(fun target -> + let name = name_with_abi target abi in + let target = Theory.Target.get ~package:"bap" name in + Demanglers.install target demangler)) + + include Abi +end + +let target_with_abi base name = + Theory.Target.get ~package:"bap" @@ + if Theory.Abi.is_unknown name + then KB.Name.show (Theory.Target.name base) + else Abi.name_with_abi base name + +let enable_loader ~abi () = let open KB.Syntax in KB.Rule.(declare ~package "x86-target" |> require Image.Spec.slot |> provide Theory.Unit.target |> comment "computes target from the OGRE specification"); - let request_arch doc = - match Ogre.eval (Ogre.request Image.Scheme.arch) doc with - | Error _ -> None + + let make_target target abi' = + target_with_abi target @@ + if Theory.Abi.is_unknown abi then abi' else abi in + + let request = + let open Ogre.Syntax in + Ogre.request Image.Scheme.arch >>= fun arch -> + Ogre.request Image.Scheme.bits >>= fun bits -> + Ogre.request Image.Scheme.format >>= fun fmt -> + Ogre.return (arch,bits,fmt) in + + let is_amd64 = function + | Some ("amd64"|"x86-64"| "x86_64") -> true + | _ -> false in + + let is_x86 = function + | Some ("x86"|"i186"|"i286"|"i386"|"i486"|"i586"|"i686") -> true + | name -> is_amd64 name in + + let get_info doc = + match Ogre.eval request doc with + | Error _ -> None,None,None | Ok arch -> arch in KB.promise Theory.Unit.target @@ fun unit -> KB.collect Image.Spec.slot unit >>| - request_arch >>| function - | Some ("amd64"|"x86-64"|"x86_64") -> amd64 - | Some ("x86"|"i386"|"i486"|"i586"|"i686") -> i686 - | _ -> Theory.Target.unknown + get_info >>| fun (arch,bits,fmt) -> + + if is_x86 arch then match bits, fmt with + | Some 64L, Some "elf" -> make_target amd64 Abi.sysv + | Some 64L, Some "coff" -> make_target amd64 Abi.ms + | Some 64L, Some "macho" -> make_target amd64 Abi.darwin + | Some 32L, Some "elf" -> make_target i686 Abi.sysv + | Some 32L, Some "coff" -> make_target i686 Abi.ms + | Some 32L, Some "macho" -> make_target i686 Abi.darwin + | Some 16L, _ -> make_target i286 abi + | Some 32L, _ -> make_target i686 abi + | Some 64L, _ -> make_target amd64 abi + | _ when is_amd64 arch -> make_target amd64 abi + | _ -> make_target i686 abi + else Theory.Target.unknown let enable_arch () = let open KB.Syntax in @@ -357,7 +761,9 @@ let enable_decoder backend = else pcode else Theory.Language.unknown -let load ?(backend="llvm") () = - enable_loader (); +let load ?(abi=Theory.Abi.unknown) ?(backend="llvm") () = + Abi.install_calling_conventions (); + Abi.install_demanglers (); + enable_loader ~abi (); enable_arch (); enable_decoder backend diff --git a/lib/x86_cpu/x86_target.mli b/lib/x86_cpu/x86_target.mli index 7e8f7cc32..f66e30087 100644 --- a/lib/x86_cpu/x86_target.mli +++ b/lib/x86_cpu/x86_target.mli @@ -4,13 +4,12 @@ open Bap_core_theory (** The parent of all x86 targets. - When a new target is declared it is advised to use this target as - parent so that the newly declared target will be included into the - x86 Targets family. - The [parent] target is pure abstract and doesn't have any - propreties set. -*) + When a new target is declared it is advised to use any target from + this module as parent so that the newly declared target will be + included into the x86 Targets family. The [parent] target is pure + abstract and doesn't have any propreties set. *) val parent : Theory.Target.t + val i86 : Theory.Target.t val i186 : Theory.Target.t val i286 : Theory.Target.t @@ -20,9 +19,33 @@ val i586 : Theory.Target.t val i686 : Theory.Target.t val amd64 : Theory.Target.t + +(** The list of x86 abis. + + @since 2.5.0 *) +module Abi : sig + val cdecl : Theory.abi + val pascal : Theory.abi + val fortran : Theory.abi + val fastcall : Theory.abi + val stdcall : Theory.abi + val thiscall : Theory.abi + val vectorcall : Theory.abi + val watcomstack : Theory.abi + val watcomregs : Theory.abi + val darwin : Theory.abi + val ms : Theory.abi + val sysv : Theory.abi +end + (** [load ()] loads the knowledge base rules for the x86 targets. This includes parsing the loader output and enabling backward compatibility with the old [Arch.t] representation. -*) -val load : ?backend:string -> unit -> unit + + @param abi overrides the automatic selection of abi and use the + specified one. + + @since 2.4.0 accepts the backend parameter. + @since 2.5.0 accepts the abi parameter.*) +val load : ?abi:Theory.abi -> ?backend:string -> unit -> unit diff --git a/oasis/x86 b/oasis/x86 index 52e589132..d7dfc8c17 100644 --- a/oasis/x86 +++ b/oasis/x86 @@ -7,7 +7,7 @@ Library "bap-x86-cpu" XMETADescription: provide x86 lifter Path: lib/x86_cpu FindlibName: bap-x86-cpu - BuildDepends: bap, core_kernel, ppx_bap, + BuildDepends: bap, core_kernel, ppx_bap, bap-c, bap-demangle, bap-core-theory, ogre, bap-knowledge Modules: X86_cpu, X86_env, @@ -27,8 +27,7 @@ Library x86_plugin ppx_bap, bap-main, bap-future, bap-api, ogre, bap-primus, zarith, bap-core-theory, bap-knowledge, bitvec, str Modules: X86_backend, X86_prefix - InternalModules: X86_abi, - X86_btx, + InternalModules: X86_btx, X86_cdq, X86_cmpxchg, X86_disasm, diff --git a/plugins/bil/bil_lifter.ml b/plugins/bil/bil_lifter.ml index 7ce4c7737..8b43b2468 100644 --- a/plugins/bil/bil_lifter.ml +++ b/plugins/bil/bil_lifter.ml @@ -1,6 +1,5 @@ open Core_kernel[@@warning "-D"] open Bap.Std -open Bap_future.Std open Bap_knowledge open Bap_core_theory open Monads.Std @@ -139,9 +138,26 @@ module Relocations = struct override_external is_stub name bil | None -> bil + let fixup_agent = + KB.Agent.register ~package "bil-fixup-relocator" + + let resolve_external (k,n) = + KB.Object.scoped Theory.Program.cls @@ fun lbl -> + KB.suggest fixup_agent Theory.Label.possible_name lbl (Some n) >>= fun () -> + KB.collect Theory.Label.name lbl >>| function + | None -> (k,n) + | Some n -> (k,n) + + let resolve_externals exts = + Map.to_alist exts |> + KB.List.map ~f:resolve_external >>| + Map.of_alist_exn (module Int64) + let prepare () = KB.promise relocations_slot @@ fun unit -> - KB.collect Image.Spec.slot unit >>| of_spec + let* rels = KB.collect Image.Spec.slot unit >>| of_spec in + let+ exts = resolve_externals rels.exts in + {rels with exts} end module Brancher = struct diff --git a/plugins/cxxfilt/cxxfilt_main.ml b/plugins/cxxfilt/cxxfilt_main.ml index 74cafb42c..0df21bd51 100644 --- a/plugins/cxxfilt/cxxfilt_main.ml +++ b/plugins/cxxfilt/cxxfilt_main.ml @@ -30,5 +30,4 @@ let run name = let () = Config.when_ready @@ fun _ -> - let demangler = Demangler.create "c++filt" run in - Demanglers.register demangler + Demangler.declare ~package:"bap" "c++filt" run diff --git a/plugins/primus_lisp/site-lisp/libc-init.lisp b/plugins/primus_lisp/site-lisp/libc-init.lisp index eaed57d45..e98be87cb 100644 --- a/plugins/primus_lisp/site-lisp/libc-init.lisp +++ b/plugins/primus_lisp/site-lisp/libc-init.lisp @@ -44,7 +44,8 @@ (defun init (main argc argv auxv) "GNU libc initialization stub" (declare (external "__libc_start_main") - (context (abi "sysv"))) + (context (target "amd64") + (abi "sysv"))) (setup-thread-local-storage) (exit-with (invoke-subroutine main argc argv))) diff --git a/plugins/relocatable/rel_symbolizer.ml b/plugins/relocatable/rel_symbolizer.ml index 190509066..8f4ce3700 100644 --- a/plugins/relocatable/rel_symbolizer.ml +++ b/plugins/relocatable/rel_symbolizer.ml @@ -216,10 +216,6 @@ let resolve_stubs () = | Some (Name s) -> Some s | _ -> None) -let label_for_ref = function - | Name s -> Theory.Label.for_name s - | Addr x -> Theory.Label.for_addr x - let mark_mips_stubs_as_functions () : unit = KB.promise Theory.Label.is_subroutine @@ fun label -> let* unit = label-->?Theory.Label.unit in diff --git a/plugins/x86/.merlin b/plugins/x86/.merlin index 85b96a72f..fc0d3dccd 100644 --- a/plugins/x86/.merlin +++ b/plugins/x86/.merlin @@ -9,4 +9,5 @@ B ../../lib/bap_abi B ../../lib/bap_api B ../../lib/x86_cpu B ../../lib/bap_c +B ../../lib/bap_demangle REC \ No newline at end of file diff --git a/plugins/x86/x86_abi.ml b/plugins/x86/x86_abi.ml deleted file mode 100644 index f8a3ac208..000000000 --- a/plugins/x86/x86_abi.ml +++ /dev/null @@ -1,267 +0,0 @@ -open Core_kernel[@@warning "-D"] -open Bap.Std -open Bap_c.Std -open Bap_future.Std -include Self() - -module Stack = C.Abi.Stack - -type pos = - | Ret_0 - | Ret_1 - | Arg of int - - -module type abi = sig - val arch : Arch.x86 - val name : string - val size : C.Size.base - val arg : C.Type.t -> int -> pos -> exp - val demangle : string -> string - val autodetect : project -> bool -end - -type abi = (module abi) - -module SysV = struct - include X86_cpu.AMD64 - include Bil - let name = "sysv" - let arch = `x86_64 - let stack n = Stack.create arch n - - - let xmm r width = Bil.(cast low width (var ymms.(r))) - - let flt width = function - | Ret_0 -> xmm 0 width - | Ret_1 -> xmm 1 width - | Arg n -> if Int.(n < 8) then (xmm n width) else stack Int.(n-8) - - let int _ = function - | Ret_0 -> var rax - | Ret_1 -> var rdx - | Arg 0 -> var rdi - | Arg 1 -> var rsi - | Arg 2 -> var rdx - | Arg 3 -> var rcx - | Arg 4 -> var r.(0) - | Arg 5 -> var r.(1) - | Arg n -> stack Int.(n-6) - - let arg t width = - match t with - | `Basic {C.Type.Spec.t=(`float|`double)} -> flt width - | _ -> int 64 - - let size = object - inherit C.Size.base `LP64 - end - let demangle = Fn.id - let autodetect _ = false -end - -module CDECL = struct - include X86_cpu.IA32 - include Bil - let name = "cdecl" - let arch = `x86 - let stack n = Stack.create arch n - let arg _ _ = function - | Ret_0 -> var rax - | Ret_1 -> var rdx - | Arg n -> stack Int.(n+1) - - let size = object - inherit C.Size.base `ILP32 - end - - let demangle = Fn.id - let autodetect _ = false -end - -(* in our abstraction they are the same, as they have the same layout.*) -module STDCALL = struct - include CDECL - let name = "stdcall" -end - -let strip_leading_underscore s = - match String.chop_prefix s ~prefix:"_" with - | None -> s - | Some s -> s - -let symbols proj = - let spec = Project.specification proj in - let syms = Ogre.(collect Query.(select (from Image.Scheme.named_symbol))) in - match Ogre.eval syms spec with - | Ok syms -> Seq.map ~f:snd syms - | Error _ -> Seq.empty - -let has_symbol fn proj = - Seq.mem (symbols proj) fn ~equal:String.equal - -module MS_32 = struct - include STDCALL - let name = "ms" - let demangle = strip_leading_underscore - let autodetect = has_symbol "__GetPEImageBase" -end - -module MS_64 = struct - include SysV - let name = "ms" - let arg _ _ = function - | Ret_0 -> var rax - | Ret_1 -> var rdx - | Arg 0 -> var rcx - | Arg 1 -> var rdx - | Arg 2 -> var r.(0) - | Arg 3 -> var r.(1) - | Arg n -> stack Int.(n+1) - - let size = object - inherit C.Size.base `LLP64 as super - method! alignment = function - | `Basic {C.Type.Spec.t=#C.Type.short} -> `r32 - | t -> super#alignment t - end - let autodetect = has_symbol "_GetPEImageBase" -end - -module FASTCALL = struct - include CDECL - let name = "fastcall" - let arg w t = function - | Arg 0 -> var rcx - | Arg 1 -> var rdx - | other -> arg w t other -end - -module WATCOM_STACK = struct - include CDECL - let name = "watcom-stack" - let demangle s = match String.chop_suffix s ~suffix:"_" with - | None -> s - | Some s -> s -end - - -module WATCOM_REGS = struct - include WATCOM_STACK - let name = "watcom-regs" - let arg w t = function - | Arg 0 -> var rax - | Arg 1 -> var rdx - | Arg 2 -> var rbx - | Arg 3 -> var rcx - | Arg n -> stack Int.(n-4) - | ret -> arg w t ret -end - -exception Unsupported - -let supported_api (module Abi : abi) {C.Type.Proto.return; args} = - let word = Arch.addr_size (Abi.arch :> arch) |> Size.in_bits in - let return = match Abi.size#bits return with - | None -> None - | Some width -> match Size.of_int_opt width with - | None -> - warning "size of return object doesn't fit into word sizes"; - raise Unsupported - | Some _ -> - let data = C.Abi.data Abi.size return in - if width > word && width <= word * 2 - then Some (data, Bil.(Abi.arg return width Ret_0 ^ Abi.arg return width Ret_1)) - else if width <= word - then Some (data, Abi.arg return width Ret_0) - else - (warning "size of return object doesn't fit into double word\n"; - raise Unsupported) in - let params = List.mapi args ~f:(fun i (_,t) -> - match Abi.size#bits t with - | None -> - warning "size of %a parameter is unknown" C.Type.pp t; - raise Unsupported - | Some size -> match Size.of_int_opt size with - | Some _ when size <= word -> - C.Abi.data Abi.size t, Abi.arg t size (Arg i) - | _ -> - warning "argument %d doesn't fit into word" i; - raise Unsupported) in - C.Abi.{return; params; hidden=[]} - -let supported () : (module abi) list = [ - (module SysV); - (module CDECL); - (module STDCALL); - (module MS_32); - (module MS_64); - (module FASTCALL); - (module WATCOM_STACK); - (module WATCOM_REGS) -] - -let is_named_after abi name = - let module Abi = (val abi : abi) in - String.equal Abi.name name || - String.equal (Abi.name ^ "_abi") name - -let name (module Abi : abi) = Abi.name -let arch (module Abi : abi) = Abi.arch -let find name = supported () |> List.find ~f:(fun abi -> is_named_after abi name) - -let auto proj = supported () |> List.find ~f:(fun (module Abi : abi) -> - Abi.autodetect proj) - -let api abi proto = - try Some (supported_api abi proto) with Unsupported -> - warning "skipped function due to unsupported abi"; - None - - -let default_abi arch : (module abi) = match arch with - | `x86 -> (module CDECL) - | `x86_64 -> (module SysV) - - -let dispatch default sub attrs proto = - let abi = supported () |> List.find ~f:(fun abi -> - List.exists attrs ~f:(fun {C.Type.Attr.name} -> - is_named_after abi name)) |> function - | None -> default - | Some abi -> abi in - info "applying %s to %s" (name abi) (Sub.name sub); - api abi proto - - -let demangle demangle prog = - Term.map sub_t prog ~f:(fun sub -> - let name = demangle (Sub.name sub) in - Sub.with_name sub name) - -let setup ?(abi=fun _ -> None) () = - let main proj = match Project.arch proj with - | #Arch.x86 as arch -> - let abi = match abi arch with - | Some abi -> abi - | None -> match auto proj with - | Some abi -> - info "autodetected ABI"; - abi - | None -> - info "can't detect ABI, falling back to default"; - default_abi arch in - let module Abi = (val abi) in - info "using %s ABI" Abi.name; - let abi = C.Abi.{ - insert_args = dispatch abi; - apply_attrs = fun _ -> Fn.id - } in - C.Abi.register Abi.name abi; - let api = C.Abi.create_api_processor Abi.size abi in - Bap_api.process api; - let proj = Project.map_program proj ~f:(demangle Abi.demangle) in - Project.set proj Bap_abi.name Abi.name - | _ -> proj in - Bap_abi.register_pass main diff --git a/plugins/x86/x86_abi.mli b/plugins/x86/x86_abi.mli deleted file mode 100644 index da8adaf2b..000000000 --- a/plugins/x86/x86_abi.mli +++ /dev/null @@ -1,10 +0,0 @@ -open Bap.Std - -type abi - -val name : abi -> string -val arch : abi -> Arch.x86 -val supported : unit -> abi list - -(** registers x86 ABIs *) -val setup : ?abi:(Arch.x86 -> abi option) -> unit -> unit diff --git a/plugins/x86/x86_main.ml b/plugins/x86/x86_main.ml index cd1cc62e3..48244b4eb 100644 --- a/plugins/x86/x86_main.ml +++ b/plugins/x86/x86_main.ml @@ -1,4 +1,5 @@ open Core_kernel[@@warning "-D"] +open Bap_core_theory open Poly open Bap.Std open X86_targets @@ -6,18 +7,16 @@ include Self() type kind = Legacy | Modern | Merge [@@deriving equal] -let main backend kind x32 x64 = - X86_target.load ?backend (); +let main backend kind abi = + let abi = Option.map ~f:(Theory.Abi.read ~package:"bap") abi in + X86_target.load ?abi ?backend (); let kind = if backend = Some "ghidra" then Modern else kind in let ia32, amd64 = match kind with | Legacy -> (module IA32L : Target), (module AMD64L : Target) | Modern -> (module IA32 : Target), (module AMD64 : Target) | Merge -> (module IA32M : Target), (module AMD64M : Target) in register_target `x86 ia32; - register_target `x86_64 amd64; - X86_abi.setup ~abi:(function - | `x86 -> x32 - | `x86_64 -> x64) () + register_target `x86_64 amd64 let () = let () = Config.manpage [ @@ -39,16 +38,9 @@ let () = `S "SEE ALSO"; `P "$(b,bap-x86-cpu)(3), $(b,bap-plugin-abi)(1), $(b,bap-plugin-arm)(1)" ] in - let abi arch name = - let abis = X86_abi.supported () |> List.filter_map ~f:(fun abi -> - if Arch.equal (X86_abi.arch abi :> arch) arch - then Some (X86_abi.name abi, abi) - else None) in - let doc = sprintf "Use specified $(docv) as a default one. The $(docv) - must be %s" @@ Config.doc_enum abis in - Config.(param (some (enum abis)) name ~doc) in - let x32 = abi `x86 "abi" in - let x64 = abi `x86_64 "64-abi" in + let abi = + let doc = "Override the ABI detection and use the selected ABI." in + Config.(param (some string) "abi" ~synonyms:["64-abi"] ~doc) in let fp_lifter = Config.flag "with-floating-points" ~doc:"DEPRECATED" in let legacy_lifter = Config.flag "with-legacy-floating-points" @@ -77,7 +69,7 @@ let () = let open Bap_core_theory in let open Bap_primus.Std in - main !!backend !!kind !!x32 !!x64; + main !!backend !!kind !!abi; if !!fp_lifter || !!legacy_lifter then begin let open Bap_core_theory in let open Bap_primus.Std in