Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds C data type layout #1524

Merged
merged 1 commit into from
Jul 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 88 additions & 19 deletions lib/bap_c/bap_c_abi.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ open Monads.Std
include Self()

module Attrs = Bap_c_term_attributes

module Data = Bap_c_data
type ctype = t

let is_const p = p.Spec.qualifier.Qualifier.const
Expand Down Expand Up @@ -40,7 +40,7 @@ type error = [
] [@@deriving sexp_of]

let sexp_of_exp exp = Sexp.Atom (Exp.to_string exp)
type param = Bap_c_data.t * exp [@@deriving sexp]
type param = Data.t * exp [@@deriving sexp]

type args = {
return : param option;
Expand All @@ -58,33 +58,96 @@ exception Failed of error [@@deriving sexp_of]
let fail x = raise (Failed x)

let data (size : #Bap_c_size.base) (t : Bap_c_type.t) =
let open Bap_c_data in
let open Data in
let sizeof t = match size#bits t with
| None -> Size.in_bits size#pointer
| Some s -> s in
let padding pad : Data.t =
match Size.of_int_opt pad with
| Some pad -> Imm (pad,Set [])
| None ->
let data : Data.t = Imm (`r8,Set []) in
Seq (List.init (pad/8) ~f:(Fn.const data)) in
let rec data = function
| `Void -> Seq []
| `Basic {Spec.t} -> Imm (size#basic t, Top)
| `Pointer {Spec.t} -> Ptr (data t)
| `Array {Spec.t={Array.element=t; size=None}} -> Ptr (data t)
| `Array {Spec.t={Array.element=t; size=Some n}} ->
let et = data t in
Ptr (Seq (List.init n ~f:(fun _ -> et)))
Ptr (Seq (List.init n ~f:(Fn.const (data t))))
| `Structure {Spec.t={Compound.fields=fs}} ->
let _,ss =
List.fold fs ~init:(0,[]) ~f:(fun (off,seq) (_,t) ->
let off' = match size#bits t with
| None -> off + Size.in_bits size#pointer (* or assert false *)
| Some sz -> off + sz in
match size#padding t off with
| None -> off', data t :: seq
| Some pad -> off, data t :: Imm (pad,Set []) :: seq) in
List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (_,t) ->
let fsize = sizeof t in
let pad = Bap_c_size.padding (size#alignment t) off in
off + fsize + pad, total + fsize + pad, match pad with
| 0 -> data t :: seq
| _ -> data t :: padding pad :: seq) |> fun (_,total,ss) ->
let fullsize = sizeof t in
let pad = max 0 (fullsize - total) in
let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in
Seq (List.rev ss)
| `Union {Spec.t=_} ->
let sz = match size#bits t with
| None -> Size.in_bits size#pointer
| Some sz -> sz in
Seq (List.init (sz/8) ~f:(fun _ -> Imm (`r8,Set [])))
| `Union _ ->
let sz = sizeof t in
Seq (List.init (sz/8) ~f:(fun _ -> Imm (`r8,Top)))
| `Function _ -> Ptr (Imm ((size#pointer :> size),Top)) in
data t

let layout (size : #Bap_c_size.base) (t : Bap_c_type.t) =
let open Data in
let sizeof t = match size#bits t with
| None -> Size.in_bits size#pointer
| Some s -> s in
let imm size obj : Data.layout = {layout=Imm(size,obj)}
and ptr {layout=data} : Data.layout = {layout=Ptr data}
and seq layouts : Data.layout = {
layout = Seq (List.map layouts ~f:(fun {layout} -> layout))
} in
let padding pad : Data.layout = imm pad Undef in
let rec layout t : Data.layout = match t with
| `Void -> imm 8 Undef
| `Basic {Spec.t} -> imm (Size.in_bits (size#basic t)) (Basic t)
| `Pointer {Spec.t} -> ptr (layout t)
| `Array {Spec.t={Array.element=t; size=None}} -> ptr (layout t)
| `Array {Spec.t={Array.element=t; size=Some n}} ->
ptr (seq (List.init n ~f:(Fn.const (layout t))))
| `Structure {Spec.t={Compound.fields=fs}} ->
List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (name,t) ->
let fsize = sizeof t in
let pad = Bap_c_size.padding (size#alignment t) off in
off + fsize + pad, total + fsize + pad,
imm fsize (Field (name,layout t)) ::
match pad with
| 0 -> seq
| _ -> padding pad :: seq) |> fun (_,total,ss) ->
let fullsize = sizeof t in
let pad = max 0 (fullsize - total) in
let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in
seq (List.rev ss)
| `Union {Spec.t={Compound.fields=fs}} ->
let total = sizeof t in
let variants = List.map fs ~f:(fun (name,t) ->
let fsize = sizeof t in
let pad = max 0 (total - fsize) in
let field = imm fsize @@ Field (name, layout t) in
match pad with
| 0 -> field
| _ -> seq [field; padding pad]) in
imm total (Union variants)
| `Function _ -> ptr (imm (Size.in_bits (size#pointer)) Undef) in
layout t

let rec size_of_data size : Data.t -> int = function
| Imm (size,_) -> Size.in_bits size
| Seq xs -> List.sum (module Int) ~f:(size_of_data size) xs
| Ptr _ -> Size.in_bits (size#pointer)

let rec size_of_layout size : Data.layout -> int =
fun {layout} -> size_of_datum size layout
and size_of_datum size : _ Data.datum -> int = function
| Imm (size,_) -> size
| Seq xs -> List.sum (module Int) ~f:(size_of_datum size) xs
| Ptr _ -> Size.in_bits (size#pointer)

let array_to_pointer (t : ctype) : ctype =
match t with
| `Array ({t={element}} as s) -> `Pointer {s with t = element}
Expand All @@ -109,12 +172,18 @@ let create_arg size i intent name t (data,exp) sub =
let ltyp = match size#bits t with
| None -> Type.imm (Size.in_bits size#pointer)
| Some m -> Type.imm m in
let layout = match data with
| Data.Ptr _ ->
if Bap_c_type.is_pointer t then layout size t
else layout size (Bap_c_type.pointer t)
| _ -> layout size t in
let rtyp = Type.infer_exn exp in
let name = if String.is_empty name then sprintf "arg%d" (i+1) else name in
let var = Var.create (Sub.name sub ^ "_" ^ name) ltyp in
let arg = Arg.create ~intent var @@ coerce ltyp rtyp exp in
let arg = Term.set_attr arg Attrs.data data in
let arg = Term.set_attr arg Attrs.t t in
let arg = Term.set_attr arg Attrs.layout layout in
arg

let registry = Hashtbl.create (module String)
Expand Down Expand Up @@ -242,7 +311,7 @@ module Arg = struct
module C = struct
module Size = Bap_c_size
module Type = Bap_c_type
module Data = Bap_c_data
module Data = Data
end

module Stack : sig
Expand Down
12 changes: 11 additions & 1 deletion lib/bap_c/bap_c_abi.mli
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,19 @@ val create_api_processor : #Bap_c_size.base -> t -> Bap_api.t

(** [data size t] creates an abstraction of data that is represented
by type [t]. The [size] parameter defines a data model, e.g.,
sizes of primitive types, padding and alignment restrictions, etc.*)
sizes of primitive types, padding and alignment restrictions, etc.

The abstraction includes inner and trailing paddings, when
necessary. *)
val data : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.t


(** [layout size t] computes the c data type layout.

@since 2.5.0 *)
val layout : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.layout


(** [arg_intent t] infers argument intention based on its C type. If
an argument is passed by value, i.e., it is a c basic type, then
it is an input argument. If an argument is a reference, but not a
Expand Down
89 changes: 48 additions & 41 deletions lib/bap_c/bap_c_data.ml
Original file line number Diff line number Diff line change
@@ -1,66 +1,73 @@
(** C Data model.

This module defines abstractions for C values.

A value is backed by a datum - a sequence of bits that represents
the value. This module also defines models for integer
representation.
*)
open Core_kernel[@@warning "-D"]
open Bap.Std
open Format

(** models for 32 bit systems *)
type model32 = [
| `LP32
| `ILP32
]

(** models for 64 bit systems *)
type model64 = [
| `ILP64
| `LLP64
| `LP64
]


(** The following table summarize all models of integer
representation.

{v
LP32 ILP32 ILP64 LLP64 LP64
char 8 8 8 8 8
short 16 16 16 16 16
int 16 32 64 32 32
long 32 32 64 32 64
addr 32 32 64 64 64
v}
*)
type model = [model32 | model64]

(** Abstract value lattice. The lattice is complete, and
[Set []] is the supremum, i.e., the bot.*)
type value =
| Top
(** any possible value *)
| Set of word list
(** one of the specified *)
[@@deriving bin_io, compare, sexp]

type 'd obj =
| Basic of Bap_c_type.basic
| Field of (string * 'd)
| Undef
| Union of 'd list
[@@deriving bin_io, compare, sexp]

(** abstraction of a С datum.
type ('d,'s) datum =
| Imm of 's * 'd
| Seq of ('d,'s) datum list
| Ptr of ('d,'s) datum
[@@deriving bin_io, compare, sexp]

The datum is a sequence of bits that represenst a particular C
value. We abstract datum as either an immediate value of the given
size and value lattice, or a sequence of data, or a pointer to a
datum.*)
type t =
| Imm of Size.t * value
(** [Imm (size,value)] *)
| Seq of t list
(** [Seq (t1,..,tN)] *)
| Ptr of t
(** [Ptr (type,size)] *)
type layout = {layout : (layout obj,int) datum}
[@@deriving bin_io, compare, sexp]

type t = (value,Size.t) datum
[@@deriving bin_io, compare, sexp]

let pp_value ppf = function
| Top -> fprintf ppf "Top"
| Set xs -> fprintf ppf "%a" (Seq.pp Word.pp) (Seq.of_list xs)
let rec pp ppf = function
| Imm (sz,v) -> fprintf ppf "%a:%a" pp_value v Size.pp sz
| Seq ts -> fprintf ppf "%a" (Seq.pp pp) (Seq.of_list ts)
| Ptr t -> fprintf ppf "%a ptr" pp t


(** *)
let rec pp_layout ppf : layout -> unit = fun {layout=datum} ->
pp_datum ppf datum
and pp_datum ppf : (layout obj, int) datum -> unit = function
| Imm (sz,v) ->
fprintf ppf "@[<2>[%a : %d]@]" pp_obj v sz
| Seq objs ->
fprintf ppf "@[@[<hv2>{@ ";
pp_print_list ~pp_sep:(fun ppf () ->
fprintf ppf ",@ ")
pp_datum ppf objs;
fprintf ppf "@]@;}@]"
| Ptr t ->
fprintf ppf "*%a" pp_datum t
and pp_obj ppf : layout obj -> unit = function
| Basic t -> Bap_c_type.(pp ppf (basic t))
| Field (name,layout) ->
fprintf ppf "@[<2><%s : %a>@]" name pp_layout layout
| Undef ->
fprintf ppf "<undef>"
| Union xs ->
fprintf ppf "@[<hv>";
pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "@;| ")
pp_layout ppf xs;
fprintf ppf "@]"
92 changes: 92 additions & 0 deletions lib/bap_c/bap_c_data.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
(** C Data model.

This module defines abstractions for C values.

A value is backed by a datum - a sequence of bits that represents
the value. This module also defines models for integer
representation.
*)
open Core_kernel[@@warning "-D"]
open Bap.Std

(** models for 32 bit systems *)
type model32 = [
| `LP32
| `ILP32
]

(** models for 64 bit systems *)
type model64 = [
| `ILP64
| `LLP64
| `LP64
]


(** The following table summarize all models of integer
representation.

{v
LP32 ILP32 ILP64 LLP64 LP64
char 8 8 8 8 8
short 16 16 16 16 16
int 16 32 64 32 32
long 32 32 64 32 64
addr 32 32 64 64 64
v}
*)
type model = [model32 | model64]

(** A value lattice.*)
type value =
| Top (** any possible value *)
| Set of word list (** one of the specified, [Set []] is bot *)
[@@deriving bin_io, compare, sexp]


(** A C Object representation.

The type is parameterized with the object layout representation to
enable the recursive definition of the generalized layout type.

@since 2.5.0 *)
type 'd obj =
| Basic of Bap_c_type.basic (** A value of a basic type *)
| Field of (string * 'd) (** A struct or union field *)
| Undef (** Undefined data (padding or code) *)
| Union of 'd list (** Union of values *)
[@@deriving bin_io, compare, sexp]

(** abstraction of a С datum.

The datum is a sequence of bits that represents a C object. We
abstract datum as either an immediate value of the given size,
or a sequence of data, or a pointer to a datum.

@since 2.5.0
*)
type ('d,'s) datum =
| Imm of 's * 'd (** [Imm (size, value)] *)
| Seq of ('d,'s) datum list (** [Seq [t1; ... ;tN]] *)
| Ptr of ('d,'s) datum (** [Ptr datum] *)
[@@deriving bin_io, compare, sexp]


(** Describes C object's layout. *)
type layout = {layout : (layout obj,int) datum}
[@@deriving bin_io, compare, sexp]


(** The datum that uses value lattice for object representation. *)
type t = (value,Size.t) datum
[@@deriving bin_io, compare, sexp]


(** [pp ppf datum] prints the datum in a human-readable form.
@since 2.5.0 *)
val pp : Format.formatter -> t -> unit


(** [pp_layout ppf layout] outputs layout in a human-readable form.
@since 2.5.0 *)
val pp_layout : Format.formatter -> layout -> unit
Loading