Skip to content

Commit

Permalink
adds c layouts (#1524)
Browse files Browse the repository at this point in the history
also, fixes c.data computation to include the trailing padding.
  • Loading branch information
ivg authored Jul 5, 2022
1 parent 4d019ce commit 5a71fb1
Show file tree
Hide file tree
Showing 6 changed files with 256 additions and 71 deletions.
107 changes: 88 additions & 19 deletions lib/bap_c/bap_c_abi.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ open Monads.Std
include Self()

module Attrs = Bap_c_term_attributes

module Data = Bap_c_data
type ctype = t

let is_const p = p.Spec.qualifier.Qualifier.const
Expand Down Expand Up @@ -40,7 +40,7 @@ type error = [
] [@@deriving sexp_of]

let sexp_of_exp exp = Sexp.Atom (Exp.to_string exp)
type param = Bap_c_data.t * exp [@@deriving sexp]
type param = Data.t * exp [@@deriving sexp]

type args = {
return : param option;
Expand All @@ -58,33 +58,96 @@ exception Failed of error [@@deriving sexp_of]
let fail x = raise (Failed x)

let data (size : #Bap_c_size.base) (t : Bap_c_type.t) =
let open Bap_c_data in
let open Data in
let sizeof t = match size#bits t with
| None -> Size.in_bits size#pointer
| Some s -> s in
let padding pad : Data.t =
match Size.of_int_opt pad with
| Some pad -> Imm (pad,Set [])
| None ->
let data : Data.t = Imm (`r8,Set []) in
Seq (List.init (pad/8) ~f:(Fn.const data)) in
let rec data = function
| `Void -> Seq []
| `Basic {Spec.t} -> Imm (size#basic t, Top)
| `Pointer {Spec.t} -> Ptr (data t)
| `Array {Spec.t={Array.element=t; size=None}} -> Ptr (data t)
| `Array {Spec.t={Array.element=t; size=Some n}} ->
let et = data t in
Ptr (Seq (List.init n ~f:(fun _ -> et)))
Ptr (Seq (List.init n ~f:(Fn.const (data t))))
| `Structure {Spec.t={Compound.fields=fs}} ->
let _,ss =
List.fold fs ~init:(0,[]) ~f:(fun (off,seq) (_,t) ->
let off' = match size#bits t with
| None -> off + Size.in_bits size#pointer (* or assert false *)
| Some sz -> off + sz in
match size#padding t off with
| None -> off', data t :: seq
| Some pad -> off, data t :: Imm (pad,Set []) :: seq) in
List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (_,t) ->
let fsize = sizeof t in
let pad = Bap_c_size.padding (size#alignment t) off in
off + fsize + pad, total + fsize + pad, match pad with
| 0 -> data t :: seq
| _ -> data t :: padding pad :: seq) |> fun (_,total,ss) ->
let fullsize = sizeof t in
let pad = max 0 (fullsize - total) in
let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in
Seq (List.rev ss)
| `Union {Spec.t=_} ->
let sz = match size#bits t with
| None -> Size.in_bits size#pointer
| Some sz -> sz in
Seq (List.init (sz/8) ~f:(fun _ -> Imm (`r8,Set [])))
| `Union _ ->
let sz = sizeof t in
Seq (List.init (sz/8) ~f:(fun _ -> Imm (`r8,Top)))
| `Function _ -> Ptr (Imm ((size#pointer :> size),Top)) in
data t

let layout (size : #Bap_c_size.base) (t : Bap_c_type.t) =
let open Data in
let sizeof t = match size#bits t with
| None -> Size.in_bits size#pointer
| Some s -> s in
let imm size obj : Data.layout = {layout=Imm(size,obj)}
and ptr {layout=data} : Data.layout = {layout=Ptr data}
and seq layouts : Data.layout = {
layout = Seq (List.map layouts ~f:(fun {layout} -> layout))
} in
let padding pad : Data.layout = imm pad Undef in
let rec layout t : Data.layout = match t with
| `Void -> imm 8 Undef
| `Basic {Spec.t} -> imm (Size.in_bits (size#basic t)) (Basic t)
| `Pointer {Spec.t} -> ptr (layout t)
| `Array {Spec.t={Array.element=t; size=None}} -> ptr (layout t)
| `Array {Spec.t={Array.element=t; size=Some n}} ->
ptr (seq (List.init n ~f:(Fn.const (layout t))))
| `Structure {Spec.t={Compound.fields=fs}} ->
List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (name,t) ->
let fsize = sizeof t in
let pad = Bap_c_size.padding (size#alignment t) off in
off + fsize + pad, total + fsize + pad,
imm fsize (Field (name,layout t)) ::
match pad with
| 0 -> seq
| _ -> padding pad :: seq) |> fun (_,total,ss) ->
let fullsize = sizeof t in
let pad = max 0 (fullsize - total) in
let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in
seq (List.rev ss)
| `Union {Spec.t={Compound.fields=fs}} ->
let total = sizeof t in
let variants = List.map fs ~f:(fun (name,t) ->
let fsize = sizeof t in
let pad = max 0 (total - fsize) in
let field = imm fsize @@ Field (name, layout t) in
match pad with
| 0 -> field
| _ -> seq [field; padding pad]) in
imm total (Union variants)
| `Function _ -> ptr (imm (Size.in_bits (size#pointer)) Undef) in
layout t

let rec size_of_data size : Data.t -> int = function
| Imm (size,_) -> Size.in_bits size
| Seq xs -> List.sum (module Int) ~f:(size_of_data size) xs
| Ptr _ -> Size.in_bits (size#pointer)

let rec size_of_layout size : Data.layout -> int =
fun {layout} -> size_of_datum size layout
and size_of_datum size : _ Data.datum -> int = function
| Imm (size,_) -> size
| Seq xs -> List.sum (module Int) ~f:(size_of_datum size) xs
| Ptr _ -> Size.in_bits (size#pointer)

let array_to_pointer (t : ctype) : ctype =
match t with
| `Array ({t={element}} as s) -> `Pointer {s with t = element}
Expand All @@ -109,12 +172,18 @@ let create_arg size i intent name t (data,exp) sub =
let ltyp = match size#bits t with
| None -> Type.imm (Size.in_bits size#pointer)
| Some m -> Type.imm m in
let layout = match data with
| Data.Ptr _ ->
if Bap_c_type.is_pointer t then layout size t
else layout size (Bap_c_type.pointer t)
| _ -> layout size t in
let rtyp = Type.infer_exn exp in
let name = if String.is_empty name then sprintf "arg%d" (i+1) else name in
let var = Var.create (Sub.name sub ^ "_" ^ name) ltyp in
let arg = Arg.create ~intent var @@ coerce ltyp rtyp exp in
let arg = Term.set_attr arg Attrs.data data in
let arg = Term.set_attr arg Attrs.t t in
let arg = Term.set_attr arg Attrs.layout layout in
arg

let registry = Hashtbl.create (module String)
Expand Down Expand Up @@ -242,7 +311,7 @@ module Arg = struct
module C = struct
module Size = Bap_c_size
module Type = Bap_c_type
module Data = Bap_c_data
module Data = Data
end

module Stack : sig
Expand Down
12 changes: 11 additions & 1 deletion lib/bap_c/bap_c_abi.mli
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,19 @@ val create_api_processor : #Bap_c_size.base -> t -> Bap_api.t

(** [data size t] creates an abstraction of data that is represented
by type [t]. The [size] parameter defines a data model, e.g.,
sizes of primitive types, padding and alignment restrictions, etc.*)
sizes of primitive types, padding and alignment restrictions, etc.
The abstraction includes inner and trailing paddings, when
necessary. *)
val data : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.t


(** [layout size t] computes the c data type layout.
@since 2.5.0 *)
val layout : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.layout


(** [arg_intent t] infers argument intention based on its C type. If
an argument is passed by value, i.e., it is a c basic type, then
it is an input argument. If an argument is a reference, but not a
Expand Down
89 changes: 48 additions & 41 deletions lib/bap_c/bap_c_data.ml
Original file line number Diff line number Diff line change
@@ -1,66 +1,73 @@
(** C Data model.
This module defines abstractions for C values.
A value is backed by a datum - a sequence of bits that represents
the value. This module also defines models for integer
representation.
*)
open Core_kernel[@@warning "-D"]
open Bap.Std
open Format

(** models for 32 bit systems *)
type model32 = [
| `LP32
| `ILP32
]

(** models for 64 bit systems *)
type model64 = [
| `ILP64
| `LLP64
| `LP64
]


(** The following table summarize all models of integer
representation.
{v
LP32 ILP32 ILP64 LLP64 LP64
char 8 8 8 8 8
short 16 16 16 16 16
int 16 32 64 32 32
long 32 32 64 32 64
addr 32 32 64 64 64
v}
*)
type model = [model32 | model64]

(** Abstract value lattice. The lattice is complete, and
[Set []] is the supremum, i.e., the bot.*)
type value =
| Top
(** any possible value *)
| Set of word list
(** one of the specified *)
[@@deriving bin_io, compare, sexp]

type 'd obj =
| Basic of Bap_c_type.basic
| Field of (string * 'd)
| Undef
| Union of 'd list
[@@deriving bin_io, compare, sexp]

(** abstraction of a С datum.
type ('d,'s) datum =
| Imm of 's * 'd
| Seq of ('d,'s) datum list
| Ptr of ('d,'s) datum
[@@deriving bin_io, compare, sexp]

The datum is a sequence of bits that represenst a particular C
value. We abstract datum as either an immediate value of the given
size and value lattice, or a sequence of data, or a pointer to a
datum.*)
type t =
| Imm of Size.t * value
(** [Imm (size,value)] *)
| Seq of t list
(** [Seq (t1,..,tN)] *)
| Ptr of t
(** [Ptr (type,size)] *)
type layout = {layout : (layout obj,int) datum}
[@@deriving bin_io, compare, sexp]

type t = (value,Size.t) datum
[@@deriving bin_io, compare, sexp]

let pp_value ppf = function
| Top -> fprintf ppf "Top"
| Set xs -> fprintf ppf "%a" (Seq.pp Word.pp) (Seq.of_list xs)
let rec pp ppf = function
| Imm (sz,v) -> fprintf ppf "%a:%a" pp_value v Size.pp sz
| Seq ts -> fprintf ppf "%a" (Seq.pp pp) (Seq.of_list ts)
| Ptr t -> fprintf ppf "%a ptr" pp t


(** *)
let rec pp_layout ppf : layout -> unit = fun {layout=datum} ->
pp_datum ppf datum
and pp_datum ppf : (layout obj, int) datum -> unit = function
| Imm (sz,v) ->
fprintf ppf "@[<2>[%a : %d]@]" pp_obj v sz
| Seq objs ->
fprintf ppf "@[@[<hv2>{@ ";
pp_print_list ~pp_sep:(fun ppf () ->
fprintf ppf ",@ ")
pp_datum ppf objs;
fprintf ppf "@]@;}@]"
| Ptr t ->
fprintf ppf "*%a" pp_datum t
and pp_obj ppf : layout obj -> unit = function
| Basic t -> Bap_c_type.(pp ppf (basic t))
| Field (name,layout) ->
fprintf ppf "@[<2><%s : %a>@]" name pp_layout layout
| Undef ->
fprintf ppf "<undef>"
| Union xs ->
fprintf ppf "@[<hv>";
pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "@;| ")
pp_layout ppf xs;
fprintf ppf "@]"
92 changes: 92 additions & 0 deletions lib/bap_c/bap_c_data.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
(** C Data model.
This module defines abstractions for C values.
A value is backed by a datum - a sequence of bits that represents
the value. This module also defines models for integer
representation.
*)
open Core_kernel[@@warning "-D"]
open Bap.Std

(** models for 32 bit systems *)
type model32 = [
| `LP32
| `ILP32
]

(** models for 64 bit systems *)
type model64 = [
| `ILP64
| `LLP64
| `LP64
]


(** The following table summarize all models of integer
representation.
{v
LP32 ILP32 ILP64 LLP64 LP64
char 8 8 8 8 8
short 16 16 16 16 16
int 16 32 64 32 32
long 32 32 64 32 64
addr 32 32 64 64 64
v}
*)
type model = [model32 | model64]

(** A value lattice.*)
type value =
| Top (** any possible value *)
| Set of word list (** one of the specified, [Set []] is bot *)
[@@deriving bin_io, compare, sexp]


(** A C Object representation.
The type is parameterized with the object layout representation to
enable the recursive definition of the generalized layout type.
@since 2.5.0 *)
type 'd obj =
| Basic of Bap_c_type.basic (** A value of a basic type *)
| Field of (string * 'd) (** A struct or union field *)
| Undef (** Undefined data (padding or code) *)
| Union of 'd list (** Union of values *)
[@@deriving bin_io, compare, sexp]

(** abstraction of a С datum.
The datum is a sequence of bits that represents a C object. We
abstract datum as either an immediate value of the given size,
or a sequence of data, or a pointer to a datum.
@since 2.5.0
*)
type ('d,'s) datum =
| Imm of 's * 'd (** [Imm (size, value)] *)
| Seq of ('d,'s) datum list (** [Seq [t1; ... ;tN]] *)
| Ptr of ('d,'s) datum (** [Ptr datum] *)
[@@deriving bin_io, compare, sexp]


(** Describes C object's layout. *)
type layout = {layout : (layout obj,int) datum}
[@@deriving bin_io, compare, sexp]


(** The datum that uses value lattice for object representation. *)
type t = (value,Size.t) datum
[@@deriving bin_io, compare, sexp]


(** [pp ppf datum] prints the datum in a human-readable form.
@since 2.5.0 *)
val pp : Format.formatter -> t -> unit


(** [pp_layout ppf layout] outputs layout in a human-readable form.
@since 2.5.0 *)
val pp_layout : Format.formatter -> layout -> unit
Loading

0 comments on commit 5a71fb1

Please sign in to comment.