makes all C data type sizes a multiple of their alignment (#1501)

ivg · web-flow · commit fa01322a2689 · 2022-06-02T14:11:40.000-04:00
Though it is ABI-specific in general, apparently most if not all ABIs
require the structure size to be a multiple of its alignment, so I
decided to make it the default.

In addition, adds comprehensive documentation to each field.

Also publishes `padding` and `next_multitude_of` functions to leverage
custom alignment/padding implementations.
diff --git a/lib/bap_c/bap_c_size.ml b/lib/bap_c/bap_c_size.ml
@@ -8,6 +8,14 @@ type 'a unqualified = (no_qualifier, 'a) spec
 
 type bits = Int.t
 
+let next_multitude_of ~n x = (x + (n-1)) land (lnot (n-1))
+
+
+let padding alignment offset =
+  let align = Size.in_bits alignment in
+  (align - offset mod align) mod align
+
+
 
 class base (m : model) = object(self)
   method integer (t : integer) : size =
@@ -56,12 +64,9 @@ class base (m : model) = object(self)
     | `Pointer _ -> (self#pointer :> size)
 
   method padding t offset : size option =
-    let align = Size.in_bits (self#alignment t) in
-    match (align - offset mod align) mod align with
-    | 0 -> None
-    | n -> match Size.of_int n with
-      | Error _ -> None
-      | Ok s -> Some s
+    match Size.of_int @@ padding (self#alignment t) offset with
+    | Error _ -> None
+    | Ok s -> Some s
 
   method alignment (t : Bap_c_type.t) : size =
     let byte = `r8 in
@@ -75,14 +80,17 @@ class base (m : model) = object(self)
     | `Function _ -> (self#pointer :> size)
     | #scalar as t -> self#scalar t
 
-
-  method bits : t -> Int.t option = fun t -> match t with
-    | `Void -> None
-    | #scalar as t -> Some (Size.in_bits (self#scalar t))
-    | `Function _ -> None
-    | `Union s -> self#union s
-    | `Array s -> self#array s
-    | `Structure s -> self#structure s
+  method bits : t -> Int.t option = fun t ->
+    let size = match t with
+      | `Void -> None
+      | #scalar as t -> Some (Size.in_bits (self#scalar t))
+      | `Function _ -> None
+      | `Union s -> self#union s
+      | `Array s -> self#array s
+      | `Structure s -> self#structure s in
+    Option.map size ~f:(fun size ->
+        let alignment = self#alignment t in
+        next_multitude_of ~n:(Size.in_bits alignment) size)
 
   method array : _  -> Int.t option =
     fun {Spec.t={Array.element=t; size}} -> match size with
@@ -101,12 +109,10 @@ class base (m : model) = object(self)
 
   method structure : compound unqualified -> Int.t option =
     fun {Spec.t={Compound.fields}} ->
-    let padding t offset =
-      let align = Size.in_bits (self#alignment t) in
-      (align - offset mod align) mod align in
     List.fold fields ~init:(Some 0) ~f:(fun sz (_,field) -> match sz with
         | None -> None
         | Some sz -> match self#bits field with
           | None -> None
-          | Some sz' -> Some (sz + sz' + padding field sz))
+          | Some sz' ->
+            Some (sz + sz' + padding (self#alignment field) sz))
 end
diff --git a/lib/bap_c/bap_c_size.mli b/lib/bap_c/bap_c_size.mli
@@ -7,60 +7,133 @@ open Bap_c_type
 
 type bits = Int.t
 
-(** Base class for computing size of C data types.
+
+(** [next_multitude_of ~n x] returns the smallest [y >= x] that is a
+    multiple of [n], i.e., [y = n * k]; [n] must be a power of two.
+
+    @since 2.5.0 *)
+val next_multitude_of : n:int -> int -> int
+
+
+(** [padding alignment offset] computes the number of bits of padding
+    required at bit [offset] to reach the next [alignment] boundary.
+
+    @since 2.5.0 *)
+val padding : Size.t -> int -> int
+
+(** The base class for computing sizes and alignments of C data types.
+
     The algorithm is implemented as a class to allow
     a particular implementation to fine tune the calculation.
-    We need here an open recursion, since type is inherently
-    recursive.
 
-    The entry method is the [bits] method.
+    The [model] argument defines the default sizes for integral data
+    types. If no suitable model is available for your architecture
+    then use the closest model and override the specific methods to
+    fine-tune the data model of your target.
+
+    The entry methods are [bits] and [alignment].
+
+    {3 Example}
+
+    For example, let's compute the size of the following structure:
+
+    {v
+      struct foo {
+         char v1;
+         int  v2;
+         char v3;
+      };
+    v}
+
+    We use the LP64 data model, in which integers are 32 bits long and
+    chars are 8 bits long. The size of the structure is 12 bytes, due
+    to the 3 bytes of padding before [v2] and the 3 bytes of trailing
+    padding after [v3].
+
+    {[
+      # let size = new C.Size.base `LP64;;
+      # size#bits C.Type.(structure "foo" [
+          "v1", basic `char;
+          "v2", basic `uint;
+          "v3", basic `char
+        ]);;
+      - : C.Size.bits option = Some 96
+    ]}
 *)
 class base :  model -> object
-    (** returns a size of the data type representation if type
-        definition is complete. Otherwise [None] is returned.
-        The size is computed with respect to padding and alignment
-        restructions.
+
+
+    (** returns the size of the data type representation in bits.
+
+        For incomplete types returns [None]. The size is always a
+        multiple of the data type alignment and includes the
+        padding necessary for preserving the alignment restrictions.
+
+        @since 2.5.0 the size is a multiple of the alignment.
     *)
     method bits : t -> bits option
 
-    (** [alignment t] calculates an alignment restriction for data
-        type [t]. The default alignment rules are the following:
-        - if type is scalar then the alignment is [sizeof(t)];
-        - if type is [elt\[\]] then the alignment is [sizeof(elt)];
-        - if type is structure or union, the the alignment of is
-          the maximum alignment of a field;
-        - if type is function, then alignment is equal to sizeof
-          pointer
-        - if type is void then alignment is 8 bits.*)
+    (** [alignment t] is the alignment of data type [t].
+
+        The alignment of
+        - void or an incomplete type is 8 bits;
+        - a scalar is [sizeof(t)];
+        - an array is the alignment of its element;
+        - a function is the size of a pointer;
+        - a structure or a union is the largest of its fields' alignments.
+
+    *)
     method alignment : t -> size
 
+    (** DEPRECATED. Use the [padding] function if you need to compute
+        padding.  *)
+    method padding : t -> bits -> size option
+    [@@deprecated "since [2021-05] this method is ignored"]
     (* this method was deprecated as
        1) it has an incorrect type (padding can have any number of bits)
        2) padding is fully defined by the alignemnt and there is no
           need to parameterize it. *)
-    method padding : t -> bits -> size option
-    [@@deprecated "since [2021-05] this method is ignored"]
 
 
-    (** [array spec] if array [spec] is complete, then returns a
-        product of the bitwidth of array size and array's element
-        type, otherwise returns [None] *)
+    (** [array spec] if array [spec] is complete, i.e., the number of
+        elements is known, then returns the product of the number of
+        elements and the size of the element type in bits,
+        otherwise returns [None].
+    *)
     method array : (cvr qualifier, array) spec -> bits option
 
-    (** if spec is complete then returns a size of the biggest
-        element, including padding *)
+    (** if spec is complete then returns a size in bits of the biggest
+        element, including the padding between fields, but excluding
+        the trailing padding. *)
     method union : (no_qualifier, compound) spec -> bits option
 
     (**  if spec is complete then returns a total size of the
-         structure, including padding. *)
+         structure, including the padding between fields, but excluding
+         the trailing padding. *)
     method structure : (no_qualifier, compound) spec -> bits option
 
+
+    (** the size of integral types.  *)
     method integer : integer -> size
+
+    (** the size of a pointer.  *)
     method pointer : addr_size
+
+    (** the size of the enumeration.  *)
     method enum : (string * int64 option) list -> size
+
+    (** the size of a real floating-point data type.  *)
     method real : real -> [`r32 | `r64 | `r128]
+
+    (** the size of a complex floating-point data type.  *)
     method complex : complex -> size
+
+    (** the size of a floating-point data type.  *)
     method floating : floating -> size
+
+    (** the size of a basic data type.  *)
     method basic : basic -> size
+
+    (** the size of a scalar data type.  *)
     method scalar : scalar -> size
   end