diff --git a/src/repr/binary_codec.ml b/src/repr/binary_codec.ml index f64cb690..6fd09248 100644 --- a/src/repr/binary_codec.ml +++ b/src/repr/binary_codec.ml @@ -1,5 +1,8 @@ include Binary_codec_intf +include Binary_codec_intf.Types +open Type_core open Staging +module Sizer = Size.Sizer let unsafe_add_bytes b k = k (Bytes.unsafe_to_string b) let str = Bytes.unsafe_of_string @@ -15,11 +18,13 @@ let charstring_of_code : int -> string = module Unit = struct let encode () _k = () let decode _ ofs = (ofs, ()) [@@inline always] + let sizer = Sizer.static 0 end module Char = struct let encode c k = k (charstring_of_code (Char.code c)) let decode buf ofs = (ofs + 1, buf.[ofs]) [@@inline always] + let sizer = Sizer.static 1 end module Bool = struct @@ -28,6 +33,8 @@ module Bool = struct let decode buf ofs = let ofs, c = Char.decode buf ofs in match c with '\000' -> (ofs, false) | _ -> (ofs, true) + + let sizer = Sizer.static 1 end module Int8 = struct @@ -46,6 +53,7 @@ module Int16 = struct unsafe_add_bytes b let decode buf ofs = (ofs + 2, Bytes.get_uint16_be (str buf) ofs) + let sizer = Sizer.static 2 end module Int32 = struct @@ -55,6 +63,7 @@ module Int32 = struct unsafe_add_bytes b let decode buf ofs = (ofs + 4, Bytes.get_int32_be (str buf) ofs) + let sizer = Sizer.static 4 end module Int64 = struct @@ -64,6 +73,7 @@ module Int64 = struct unsafe_add_bytes b let decode buf ofs = (ofs + 8, Bytes.get_int64_be (str buf) ofs) + let sizer = Sizer.static 8 end module Float = struct @@ -72,6 +82,9 @@ module Float = struct let decode buf ofs = let ofs, f = Int64.decode buf ofs in (ofs, Stdlib.Int64.float_of_bits f) + + (* NOTE: we consider 'double' here *) + let sizer = Sizer.static 8 end module Int = struct @@ -92,32 +105,55 @@ module Int = struct if i >= 0 && i < 128 then (ofs, n) else aux buf n (p + 7) ofs in aux buf 0 0 ofs + + let sizer = + let of_value = + let rec aux len n = + if n >= 0 && n < 128 then len else aux (len + 1) (n lsr 7) + in + fun n -> aux 1 n + in + let of_encoding buf (Size.Offset off) = + Size.Offset (fst (decode buf off)) + in + Sizer.dynamic ~of_value ~of_encoding end module Len = struct - let encode n i = - match n with - | `Int -> Int.encode i - | `Int8 -> Int8.encode i - | `Int16 -> Int16.encode i - | `Int32 -> Int32.encode (Stdlib.Int32.of_int i) - | `Int64 -> Int64.encode (Stdlib.Int64.of_int i) - | `Fixed _ -> Unit.encode () - | `Unboxed -> Unit.encode () - - let decode n buf ofs = - match n with - | `Int -> Int.decode buf ofs - | `Int8 -> Int8.decode buf ofs - | `Int16 -> Int16.decode buf ofs - | `Int32 -> - let ofs, i = Int32.decode buf ofs in - (ofs, Stdlib.Int32.to_int i) - | `Int64 -> - let ofs, i = Int64.decode buf ofs in - (ofs, Stdlib.Int64.to_int i) - | `Fixed n -> (ofs, n) - | `Unboxed -> (ofs, String.length buf - ofs) + let encode n = + stage (fun i -> + match n with + | `Int -> Int.encode i + | `Int8 -> Int8.encode i + | `Int16 -> Int16.encode i + | `Int32 -> Int32.encode (Stdlib.Int32.of_int i) + | `Int64 -> Int64.encode (Stdlib.Int64.of_int i) + | `Fixed _ -> Unit.encode () + | `Unboxed -> Unit.encode ()) + + let decode n = + stage (fun buf ofs -> + match n with + | `Int -> Int.decode buf ofs + | `Int8 -> Int8.decode buf ofs + | `Int16 -> Int16.decode buf ofs + | `Int32 -> + let ofs, i = Int32.decode buf ofs in + (ofs, Stdlib.Int32.to_int i) + | `Int64 -> + let ofs, i = Int64.decode buf ofs in + (ofs, Stdlib.Int64.to_int i) + | `Fixed n -> (ofs, n) + | `Unboxed -> (ofs, String.length buf - ofs)) + + let sizer = function + | `Int -> Int.sizer + | `Int8 -> Sizer.static 1 + | `Int16 -> Sizer.static 2 + | `Int32 -> Sizer.static 4 + | `Int64 -> Sizer.static 8 + | `Fixed _ -> Sizer.static 0 + | `Unboxed -> Sizer.static 0 end (* Helper functions generalising over [string] / [bytes]. *) @@ -144,9 +180,36 @@ module Mono_container = struct (* fixed-size strings are never boxed *) stage @@ fun buf ofs -> sub n buf ofs | n -> + let decode_len = unstage (Len.decode n) in stage @@ fun buf ofs -> - let ofs, len = Len.decode n buf ofs in + let ofs, len = decode_len buf ofs in sub len buf ofs + + let sizer_unboxed ~length = function + | `Fixed n -> Sizer.static n (* fixed-size containers are never boxed *) + | _ -> { of_value = Dynamic length; of_encoding = Unknown } + (* NOTE: this is the one case where we can't recover the length of an + encoding from its initial offset, given a structurally-defined codec. *) + + let sizer ~length header_typ = + let size_of_header = (Len.sizer header_typ).of_value in + match (size_of_header, header_typ) with + | Static n, `Fixed str_len -> Sizer.static (n + str_len) + | _, _ -> ( + let decode_len = unstage (Len.decode header_typ) in + let of_encoding buf (Size.Offset off) = + let off, size = decode_len buf off in + assert (size >= 0); + Size.Offset (off + size) + in + match size_of_header with + | Unknown -> assert false + | Static n -> + Sizer.dynamic ~of_encoding ~of_value:(fun s -> n + length s) + | Dynamic f -> + Sizer.dynamic ~of_encoding ~of_value:(fun s -> + let s_len = length s in + f s_len + s_len)) end module String_unboxed = struct @@ -154,6 +217,8 @@ module String_unboxed = struct let decode _ = Mono_container.decode_unboxed (fun x -> x) Bytes.unsafe_to_string + + let sizer n = Mono_container.sizer_unboxed ~length:String.length n end module Bytes_unboxed = struct @@ -162,26 +227,32 @@ module Bytes_unboxed = struct let decode _ = Mono_container.decode_unboxed Bytes.unsafe_of_string (fun x -> x) + + let sizer n = Mono_container.sizer_unboxed ~length:Bytes.length n end module String = struct let encode len = + let encode_len = unstage (Len.encode len) in stage (fun s k -> let i = String.length s in - Len.encode len i k; + encode_len i k; k s) let decode len = Mono_container.decode (fun x -> x) Bytes.unsafe_to_string len + let sizer n = Mono_container.sizer ~length:String.length n end module Bytes = struct let encode len = + let encode_len = unstage (Len.encode len) in stage (fun s k -> let i = Bytes.length s in - Len.encode len i k; + encode_len i k; unsafe_add_bytes s k) let decode len = Mono_container.decode Bytes.unsafe_of_string (fun x -> x) len + let sizer len = Mono_container.sizer ~length:Bytes.length len end module Option = struct @@ -199,6 +270,112 @@ module Option = struct | _ -> let ofs, x = decode_elt buf ofs in (ofs, Some x) + + let sizer : type a. a Sizer.t -> a option Sizer.t = + fun elt -> + let header_size = 1 in + match elt with + | { of_value = Static 0; _ } -> + Sizer.static header_size (* Either '\000' or '\255' *) + | { of_value = Static n; _ } -> + (* Must add [n] in the [Some _] case, otherwise just header size. *) + let of_value = function + | None -> header_size + | Some _ -> header_size + n + in + let of_encoding buf (Size.Offset off) = + match Stdlib.String.get buf off with + | '\000' -> Size.Offset (off + 1) + | _ -> Size.Offset (1 + n) + in + Sizer.dynamic ~of_value ~of_encoding + | elt -> + (* Must dynamically compute element size in the [Some _] case. *) + let open Size.Syntax in + let of_value = + let+ elt_encode = elt.of_value in + function None -> header_size | Some x -> header_size + elt_encode x + in + let of_encoding = + let+ elt_decode = elt.of_encoding in + fun buf (Size.Offset off) -> + match Stdlib.String.get buf off with + | '\000' -> Size.Offset (off + 1) + | _ -> elt_decode buf (Size.Offset (off + 1)) + in + { of_value; of_encoding } +end + +(* Helper functions generalising over [list] / [array]. *) +module Poly_container = struct + let sizer : + type a at. + length:(at -> int) -> + fold_left:(f:(int -> a -> int) -> init:int -> at -> int) -> + len -> + a sizer -> + at sizer = + fun ~length ~fold_left header_typ elt_size -> + let header_size = (Len.sizer header_typ).of_value in + match (header_typ, header_size, elt_size) with + | _, Size.Unknown, _ -> assert false + | `Fixed length, Static header_size, { of_value = Static elt_size; _ } -> + (* We don't serialise headers for fixed-size containers *) + assert (header_size = 0); + Sizer.static (length * elt_size) + | _, _, { of_value = Static elt_size; _ } -> + let of_value = + (* Number of elements not fixed, so we must check it dynamically *) + match header_size with + | Unknown -> assert false + | Static header_size -> + fun l -> + let nb_elements = length l in + header_size + (elt_size * nb_elements) + | Dynamic header_size -> + fun l -> + let nb_elements = length l in + header_size nb_elements + (elt_size * nb_elements) + in + let of_encoding = + (* Read the header to recover the length. Don't need to look at + elements as they have static size. *) + let decode_len = unstage (Len.decode header_typ) in + fun buf (Size.Offset off) -> + let off, elements = decode_len buf off in + Size.Offset (off + (elt_size * elements)) + in + Sizer.dynamic ~of_value ~of_encoding + | _ -> + let open Size.Syntax in + let of_value = + (* Must traverse the container _and_ compute element sizes + individually *) + let+ elt_size = elt_size.of_value in + match header_size with + | Unknown -> assert false + | Static header_size -> + fun l -> + fold_left l ~init:header_size ~f:(fun acc x -> acc + elt_size x) + | Dynamic header_size -> + fun l -> + let len = length l in + let header_size = header_size len in + fold_left l ~init:header_size ~f:(fun acc x -> acc + elt_size x) + in + let of_encoding = + let+ elt_decode = elt_size.of_encoding in + let rec decode_elements buf off todo = + match todo with + | 0 -> off + | n -> decode_elements buf (elt_decode buf off) (n - 1) + in + let decode_len = unstage (Len.decode header_typ) in + fun buf (Size.Offset off) -> + let off, elements = decode_len buf off in + decode_elements buf (Size.Offset off) elements + in + { of_value; of_encoding } end module List = struct @@ -210,8 +387,9 @@ module List = struct (encode_elements [@tailcall]) encode_elt k xs in fun len encode_elt -> + let encode_len = unstage (Len.encode len) in stage (fun x k -> - Len.encode len (List.length x) k; + encode_len (List.length x) k; encode_elements encode_elt k x) let decode = @@ -222,9 +400,14 @@ module List = struct decode_elements decode_elt (x :: acc) buf off (n - 1) in fun len decode_elt -> + let decode_len = unstage (Len.decode len) in stage (fun buf ofs -> - let ofs, len = Len.decode len buf ofs in + let ofs, len = decode_len buf ofs in decode_elements decode_elt [] buf ofs len) + + let sizer len elt = + Poly_container.sizer ~length:List.length ~fold_left:ListLabels.fold_left len + elt end module Array = struct @@ -235,8 +418,9 @@ module Array = struct done in fun n l -> + let encode_len = unstage (Len.encode n) in stage (fun x k -> - Len.encode n (Array.length x) k; + encode_len (Array.length x) k; encode_elements l k x) let decode len decode_elt = @@ -244,6 +428,10 @@ module Array = struct stage (fun buf off -> let ofs, l = list_decode buf off in (ofs, Array.of_list l)) + + let sizer len elt = + Poly_container.sizer ~length:Array.length ~fold_left:ArrayLabels.fold_left + len elt end module Pair = struct @@ -255,6 +443,8 @@ module Pair = struct let off, a = a buf off in let off, b = b buf off in (off, (a, b)) + + let sizer a b = Sizer.(using fst a <+> using snd b) end module Triple = struct @@ -268,4 +458,10 @@ module Triple = struct let off, b = b buf off in let off, c = c buf off in (off, (a, b, c)) + + let sizer a b c = + Sizer.( + using (fun (x, _, _) -> x) a + <+> using (fun (_, x, _) -> x) b + <+> using (fun (_, _, x) -> x) c) end diff --git a/src/repr/binary_codec_intf.ml b/src/repr/binary_codec_intf.ml index fc1de134..9b41c8d4 100644 --- a/src/repr/binary_codec_intf.ml +++ b/src/repr/binary_codec_intf.ml @@ -1,14 +1,20 @@ open Type_core open Staging -type 'a encoder = 'a -> (string -> unit) -> unit -type 'a decoder = string -> int -> int * 'a +module Types = struct + type 'a encoder = 'a -> (string -> unit) -> unit + type 'a decoder = string -> int -> int * 'a + type 'a sizer = 'a Size.Sizer.t +end + +open Types module type S = sig type t val encode : t encoder val decode : t decoder + val sizer : t sizer end module type S_with_length = sig @@ -16,6 +22,7 @@ module type S_with_length = sig val encode : len -> t encoder staged val decode : len -> t decoder staged + val sizer : len -> t sizer end module type S1 = sig @@ -23,6 +30,7 @@ module type S1 = sig val encode : 'a encoder -> 'a t encoder val decode : 'a decoder -> 'a t decoder + val sizer : 'a sizer -> 'a t sizer end module type S1_with_length = sig @@ -30,6 +38,7 @@ module type S1_with_length = sig val encode : len -> 'a encoder -> 'a t encoder staged val decode : len -> 'a decoder -> 'a t decoder staged + val sizer : len -> 'a sizer -> 'a t sizer end module type S2 = sig @@ -37,6 +46,7 @@ module type S2 = sig val encode : 'a encoder -> 'b encoder -> ('a, 'b) t encoder val decode : 'a decoder -> 'b decoder -> ('a, 'b) t decoder + val sizer : 'a sizer -> 'b sizer -> ('a, 'b) t sizer end module type S3 = sig @@ -44,9 +54,12 @@ module type S3 = sig val encode : 'a encoder -> 'b encoder -> 'c encoder -> ('a, 'b, 'c) t encoder val decode : 'a decoder -> 'b decoder -> 'c decoder -> ('a, 'b, 'c) t decoder + val sizer : 'a sizer -> 'b sizer -> 'c sizer -> ('a, 'b, 'c) t sizer end module type Intf = sig + include module type of Types + module type S = S module type S1 = S1 module type S2 = S2 diff --git a/src/repr/size.ml b/src/repr/size.ml new file mode 100644 index 00000000..f840370a --- /dev/null +++ b/src/repr/size.ml @@ -0,0 +1,79 @@ +type 'a t = Static of int | Dynamic of 'a | Unknown +type 'a size = 'a t + +let map : type a b. (a -> b) -> a t -> b t = + fun f -> function + | Unknown -> Unknown + | Static n -> Static n + | Dynamic a -> Dynamic (f a) + +module Syntax = struct + let ( let+ ) x f = map f x +end + +(** A type wrapper for positional offsets into buffers (as opposed to e.g. + lengths of values in those buffers). *) +type offset = Offset of int [@@unboxed] + +module Offset = struct + type t = offset + + let ( +> ) : t -> int -> t = fun (Offset n) m -> Offset (n + m) + let ( <+ ) : int -> t -> t = fun n (Offset m) -> Offset (n + m) +end + +module Sizer = struct + type 'a t = { + of_value : ('a -> int) size; + of_encoding : (string -> Offset.t -> Offset.t) size; + } + (** An ['a t] is a value that represents the size information known about a + particular codec for type ['a]. + + - [of_value]: given a value to encode, return the size of its encoding. + + - [of_encoding]: given a buffer [buf] and an offset [off], return the + _offset_ immediately _after_ the encoding starting at [buf.\[off\]] + NOTE: not the length of the encoding itself, to enable chains of such + sizers to call each other in tail-position. + + Invariant: [∀ n. (of_value = Static n) ⟺ (of_encoding = Static n)]. *) + + let ( <+> ) : type a. a t -> a t -> a t = + let add_of_value (a : _ size) (b : _ size) : _ size = + match (a, b) with + | Unknown, _ | _, Unknown -> Unknown + | Static a, Static b -> Static (a + b) + | Static 0, other | other, Static 0 -> other + | Static n, Dynamic f | Dynamic f, Static n -> Dynamic (fun a -> n + f a) + | Dynamic f, Dynamic g -> Dynamic (fun a -> f a + g a) + in + let add_of_encoding (a : _ size) (b : _ size) : _ size = + match (a, b) with + | Unknown, _ | _, Unknown -> Unknown + | Static a, Static b -> Static (a + b) + | Static 0, other | other, Static 0 -> other + | Dynamic f, Dynamic g -> Dynamic (fun buf off -> g buf (f buf off)) + (* NOTE: in these cases we could be slightly more efficient by storing a + vector of sizing functions inside [Dynamic], which would allow constant + folding for static segments of dynamically-sized types. *) + | Static n, Dynamic f -> Dynamic (fun buf off -> f buf Offset.(off +> n)) + | Dynamic f, Static n -> Dynamic (fun buf off -> Offset.(f buf off +> n)) + in + fun a b -> + { + of_value = add_of_value a.of_value b.of_value; + of_encoding = add_of_encoding a.of_encoding b.of_encoding; + } + + let static n = { of_value = Static n; of_encoding = Static n } + + let dynamic ~of_value ~of_encoding = + { of_value = Dynamic of_value; of_encoding = Dynamic of_encoding } + + let using f t = + let of_value = map (fun size_of x -> size_of (f x)) t.of_value in + { t with of_value } + + let unknown = { of_value = Unknown; of_encoding = Unknown } +end diff --git a/src/repr/type.ml b/src/repr/type.ml index a4685fcd..0957ca98 100644 --- a/src/repr/type.ml +++ b/src/repr/type.ml @@ -21,6 +21,7 @@ open Utils let pre_hash t = let rec aux : type a. a t -> a encode_bin = function + | Attributes { attr_type; _ } -> aux attr_type | Self s -> aux s.self_fix | Map m -> let dst = unstage (aux m.x) in @@ -307,7 +308,7 @@ let partially_abstract ~pp ~of_string ~json ~bin ~unboxed_bin ~equal ~compare let encode_bin, decode_bin, size_of = fold_impl bin ~undefined:(fun () -> - (undefined' "encode_bin", undefined' "decode_bin", undefined' "size_of")) + (undefined' "encode_bin", undefined' "decode_bin", unimplemented_size_of)) ~structural:(fun () -> (Type_binary.encode_bin t, Type_binary.decode_bin t, Type_size.t t)) in @@ -316,7 +317,7 @@ let partially_abstract ~pp ~of_string ~json ~bin ~unboxed_bin ~equal ~compare ~undefined:(fun () -> ( undefined' "Unboxed.encode_bin", undefined' "Unboxed.decode_bin", - undefined' "Unboxed.size_of" )) + unimplemented_size_of )) ~structural:(fun () -> ( Type_binary.Unboxed.encode_bin t, Type_binary.Unboxed.decode_bin t, @@ -421,7 +422,56 @@ let encode_bin, decode_bin, to_bin_string, of_bin_string = Type_binary.(encode_bin, decode_bin, to_bin_string, of_bin_string) let random, random_state = Type_random.(of_global, of_state) -let size_of = Type_size.t + +let size_of t = + match (Type_size.t t).of_value with + | Size.Static n -> stage (fun _ -> Some n) + | Size.Dynamic f -> stage (fun x -> Some (f x)) + | Size.Unknown -> stage (fun _ -> None) + +module Size = struct + type 'a t = 'a Size.t = Static of int | Dynamic of 'a | Unknown + + (* The [Size] module defines _scanning_ length decoders that return + [initial_offset + length] rather than just [length]. These functions + convert these to decoders that return the [length] directly. *) + + let to_scanning : type a. (a -> int -> int) -> a -> Size.offset -> Size.offset + = + fun len_f buf (Size.Offset off) -> Size.Offset (off + len_f buf off) + + let of_scanning : type a. (a -> Size.offset -> Size.offset) -> a -> int -> int + = + fun scan_f buf off -> + let (Size.Offset off') = scan_f buf (Size.Offset off) in + off' - off + + let of_value t = (Type_size.t t).of_value + let of_encoding t = Size.map of_scanning (Type_size.unboxed t).of_encoding + let t t = Type_size.t t + + type 'a sizer = 'a size_of + + let using f t = + let of_value = + Size.map (fun sizer x -> sizer (f x)) t.Size.Sizer.of_value + in + { t with of_value } + + let custom_static n = + Size.Sizer.{ of_value = Static n; of_encoding = Static n } + + let custom_dynamic ?of_value ?of_encoding () = + let of_value = + match of_value with Some f -> Dynamic f | None -> Unknown + in + let of_encoding = + match of_encoding with + | Some f -> Dynamic (to_scanning f) + | None -> Unknown + in + Size.Sizer.{ of_value; of_encoding } +end module Unboxed = struct include Type_binary.Unboxed diff --git a/src/repr/type_binary.ml b/src/repr/type_binary.ml index 0ea3af17..2e3845d2 100644 --- a/src/repr/type_binary.ml +++ b/src/repr/type_binary.ml @@ -255,12 +255,16 @@ module Unboxed = struct let decode_bin = Decode.unboxed end -let to_bin size_of encode_bin = - let size_of = unstage size_of in +let to_bin (size_of : _ Size.Sizer.t) encode_bin = let encode_bin = unstage encode_bin in stage (fun x -> let seq = encode_bin x in - let len = match size_of x with None -> 1024 | Some n -> n in + let len = + match size_of.of_value with + | Static n -> n + | Dynamic f -> f x + | Unknown -> 1024 + in let buf = Buffer.create len in seq (Buffer.add_string buf); Buffer.contents buf) diff --git a/src/repr/type_core.ml b/src/repr/type_core.ml index 1081df07..cbfb51bb 100644 --- a/src/repr/type_core.ml +++ b/src/repr/type_core.ml @@ -50,6 +50,10 @@ let annotate t ~add ~data = | Attributes t -> Attributes { t with attrs = add data t.attrs } | t -> Attributes { attrs = add data Attribute.Map.empty; attr_type = t } +let unimplemented_size_of = + let f _ = failwith "`size_of` not implemented" in + Size.Sizer.{ of_value = Dynamic f; of_encoding = Dynamic f } + let partial ?(pp = fun _ -> failwith "`pp` not implemented") ?(of_string = fun _ -> failwith "`of_string` not implemented") ?(encode_json = fun _ -> failwith "`encode_json` not implemented") @@ -61,13 +65,12 @@ let partial ?(pp = fun _ -> failwith "`pp` not implemented") ?(equal = stage (fun _ -> failwith "`equal` not implemented")) ?(encode_bin = stage (fun _ -> failwith "`encode_bin` not implemented")) ?(decode_bin = stage (fun _ -> failwith "`decode_bin` not implemented")) - ?(size_of = stage (fun _ -> failwith "`size_of` not implemented")) + ?(size_of = unimplemented_size_of) ?(unboxed_encode_bin = stage (fun _ -> failwith "`unboxed_encode_bin` not implemented")) ?(unboxed_decode_bin = stage (fun _ -> failwith "`unboxed_decode_bin` not implemented")) - ?(unboxed_size_of = - stage (fun _ -> failwith "`unboxed_size_of` not implemented")) () = + ?(unboxed_size_of = unimplemented_size_of) () = Custom { cwit = `Witness (Witness.make ()); diff --git a/src/repr/type_core_intf.ml b/src/repr/type_core_intf.ml index 2718d9f0..45f21e02 100644 --- a/src/repr/type_core_intf.ml +++ b/src/repr/type_core_intf.ml @@ -12,7 +12,7 @@ module Types = struct type 'a pre_hash = 'a bin_seq staged type 'a encode_bin = 'a bin_seq staged type 'a decode_bin = (string -> int -> int * 'a) staged - type 'a size_of = ('a -> int option) staged + type 'a size_of = 'a Size.Sizer.t type 'a compare = ('a -> 'a -> int) staged type 'a equal = ('a -> 'a -> bool) staged type 'a short_hash = (?seed:int -> 'a -> int) staged @@ -136,6 +136,7 @@ module type Type_core = sig include module type of Types (** @inline *) + val unimplemented_size_of : 'a size_of val fields : 'a record -> 'a a_field list module Fields_folder (Acc : sig diff --git a/src/repr/type_intf.ml b/src/repr/type_intf.ml index 091e3a13..d1daacd8 100644 --- a/src/repr/type_intf.ml +++ b/src/repr/type_intf.ml @@ -528,7 +528,7 @@ module type DSL = sig type 'a decode_bin = (string -> int -> int * 'a) staged (** The type for binary decoders. *) - type 'a size_of = ('a -> int option) staged + type -'a size_of (** The type for size function related to binary encoder/decoders. *) type 'a short_hash := (?seed:int -> 'a -> int) staged @@ -561,11 +561,44 @@ module type DSL = sig {b NOTE:} When [t] is {!Type.string}, the result is [s] (without copy). *) - val size_of : 'a t -> 'a size_of + val size_of : 'a t -> ('a -> int option) staged (** [size_of t x] is either the size of [encode_bin t x] or the binary encoding of [x], if the backend is not able to pre-compute serialisation lengths. *) + module Size : sig + (** A value representing information known about the length in bytes of + encodings produced by a particular binary codec: + + - [Static n]: all encodings produced by this codec have length [n]; + + - [Dynamic f]: the length of binary encodings is dependent on the + specific value, but may be efficiently computed at run-time via the + function [f]. + + - [Unknown]: this codec may produce encodings that cannot be efficiently + pre-computed.*) + type 'a t = 'a Size.t = private Static of int | Dynamic of 'a | Unknown + + val of_value : 'a ty -> ('a -> int) t + val of_encoding : 'a ty -> (string -> int -> int) t + + (** Constructors for custom value sizers, for use with binary codecs that + are not structurally-defined. *) + + type -'a sizer = 'a size_of + + val t : 'a ty -> 'a sizer + val using : ('b -> 'a) -> 'a sizer -> 'b sizer + val custom_static : int -> _ sizer + + val custom_dynamic : + ?of_value:('a -> int) -> + ?of_encoding:(string -> int -> int) -> + unit -> + 'a sizer + end + module Unboxed : sig (** Unboxed operations assumes that value being serialized is fully filling the underlying buffer. When that's the case, it is not necessary to diff --git a/src/repr/type_size.ml b/src/repr/type_size.ml index e6cf2347..e8bc9425 100644 --- a/src/repr/type_size.ml +++ b/src/repr/type_size.ml @@ -15,171 +15,164 @@ *) open Type_core -open Staging -open Utils +module Sizer = Size.Sizer +module Bin = Binary_codec -let ( >>= ) x f = match x with Some x -> f x | None -> None -let ( >|= ) x f = match x with Some x -> Some (f x) | None -> None - -let int n = - let rec aux len n = - if n >= 0 && n < 128 then len else aux (len + 1) (n lsr 7) - in - aux 1 n - -let len n = function - | `Int -> int n - | `Int8 -> 1 - | `Int16 -> 2 - | `Int32 -> 4 - | `Int64 -> 8 - | `Fixed _ -> 0 - -let unit () = 0 -let char (_ : char) = 1 -let int32 (_ : int32) = 4 -let int64 (_ : int64) = 8 -let bool (_ : bool) = 1 -let float (_ : float) = 8 (* NOTE: we consider 'double' here *) - -let boxed_string n s = - let s = String.length s in - len s n + s - -let unboxed_string = function - | `Fixed len -> fun _ -> len (* fixed-size strings are never boxed *) - | _ -> String.length - -let string ~boxed = if boxed then boxed_string else unboxed_string - -let boxed_bytes n s = - let s = Bytes.length s in - len s n + s - -let unboxed_bytes = function - | `Fixed len -> fun _ -> len (* fixed-size bytes are never boxed *) - | _ -> Bytes.length - -let bytes ~boxed = if boxed then boxed_bytes else unboxed_bytes - -let list l n = - let l = unstage l in - stage (fun x -> - let init = len (List.length x) n in - List.fold_left - (fun acc x -> - acc >>= fun acc -> - l x >|= fun l -> acc + l) - (Some init) x) - -let array l n = - let l = unstage l in - stage (fun x -> - let init = len (Array.length x) n in - Array.fold_left - (fun acc x -> - acc >>= fun acc -> - l x >|= fun l -> acc + l) - (Some init) x) - -let pair a b = - let a = unstage a and b = unstage b in - stage (fun (x, y) -> - a x >>= fun a -> - b y >|= fun b -> a + b) - -let triple a b c = - let a = unstage a and b = unstage b and c = unstage c in - stage (fun (x, y, z) -> - a x >>= fun a -> - b y >>= fun b -> - c z >|= fun c -> a + b + c) - -let option o = - let o = unstage o in - stage (function - | None -> Some (char '\000') - | Some x -> o x >|= fun o -> char '\000' + o) - -let rec t : type a. a t -> a size_of = function +let rec t : type a. a t -> a Sizer.t = function | Self s -> fst (self s) | Custom c -> c.size_of | Map b -> map ~boxed:true b | Prim t -> prim ~boxed:true t | Attributes { attr_type; _ } -> t attr_type | Boxed b -> t b - | List l -> list (t l.v) l.len - | Array a -> array (t a.v) a.len + | List l -> Bin.List.sizer l.len (t l.v) + | Array a -> Bin.Array.sizer a.len (t a.v) | Tuple t -> tuple t - | Option x -> option (t x) + | Option x -> Bin.Option.sizer (t x) | Record r -> record r | Variant v -> variant v | Var v -> raise (Unbound_type_variable v) -and unboxed : type a. a t -> a size_of = function +and unboxed : type a. a t -> a Sizer.t = function | Self s -> snd (self s) | Custom c -> c.unboxed_size_of | Map b -> map ~boxed:false b | Prim t -> prim ~boxed:false t - | Attributes { attr_type; _ } -> t attr_type + | Attributes { attr_type = t; _ } -> unboxed t | Boxed b -> t b - | List l -> list (t l.v) l.len - | Array a -> array (t a.v) a.len + | List l -> Bin.List.sizer l.len (t l.v) + | Array a -> Bin.Array.sizer a.len (t a.v) | Tuple t -> tuple t - | Option x -> option (t x) + | Option x -> Bin.Option.sizer (t x) | Record r -> record r | Variant v -> variant v | Var v -> raise (Unbound_type_variable v) -and self : type a. a self -> a size_of * a size_of = +and self : type a. a self -> a Sizer.t * a Sizer.t = fun { self_unroll; _ } -> - fix_staged2 (fun size_of unboxed_size_of -> - let cyclic = self_unroll (partial ~size_of ~unboxed_size_of ()) in - (t cyclic, unboxed cyclic)) + let open Staging in + let of_value, of_encoding, unboxed_of_value, unboxed_of_encoding = + Utils.fix_staged4 + (fun of_value of_encoding unboxed_of_value unboxed_of_encoding -> + let size_of = + let of_value = Size.Dynamic (unstage of_value) + and of_encoding = Size.Dynamic (unstage of_encoding) in + Sizer.{ of_value; of_encoding } + and unboxed_size_of = + let of_value = Size.Dynamic (unstage unboxed_of_value) + and of_encoding = Size.Dynamic (unstage unboxed_of_encoding) in + Sizer.{ of_value; of_encoding } + in + let cyclic = self_unroll (partial ~size_of ~unboxed_size_of ()) in + let t = t cyclic and unboxed = unboxed cyclic in + match + (t.of_value, t.of_encoding, unboxed.of_value, unboxed.of_encoding) + with + | Size.Dynamic a, Size.Dynamic b, Size.Dynamic c, Size.Dynamic d -> + (stage a, stage b, stage c, stage d) + | _ -> assert false) + in + ( Sizer.dynamic ~of_value:(unstage of_value) ~of_encoding:(unstage of_encoding), + Sizer.dynamic ~of_value:(unstage unboxed_of_value) + ~of_encoding:(unstage unboxed_of_encoding) ) -and tuple : type a. a tuple -> a size_of = function - | Pair (x, y) -> pair (t x) (t y) - | Triple (x, y, z) -> triple (t x) (t y) (t z) +and tuple : type a. a tuple -> a Sizer.t = function + | Pair (x, y) -> Bin.Pair.sizer (t x) (t y) + | Triple (x, y, z) -> Bin.Triple.sizer (t x) (t y) (t z) -and map : type a b. boxed:bool -> (a, b) map -> b size_of = - fun ~boxed { x; g; _ } -> - let size_of = unstage (if boxed then t x else unboxed x) in - stage (fun u -> size_of (g u)) +and map : type a b. boxed:bool -> (a, b) map -> b Sizer.t = + fun ~boxed { x; g; _ } -> Sizer.using g (if boxed then t x else unboxed x) -and prim : type a. boxed:bool -> a prim -> a size_of = +and prim : type a. boxed:bool -> a prim -> a Sizer.t = fun ~boxed -> function - | Unit -> stage (fun x -> Some (unit x)) - | Bool -> stage (fun x -> Some (bool x)) - | Char -> stage (fun x -> Some (char x)) - | Int -> stage (fun x -> Some (int x)) - | Int32 -> stage (fun x -> Some (int32 x)) - | Int64 -> stage (fun x -> Some (int64 x)) - | Float -> stage (fun x -> Some (float x)) - | String n -> - let size_of = string ~boxed n in - stage (fun x -> Some (size_of x)) - | Bytes n -> - let size_of = bytes ~boxed n in - stage (fun x -> Some (size_of x)) - -and record : type a. a record -> a size_of = + | Unit -> Bin.Unit.sizer + | Bool -> Bin.Bool.sizer + | Char -> Bin.Char.sizer + | Int -> Bin.Int.sizer + | Int32 -> Bin.Int32.sizer + | Int64 -> Bin.Int64.sizer + | Float -> Bin.Float.sizer + | String n -> (if boxed then Bin.String.sizer else Bin.String_unboxed.sizer) n + | Bytes n -> (if boxed then Bin.Bytes.sizer else Bin.Bytes_unboxed.sizer) n + +and record : type a. a record -> a Sizer.t = fun r -> - let field_sizers : (a -> int option) list = - fields r - |> List.map @@ fun (Field f) -> - let field_size = unstage (t f.ftype) in - fun x -> field_size (f.fget x) + fields r + |> List.map (fun (Field f) -> Sizer.using f.fget (t f.ftype)) + |> ListLabels.fold_left ~init:(Sizer.static 0) ~f:Sizer.( <+> ) + +and variant : type a. a variant -> a Sizer.t = + fun v -> + let static_varint_size n = + match Bin.Int.sizer.of_value with + | Unknown | Static _ -> assert false + | Dynamic f -> f n in - stage (fun x -> - List.fold_left - (fun acc fsize -> acc >>= fun acc -> fsize x >|= ( + ) acc) - (Some 0) field_sizers) - -and variant : type a. a variant -> a size_of = - let c0 { ctag0; _ } = stage (Some (int ctag0)) in - let c1 { ctag1; ctype1; _ } = - let size_tag = int ctag1 in - let size_arg = unstage (t ctype1) in - stage (fun v -> size_arg v >|= ( + ) size_tag) + let case_lengths : (int * a Sizer.t) array = + ArrayLabels.map v.vcases ~f:(function + | C0 { ctag0; _ } -> (static_varint_size ctag0, Sizer.static 0) + | C1 { ctag1; ctype1; cwit1 = expected; _ } -> + let tag_length = static_varint_size ctag1 in + let arg_length = + match t ctype1 with + | ({ of_value = Static _; _ } | { of_value = Unknown; _ }) as t -> t + | { of_value = Dynamic of_value; of_encoding } -> + let of_value a = + match v.vget a with + | CV0 _ -> assert false + | CV1 ({ cwit1 = received; _ }, args) -> ( + match Witness.cast received expected args with + | Some v -> of_value v + | None -> assert false) + in + { of_value = Dynamic of_value; of_encoding } + in + (tag_length, arg_length)) + in + (* If all cases have [size = Static n], then so does the variant. + If any case has [size = Unknown], then so does the variant. *) + let non_dynamic_length = + let rec aux static_so_far = function + | -1 -> Option.map (fun n -> Sizer.static n) static_so_far + | i -> ( + match case_lengths.(i) with + | _, { of_value = Unknown; _ } -> Some Sizer.unknown + | _, { of_value = Dynamic _; _ } -> None + | tag_len, { of_value = Static arg_len; _ } -> ( + let len = tag_len + arg_len in + match static_so_far with + | None -> aux (Some len) (i - 1) + | Some len' when len = len' -> aux static_so_far (i - 1) + | Some _ -> None)) + in + aux None (Array.length case_lengths - 1) in - fun v -> fold_variant { c0; c1 } v + match non_dynamic_length with + | Some x -> x + | None -> + (* Otherwise, the variant size is [Dynamic] over the tag *) + let of_value a = + let tag = + match v.vget a with + | CV0 { ctag0; _ } -> ctag0 + | CV1 ({ ctag1; _ }, _) -> ctag1 + in + let tag_length, arg_length = case_lengths.(tag) in + let arg_length = + match arg_length.of_value with + | Dynamic f -> f a + | Static n -> n + | Unknown -> + (* [Unknown] arg lengths discounted above *) + assert false + in + tag_length + arg_length + in + let of_encoding buf (Size.Offset off) = + let off, tag = Bin.Int.decode buf off in + match case_lengths.(tag) with + | _, { of_encoding = Static n; _ } -> Size.Offset (off + n) + | _, { of_encoding = Dynamic f; _ } -> f buf (Size.Offset off) + | _, { of_encoding = _; _ } -> assert false + in + Sizer.dynamic ~of_value ~of_encoding diff --git a/src/repr/type_size.mli b/src/repr/type_size.mli index 36bf984c..98e1a0e4 100644 --- a/src/repr/type_size.mli +++ b/src/repr/type_size.mli @@ -16,5 +16,5 @@ open Type_core -val t : 'a t -> 'a size_of -val unboxed : 'a t -> 'a size_of +val t : 'a t -> 'a Size.Sizer.t +val unboxed : 'a t -> 'a Size.Sizer.t diff --git a/src/repr/utils.ml b/src/repr/utils.ml index a3694388..410684f7 100644 --- a/src/repr/utils.ml +++ b/src/repr/utils.ml @@ -29,3 +29,27 @@ let fix_staged2 : and backptr1 e = unstage (Lazy.force here |> fst) e and backptr2 e = unstage (Lazy.force here |> snd) e in Lazy.force here + +let tup4_1 (x, _, _, _) = x +let tup4_2 (_, x, _, _) = x +let tup4_3 (_, _, x, _) = x +let tup4_4 (_, _, _, x) = x + +let fix_staged4 : + type a b c d e f g h. + (((a -> b) staged as 'f1) -> + ((c -> d) staged as 'f2) -> + ((e -> f) staged as 'f3) -> + ((g -> h) staged as 'f4) -> + 'f1 * 'f2 * 'f3 * 'f4) -> + 'f1 * 'f2 * 'f3 * 'f4 = + fun unroll -> + let rec here = + lazy + (unroll (stage backptr1) (stage backptr2) (stage backptr3) + (stage backptr4)) + and backptr1 e = unstage (Lazy.force here |> tup4_1) e + and backptr2 e = unstage (Lazy.force here |> tup4_2) e + and backptr3 e = unstage (Lazy.force here |> tup4_3) e + and backptr4 e = unstage (Lazy.force here |> tup4_4) e in + Lazy.force here diff --git a/src/repr/utils.mli b/src/repr/utils.mli index 6d04df16..37bbe51b 100644 --- a/src/repr/utils.mli +++ b/src/repr/utils.mli @@ -12,3 +12,10 @@ val fix_staged2 : ('f1 -> 'f2 -> 'f1 * 'f2) -> ((_ -> _) staged as 'f1) * ((_ -> _) staged as 'f2) (** Generalises {!fix_staged} to handle mutually recursive definitions. *) + +val fix_staged4 : + ('f1 -> 'f2 -> 'f3 -> 'f4 -> 'f1 * 'f2 * 'f3 * 'f4) -> + ((_ -> _) staged as 'f1) + * ((_ -> _) staged as 'f2) + * ((_ -> _) staged as 'f3) + * ((_ -> _) staged as 'f4) diff --git a/test/repr/import.ml b/test/repr/import.ml new file mode 100644 index 00000000..323461ea --- /dev/null +++ b/test/repr/import.ml @@ -0,0 +1,10 @@ +module Alcotest = struct + include Alcotest + + let gcheck ?pos typ msg a b = + let equal = Repr.(unstage (equal typ)) in + let pp = Repr.pp_dump typ in + check ?pos (testable pp equal) msg a b + + let string = testable Fmt.Dump.string String.equal +end diff --git a/test/repr/main.ml b/test/repr/main.ml index fdf07214..ab2f35ca 100644 --- a/test/repr/main.ml +++ b/test/repr/main.ml @@ -1,16 +1,6 @@ +open! Import module T = Repr -module Alcotest = struct - include Alcotest - - let gcheck ?pos typ msg a b = - let equal = T.(unstage (equal typ)) in - let pp = T.pp_dump typ in - check ?pos (testable pp equal) msg a b - - let string = testable Fmt.Dump.string String.equal -end - let id x = x type foo = { a : int; b : int } @@ -21,7 +11,17 @@ let to_bin_string t = T.unstage (T.to_bin_string t) let of_bin_string t = T.unstage (T.of_bin_string t) let encode_bin t = T.unstage (T.encode_bin t) let decode_bin t = T.unstage (T.decode_bin t) -let size_of t = T.unstage (T.size_of t) + +let size_of t v = + match T.Size.of_value t with + | Unknown -> assert false + | Dynamic f -> f v + | Static n -> n + +let static_size_of t = + match T.Size.of_value t with + | Static n -> n + | Dynamic _ | Unknown -> Alcotest.fail "Expected Static" let with_buf f = let buf = Buffer.create 10 in @@ -31,7 +31,6 @@ let with_buf f = module Unboxed = struct let decode_bin t = T.unstage (T.Unboxed.decode_bin t) let encode_bin t = T.unstage (T.Unboxed.encode_bin t) - let size_of t = T.unstage (T.Unboxed.size_of t) end let test_base () = @@ -39,10 +38,7 @@ let test_base () = Alcotest.(check string) "JSON string" "\"foo\"" s; let s = to_bin_string T.string "foo" in Alcotest.(check string) "binary string" "foo" s; - Alcotest.(check (option int)) - "binary size" - (Some (String.length "foo")) - (size_of T.(string_of (`Fixed 3)) "foo"); + Alcotest.(check int) "binary size" 3 (static_size_of T.(string_of (`Fixed 3))); let s = T.to_string T.string "foo" in Alcotest.(check string) "CLI string" "foo" s; let s = T.to_json_string T.int 42 in @@ -247,9 +243,7 @@ let test_bin () = Alcotest.(check string) "hex list" "[\"666f6f\",\"666f6f\"]" s; let s = to_bin_string l [ "foo"; "bar" ] in Alcotest.(check string) "encode list" "foobar" s; - Alcotest.(check (option int)) - "size of list" (Some 6) - (size_of l [ "foo"; "bar" ]); + Alcotest.(check int) "size of list" 6 (static_size_of l); let s = of_bin_string l "foobar" in Alcotest.(check (ok tl)) "decode list" (Ok [ "foo"; "bar" ]) s; let buf = Buffer.create 10 in @@ -563,7 +557,7 @@ let test_pp_ty () = let compare = T.stage @@ fun _ _ -> assert false in let hdr f = T.stage f in T.abstract ~pp:a2 ~of_string:a1 ~json:(a2, a1) - ~bin:(hdr a2, hdr a2, hdr a1) + ~bin:(hdr a2, hdr a2, T.Size.custom_dynamic ()) ~equal ~compare ~short_hash:(T.stage (fun ?seed:_ -> a1)) ~pre_hash () @@ -624,8 +618,8 @@ let test_int () = | Ok y -> Alcotest.(check tt) "eq" x y in let size x s = - match size_of T.int x with - | Some ss -> Alcotest.(check int) (Fmt.strf "size:%d" x) s ss + match T.(unstage (size_of int)) x with + | Some n -> Alcotest.(check int) (Fmt.strf "size:%d" x) s n | None -> Alcotest.fail "size" in let p7 = 128 in @@ -675,26 +669,6 @@ let test_decode () = decode ~off:2 "xx\002aa" (Ok "aa"); decode ~off:2 "xx\000aaaaa" (Ok "") -let test_size () = - let check t v n = - match size_of t v with - | Some s -> - let name = Fmt.strf "size: %a" (T.pp t) v in - Alcotest.(check int) name n s - | None -> Alcotest.fail "size expected" - in - check T.int 0 1; - check T.int 128 2; - check T.int 16384 3; - check T.string "foo" (1 + 3); - check T.string (String.make 128 'x') (2 + 128); - check T.bytes (Bytes.of_string "foo") 4; - check T.(list string) [] 1; - let s = Unboxed.size_of T.string "foo" in - Alcotest.(check (option int)) "foo 1" (Some 3) s; - let s = size_of T.string "foo" in - Alcotest.(check (option int)) "foo 1" (Some 4) s - type v = [ `X000 | `X001 | `X002 | `X003 of int | `X004 of int | `X005 of int | `X006 of int | `X007 of int | `X008 of int | `X009 of int | `X010 of int @@ -760,7 +734,7 @@ let test_variants () = in let n = size_of v i in let s = to_bin_string v i in - Alcotest.(check (option int)) ("sizes " ^ s) (Some (String.length x)) n; + Alcotest.(check int) ("sizes " ^ s) (String.length x) n; Alcotest.(check v_t) ("bij " ^ s) i y in test `X000; @@ -929,7 +903,7 @@ let test_stdlib_containers () = let () = Alcotest.run "repr" [ - ( "type", + ( "main", [ ("base", `Quick, test_base); ("boxing", `Quick, test_boxing); @@ -946,10 +920,10 @@ let () = ("random", `Quick, test_random); ("ints", `Quick, test_int); ("decode", `Quick, test_decode); - ("size_of", `Quick, test_size); ("test_variants", `Quick, test_variants); ("test_duplicate_names", `Quick, test_duplicate_names); ("test_malformed_utf8", `Quick, test_malformed_utf8); ("test_stdlib_containers", `Quick, test_stdlib_containers); ] ); + ("size_of", Test_size_of.tests); ] diff --git a/test/repr/test_size_of.ml b/test/repr/test_size_of.ml new file mode 100644 index 00000000..ccf110bc --- /dev/null +++ b/test/repr/test_size_of.ml @@ -0,0 +1,165 @@ +module T = Repr + +let encode_bin t = T.(unstage (encode_bin t)) + +let encode_bin typ v = + let buffer = Buffer.create 0 in + encode_bin typ v (Buffer.add_string buffer); + Buffer.contents buffer + +let random_string len = String.init len (fun _ -> char_of_int (Random.int 256)) + +let check_static_size ~__POS__:pos typ expected v = + match T.Size.of_value typ with + | Unknown | Dynamic _ -> + Alcotest.failf ~pos + "Expected type to have static size %d, but (Unknown | Dynamic _) was \ + received." + expected + | Static n -> + Alcotest.(check ~pos int) "Expected static size" expected n; + + (* Check that the encoding actually occupies [n] bytes *) + let actual_size = String.length (encode_bin typ v) in + Alcotest.(check ~pos int) + "Actual size must match static spec" expected actual_size + +let check_dynamic_size ~__POS__:pos typ expected v = + Fmt.pr "Testing type: %a@." T.pp_ty typ; + let unexpected fmt = + Alcotest.failf ~pos + ("Expected type to have dynamic size, but " ^^ fmt ^^ " was received.") + in + match T.Size.(of_value typ, of_encoding typ) with + | Unknown, _ | _, Unknown -> unexpected "Unknown" + | Static n, _ | _, Static n -> unexpected "Static %d" n + | Dynamic encode, Dynamic decode -> + Alcotest.(check ~pos int) "Expected dynamic size" expected (encode v); + + (* Check that the encoding actually occupies [n] bytes *) + let encoding = encode_bin typ v in + let actual_size = String.length encoding in + Alcotest.(check ~pos int) + "Actual size must match dynamic spec" expected actual_size; + + (* Check that the size is correctly recovered from the encoding, even after + adding some random surrounding context. *) + let left_pad = 1 in + let right_pad = 0 in + let wrapped_encoding = + random_string left_pad ^ encoding ^ random_string right_pad + in + let recovered_length = decode wrapped_encoding left_pad in + Fmt.epr "wrapped_encoding (left %d, right %d): %a@." left_pad right_pad + Fmt.(Dump.list (fun ppf x -> Fmt.pf ppf "%d" x)) + (String.to_seq wrapped_encoding |> List.of_seq |> List.map Char.code); + Alcotest.(check ~pos int) + "Recovered length must match dynamic spec" expected recovered_length + +let test_primitive () = + let sta = check_static_size in + sta ~__POS__ T.unit 0 (); + sta ~__POS__ T.bool 1 true; + sta ~__POS__ T.char 1 ' '; + sta ~__POS__ T.int32 4 1l; + sta ~__POS__ T.int64 8 (-1L); + sta ~__POS__ T.float 8 Float.nan + +let test_int () = + let dyn = check_dynamic_size in + let test_cases = + (* Test a range of integers that fit correctly on this platform *) + [ + (__POS__, 7); + (__POS__, 14); + (__POS__, 21); + (__POS__, 28); + (__POS__, 35); + (__POS__, 42); + (__POS__, 49); + (__POS__, 56); + ] + |> List.filter (fun (_, i) -> i < Sys.int_size) + |> List.map (fun (p, i) -> (p, 1 lsl i)) + in + ListLabels.iteri test_cases ~f:(fun i (pos, p) -> + dyn ~__POS__:pos T.int (i + 1) (p - 1); + dyn ~__POS__:pos T.int (i + 2) p) + +let test_container () = + let module X = struct + type two = bool * bool [@@deriving repr] + type three = bool * bool * bool [@@deriving repr] + + let thirty_t = T.(list ~len:(`Fixed 10) three_t) + let two = (true, true) + let three = (true, true, true) + let thirty = List.init 10 (fun _ -> three) + end in + let open X in + let sta = check_static_size in + sta ~__POS__ two_t 2 two; + sta ~__POS__ three_t 3 three; + sta ~__POS__ thirty_t (3 * 10) thirty; + sta ~__POS__ T.(triple char int32 int64) (1 + 4 + 8) ('1', 4l, 8L) + +let test_variant () = + let module X = struct + type enum = A | B | C [@@deriving repr] + type enum' = A | B of unit [@@deriving repr] + type equal_size = A of bool | B of char [@@deriving repr] + + type mixed = Argless | Unit of unit | Char of char | Int of int + [@@deriving repr] + end in + let open X in + let sta = check_static_size in + sta ~__POS__ enum_t 1 A; + sta ~__POS__ enum'_t 1 (B ()); + sta ~__POS__ equal_size_t 2 (A true); + sta ~__POS__ [%typ: unit option] 1 None; + + let dyn = check_dynamic_size in + dyn ~__POS__ mixed_t 1 Argless + +let test_recursive () = + let module X = struct + type int_list = [] | ( :: ) of int * int_list [@@deriving repr] + + type int_tree = Leaf of int | Branch of int_tree * int_tree + [@@deriving repr] + + type odd = S of even + + and even = Z | S' of odd [@@deriving repr] + end in + let open X in + let dyn = check_dynamic_size in + dyn ~__POS__ int_list_t 1 []; + dyn ~__POS__ int_list_t 7 [ 1; 2; 3 ]; + + let leaf_size = 2 (* tag + short int *) in + let branch_size = 1 (* tag, excluding subterms *) in + dyn ~__POS__ int_tree_t leaf_size (Leaf 0); + dyn ~__POS__ int_tree_t + (branch_size + (2 * leaf_size)) + (Branch (Leaf 1, Leaf 2)); + dyn ~__POS__ int_tree_t + ((3 * branch_size) + (4 * leaf_size)) + (Branch (Branch (Leaf 1, Leaf 2), Branch (Leaf 3, Leaf 4))); + + dyn ~__POS__ even_t 1 Z; + dyn ~__POS__ odd_t 2 (S Z); + dyn ~__POS__ even_t 3 (S' (S Z)); + dyn ~__POS__ odd_t 4 (S (S' (S Z))); + + () + +let tests = + [ + ("primitive", `Quick, test_primitive); + ("int", `Quick, test_int); + ("container", `Quick, test_container); + ("variant", `Quick, test_variant); + ("recursive", `Quick, test_recursive); + ] diff --git a/test/repr/test_size_of.mli b/test/repr/test_size_of.mli new file mode 100644 index 00000000..d38ba9a9 --- /dev/null +++ b/test/repr/test_size_of.mli @@ -0,0 +1 @@ +val tests : unit Alcotest.test_case list