Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the implementation of size precomputation for binary codecs #69

Merged
merged 5 commits into from
Jun 14, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 225 additions & 29 deletions src/repr/binary_codec.ml

Large diffs are not rendered by default.

17 changes: 15 additions & 2 deletions src/repr/binary_codec_intf.ml
Original file line number Diff line number Diff line change
@@ -1,52 +1,65 @@
open Type_core
open Staging

type 'a encoder = 'a -> (string -> unit) -> unit
type 'a decoder = string -> int -> int * 'a
module Types = struct
type 'a encoder = 'a -> (string -> unit) -> unit
type 'a decoder = string -> int -> int * 'a
type 'a sizer = 'a Size.Sizer.t
end

open Types

module type S = sig
type t

val encode : t encoder
val decode : t decoder
val sizer : t sizer
end

module type S_with_length = sig
type t

val encode : len -> t encoder staged
val decode : len -> t decoder staged
val sizer : len -> t sizer
end

module type S1 = sig
type 'a t

val encode : 'a encoder -> 'a t encoder
val decode : 'a decoder -> 'a t decoder
val sizer : 'a sizer -> 'a t sizer
end

module type S1_with_length = sig
type 'a t

val encode : len -> 'a encoder -> 'a t encoder staged
val decode : len -> 'a decoder -> 'a t decoder staged
val sizer : len -> 'a sizer -> 'a t sizer
end

module type S2 = sig
type ('a, 'b) t

val encode : 'a encoder -> 'b encoder -> ('a, 'b) t encoder
val decode : 'a decoder -> 'b decoder -> ('a, 'b) t decoder
val sizer : 'a sizer -> 'b sizer -> ('a, 'b) t sizer
end

module type S3 = sig
type ('a, 'b, 'c) t

val encode : 'a encoder -> 'b encoder -> 'c encoder -> ('a, 'b, 'c) t encoder
val decode : 'a decoder -> 'b decoder -> 'c decoder -> ('a, 'b, 'c) t decoder
val sizer : 'a sizer -> 'b sizer -> 'c sizer -> ('a, 'b, 'c) t sizer
end

module type Intf = sig
include module type of Types

module type S = S
module type S1 = S1
module type S2 = S2
Expand Down
79 changes: 79 additions & 0 deletions src/repr/size.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
type 'a t = Static of int | Dynamic of 'a | Unknown
type 'a size = 'a t

let map : type a b. (a -> b) -> a t -> b t =
fun f -> function
| Unknown -> Unknown
| Static n -> Static n
| Dynamic a -> Dynamic (f a)

module Syntax = struct
let ( let+ ) x f = map f x
end

(** A type wrapper for positional offsets into buffers (as opposed to e.g.
lengths of values in those buffers). *)
type offset = Offset of int [@@unboxed]

module Offset = struct
type t = offset

let ( +> ) : t -> int -> t = fun (Offset n) m -> Offset (n + m)
let ( <+ ) : int -> t -> t = fun n (Offset m) -> Offset (n + m)
end

module Sizer = struct
type 'a t = {
of_value : ('a -> int) size;
of_encoding : (string -> Offset.t -> Offset.t) size;
}
(** An ['a t] is a value that represents the size information known about a
particular codec for type ['a].

- [of_value]: given a value to encode, return the size of its encoding.

- [of_encoding]: given a buffer [buf] and an offset [off], return the
craigfe marked this conversation as resolved.
Show resolved Hide resolved
_offset_ immediately _after_ the encoding starting at [buf.\[off\]]
NOTE: not the length of the encoding itself, to enable chains of such
sizers to call each other in tail-position.

Invariant: [∀ n. (of_value = Static n) ⟺ (of_encoding = Static n)]. *)

let ( <+> ) : type a. a t -> a t -> a t =
let add_of_value (a : _ size) (b : _ size) : _ size =
match (a, b) with
| Unknown, _ | _, Unknown -> Unknown
| Static a, Static b -> Static (a + b)
| Static 0, other | other, Static 0 -> other
| Static n, Dynamic f | Dynamic f, Static n -> Dynamic (fun a -> n + f a)
| Dynamic f, Dynamic g -> Dynamic (fun a -> f a + g a)
in
let add_of_encoding (a : _ size) (b : _ size) : _ size =
match (a, b) with
| Unknown, _ | _, Unknown -> Unknown
| Static a, Static b -> Static (a + b)
| Static 0, other | other, Static 0 -> other
| Dynamic f, Dynamic g -> Dynamic (fun buf off -> g buf (f buf off))
(* NOTE: in these cases we could be slightly more efficient by storing a
vector of sizing functions inside [Dynamic], which would allow constant
folding for static segments of dynamically-sized types. *)
| Static n, Dynamic f -> Dynamic (fun buf off -> f buf Offset.(off +> n))
| Dynamic f, Static n -> Dynamic (fun buf off -> Offset.(f buf off +> n))
in
fun a b ->
{
of_value = add_of_value a.of_value b.of_value;
of_encoding = add_of_encoding a.of_encoding b.of_encoding;
}

let static n = { of_value = Static n; of_encoding = Static n }

let dynamic ~of_value ~of_encoding =
{ of_value = Dynamic of_value; of_encoding = Dynamic of_encoding }

let using f t =
let of_value = map (fun size_of x -> size_of (f x)) t.of_value in
{ t with of_value }

let unknown = { of_value = Unknown; of_encoding = Unknown }
end
58 changes: 55 additions & 3 deletions src/repr/type.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ open Utils

let pre_hash t =
let rec aux : type a. a t -> a encode_bin = function
| Attributes { attr_type; _ } -> aux attr_type
| Self s -> aux s.self_fix
| Map m ->
let dst = unstage (aux m.x) in
Expand Down Expand Up @@ -307,7 +308,7 @@ let partially_abstract ~pp ~of_string ~json ~bin ~unboxed_bin ~equal ~compare
let encode_bin, decode_bin, size_of =
fold_impl bin
~undefined:(fun () ->
(undefined' "encode_bin", undefined' "decode_bin", undefined' "size_of"))
(undefined' "encode_bin", undefined' "decode_bin", unimplemented_size_of))
~structural:(fun () ->
(Type_binary.encode_bin t, Type_binary.decode_bin t, Type_size.t t))
in
Expand All @@ -316,7 +317,7 @@ let partially_abstract ~pp ~of_string ~json ~bin ~unboxed_bin ~equal ~compare
~undefined:(fun () ->
( undefined' "Unboxed.encode_bin",
undefined' "Unboxed.decode_bin",
undefined' "Unboxed.size_of" ))
unimplemented_size_of ))
~structural:(fun () ->
( Type_binary.Unboxed.encode_bin t,
Type_binary.Unboxed.decode_bin t,
Expand Down Expand Up @@ -421,7 +422,58 @@ let encode_bin, decode_bin, to_bin_string, of_bin_string =
Type_binary.(encode_bin, decode_bin, to_bin_string, of_bin_string)

let random, random_state = Type_random.(of_global, of_state)
let size_of = Type_size.t

let size_of t =
match (Type_size.t t).of_value with
| Size.Static n ->
let n = Some n in
stage (fun _ -> n)
| Size.Dynamic f -> stage (fun x -> Some (f x))
| Size.Unknown -> stage (fun _ -> None)

module Size = struct
type 'a t = 'a Size.t = Static of int | Dynamic of 'a | Unknown

(* The [Size] module defines _scanning_ length decoders that return
[initial_offset + length] rather than just [length]. These functions
convert these to decoders that return the [length] directly. *)

let to_scanning : type a. (a -> int -> int) -> a -> Size.offset -> Size.offset
=
fun len_f buf (Size.Offset off) -> Size.Offset (off + len_f buf off)

let of_scanning : type a. (a -> Size.offset -> Size.offset) -> a -> int -> int
=
fun scan_f buf off ->
let (Size.Offset off') = scan_f buf (Size.Offset off) in
off' - off

let of_value t = (Type_size.t t).of_value
let of_encoding t = Size.map of_scanning (Type_size.unboxed t).of_encoding
let t t = Type_size.t t

type 'a sizer = 'a size_of

let using f t =
let of_value =
Size.map (fun sizer x -> sizer (f x)) t.Size.Sizer.of_value
in
{ t with of_value }

let custom_static n =
Size.Sizer.{ of_value = Static n; of_encoding = Static n }

let custom_dynamic ?of_value ?of_encoding () =
let of_value =
match of_value with Some f -> Dynamic f | None -> Unknown
in
let of_encoding =
match of_encoding with
| Some f -> Dynamic (to_scanning f)
| None -> Unknown
in
Size.Sizer.{ of_value; of_encoding }
end

module Unboxed = struct
include Type_binary.Unboxed
Expand Down
10 changes: 7 additions & 3 deletions src/repr/type_binary.ml
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,16 @@ module Unboxed = struct
let decode_bin = Decode.unboxed
end

let to_bin size_of encode_bin =
let size_of = unstage size_of in
let to_bin (size_of : _ Size.Sizer.t) encode_bin =
let encode_bin = unstage encode_bin in
stage (fun x ->
let seq = encode_bin x in
let len = match size_of x with None -> 1024 | Some n -> n in
let len =
match size_of.of_value with
| Static n -> n
| Dynamic f -> f x
| Unknown -> 1024
in
let buf = Buffer.create len in
seq (Buffer.add_string buf);
Buffer.contents buf)
Expand Down
9 changes: 6 additions & 3 deletions src/repr/type_core.ml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ let annotate t ~add ~data =
| Attributes t -> Attributes { t with attrs = add data t.attrs }
| t -> Attributes { attrs = add data Attribute.Map.empty; attr_type = t }

let unimplemented_size_of =
let f _ = failwith "`size_of` not implemented" in
Size.Sizer.{ of_value = Dynamic f; of_encoding = Dynamic f }

let partial ?(pp = fun _ -> failwith "`pp` not implemented")
?(of_string = fun _ -> failwith "`of_string` not implemented")
?(encode_json = fun _ -> failwith "`encode_json` not implemented")
Expand All @@ -61,13 +65,12 @@ let partial ?(pp = fun _ -> failwith "`pp` not implemented")
?(equal = stage (fun _ -> failwith "`equal` not implemented"))
?(encode_bin = stage (fun _ -> failwith "`encode_bin` not implemented"))
?(decode_bin = stage (fun _ -> failwith "`decode_bin` not implemented"))
?(size_of = stage (fun _ -> failwith "`size_of` not implemented"))
?(size_of = unimplemented_size_of)
?(unboxed_encode_bin =
stage (fun _ -> failwith "`unboxed_encode_bin` not implemented"))
?(unboxed_decode_bin =
stage (fun _ -> failwith "`unboxed_decode_bin` not implemented"))
?(unboxed_size_of =
stage (fun _ -> failwith "`unboxed_size_of` not implemented")) () =
?(unboxed_size_of = unimplemented_size_of) () =
Custom
{
cwit = `Witness (Witness.make ());
Expand Down
3 changes: 2 additions & 1 deletion src/repr/type_core_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ module Types = struct
type 'a pre_hash = 'a bin_seq staged
type 'a encode_bin = 'a bin_seq staged
type 'a decode_bin = (string -> int -> int * 'a) staged
type 'a size_of = ('a -> int option) staged
type 'a size_of = 'a Size.Sizer.t
type 'a compare = ('a -> 'a -> int) staged
type 'a equal = ('a -> 'a -> bool) staged
type 'a short_hash = (?seed:int -> 'a -> int) staged
Expand Down Expand Up @@ -136,6 +136,7 @@ module type Type_core = sig
include module type of Types
(** @inline *)

val unimplemented_size_of : 'a size_of
val fields : 'a record -> 'a a_field list

module Fields_folder (Acc : sig
Expand Down
37 changes: 35 additions & 2 deletions src/repr/type_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ module type DSL = sig
type 'a decode_bin = (string -> int -> int * 'a) staged
(** The type for binary decoders. *)

type 'a size_of = ('a -> int option) staged
type -'a size_of
(** The type for size function related to binary encoder/decoders. *)

type 'a short_hash := (?seed:int -> 'a -> int) staged
Expand Down Expand Up @@ -561,11 +561,44 @@ module type DSL = sig

{b NOTE:} When [t] is {!Type.string}, the result is [s] (without copy). *)

val size_of : 'a t -> 'a size_of
val size_of : 'a t -> ('a -> int option) staged
(** [size_of t x] is either the size of [encode_bin t x] or the binary
encoding of [x], if the backend is not able to pre-compute serialisation
lengths. *)

module Size : sig
(** A value representing information known about the length in bytes of
encodings produced by a particular binary codec:

- [Static n]: all encodings produced by this codec have length [n];

- [Dynamic f]: the length of binary encodings is dependent on the
specific value, but may be efficiently computed at run-time via the
function [f].

- [Unknown]: this codec may produce encodings that cannot be efficiently
pre-computed.*)
type 'a t = 'a Size.t = private Static of int | Dynamic of 'a | Unknown

val of_value : 'a ty -> ('a -> int) t
val of_encoding : 'a ty -> (string -> int -> int) t

(** Constructors for custom value sizers, for use with binary codecs that
are not structurally-defined. *)

type -'a sizer = 'a size_of

val t : 'a ty -> 'a sizer
val using : ('b -> 'a) -> 'a sizer -> 'b sizer
val custom_static : int -> _ sizer

val custom_dynamic :
?of_value:('a -> int) ->
?of_encoding:(string -> int -> int) ->
unit ->
'a sizer
end

module Unboxed : sig
(** Unboxed operations assumes that value being serialized is fully filling
the underlying buffer. When that's the case, it is not necessary to
Expand Down
Loading