Skip to content

Commit

Permalink
Extend the implementation of size precomputation for binary codecs
Browse files Browse the repository at this point in the history
The implementation now makes a distinction between three states of
knowledge about the size of a codec:

- `Static n`: the encoding always occupies exactly `n` bytes;
- `Dynamic f`: the encoding has variable size, pre-computed by `f`;
- `Unknown`: the encoding size cannot be efficiently pre-computed.

This allows some of the work in determining the size of a codec to be
computed at specialisation time, cutting down significantly on the
allocation cost.

Additionally, the implementation is now also able to efficiently
_recover_ the size of an encoding given a pointer to the start of that
encoding (i.e. without actually decoding the value). In this case,
`Dynamic f` is a function that decodes the size of an encoding, and
`Unknown` signifies that this cannot be done (e.g. for unboxed values).

Fix #65.
  • Loading branch information
craigfe committed Jun 12, 2021
1 parent 31734fd commit c39fb13
Show file tree
Hide file tree
Showing 16 changed files with 782 additions and 229 deletions.
254 changes: 225 additions & 29 deletions src/repr/binary_codec.ml

Large diffs are not rendered by default.

17 changes: 15 additions & 2 deletions src/repr/binary_codec_intf.ml
Original file line number Diff line number Diff line change
@@ -1,52 +1,65 @@
open Type_core
open Staging

type 'a encoder = 'a -> (string -> unit) -> unit
type 'a decoder = string -> int -> int * 'a
module Types = struct
type 'a encoder = 'a -> (string -> unit) -> unit
type 'a decoder = string -> int -> int * 'a
type 'a sizer = 'a Size.Sizer.t
end

open Types

module type S = sig
type t

val encode : t encoder
val decode : t decoder
val sizer : t sizer
end

module type S_with_length = sig
type t

val encode : len -> t encoder staged
val decode : len -> t decoder staged
val sizer : len -> t sizer
end

module type S1 = sig
type 'a t

val encode : 'a encoder -> 'a t encoder
val decode : 'a decoder -> 'a t decoder
val sizer : 'a sizer -> 'a t sizer
end

module type S1_with_length = sig
type 'a t

val encode : len -> 'a encoder -> 'a t encoder staged
val decode : len -> 'a decoder -> 'a t decoder staged
val sizer : len -> 'a sizer -> 'a t sizer
end

module type S2 = sig
type ('a, 'b) t

val encode : 'a encoder -> 'b encoder -> ('a, 'b) t encoder
val decode : 'a decoder -> 'b decoder -> ('a, 'b) t decoder
val sizer : 'a sizer -> 'b sizer -> ('a, 'b) t sizer
end

module type S3 = sig
type ('a, 'b, 'c) t

val encode : 'a encoder -> 'b encoder -> 'c encoder -> ('a, 'b, 'c) t encoder
val decode : 'a decoder -> 'b decoder -> 'c decoder -> ('a, 'b, 'c) t decoder
val sizer : 'a sizer -> 'b sizer -> 'c sizer -> ('a, 'b, 'c) t sizer
end

module type Intf = sig
include module type of Types

module type S = S
module type S1 = S1
module type S2 = S2
Expand Down
79 changes: 79 additions & 0 deletions src/repr/size.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
type 'a t = Static of int | Dynamic of 'a | Unknown
type 'a size = 'a t

let map : type a b. (a -> b) -> a t -> b t =
fun f -> function
| Unknown -> Unknown
| Static n -> Static n
| Dynamic a -> Dynamic (f a)

module Syntax = struct
let ( let+ ) x f = map f x
end

(** A type wrapper for positional offsets into buffers (as opposed to e.g.
lengths of values in those buffers). *)
type offset = Offset of int [@@unboxed]

module Offset = struct
type t = offset

let ( +> ) : t -> int -> t = fun (Offset n) m -> Offset (n + m)
let ( <+ ) : int -> t -> t = fun n (Offset m) -> Offset (n + m)
end

module Sizer = struct
type 'a t = {
of_value : ('a -> int) size;
of_encoding : (string -> Offset.t -> Offset.t) size;
}
(** An ['a t] is a value that represents the size information known about a
particular codec for type ['a].
- [of_value]: given a value to encode, return the size of its encoding.
- [of_encoding]: given a buffer [buf] and an offset [off], return the
_offset_ immediately _after_ the encoding starting at [buf.\[off\]]
NOTE: not the length of the encoding itself, to enable chains of such
sizers to call each other in tail-position.
Invariant: [∀ n. (of_value = Static n) ⟺ (of_encoding = Static n)]. *)

let ( <+> ) : type a. a t -> a t -> a t =
let add_of_value (a : _ size) (b : _ size) : _ size =
match (a, b) with
| Unknown, _ | _, Unknown -> Unknown
| Static a, Static b -> Static (a + b)
| Static 0, other | other, Static 0 -> other
| Static n, Dynamic f | Dynamic f, Static n -> Dynamic (fun a -> n + f a)
| Dynamic f, Dynamic g -> Dynamic (fun a -> f a + g a)
in
let add_of_encoding (a : _ size) (b : _ size) : _ size =
match (a, b) with
| Unknown, _ | _, Unknown -> Unknown
| Static a, Static b -> Static (a + b)
| Static 0, other | other, Static 0 -> other
| Dynamic f, Dynamic g -> Dynamic (fun buf off -> g buf (f buf off))
(* NOTE: in these cases we could be slightly more efficient by storing a
vector of sizing functions inside [Dynamic], which would allow constant
folding for static segments of dynamically-sized types. *)
| Static n, Dynamic f -> Dynamic (fun buf off -> f buf Offset.(off +> n))
| Dynamic f, Static n -> Dynamic (fun buf off -> Offset.(f buf off +> n))
in
fun a b ->
{
of_value = add_of_value a.of_value b.of_value;
of_encoding = add_of_encoding a.of_encoding b.of_encoding;
}

let static n = { of_value = Static n; of_encoding = Static n }

let dynamic ~of_value ~of_encoding =
{ of_value = Dynamic of_value; of_encoding = Dynamic of_encoding }

let using f t =
let of_value = map (fun size_of x -> size_of (f x)) t.of_value in
{ t with of_value }

let unknown = { of_value = Unknown; of_encoding = Unknown }
end
56 changes: 53 additions & 3 deletions src/repr/type.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ open Utils

let pre_hash t =
let rec aux : type a. a t -> a encode_bin = function
| Attributes { attr_type; _ } -> aux attr_type
| Self s -> aux s.self_fix
| Map m ->
let dst = unstage (aux m.x) in
Expand Down Expand Up @@ -307,7 +308,7 @@ let partially_abstract ~pp ~of_string ~json ~bin ~unboxed_bin ~equal ~compare
let encode_bin, decode_bin, size_of =
fold_impl bin
~undefined:(fun () ->
(undefined' "encode_bin", undefined' "decode_bin", undefined' "size_of"))
(undefined' "encode_bin", undefined' "decode_bin", unimplemented_size_of))
~structural:(fun () ->
(Type_binary.encode_bin t, Type_binary.decode_bin t, Type_size.t t))
in
Expand All @@ -316,7 +317,7 @@ let partially_abstract ~pp ~of_string ~json ~bin ~unboxed_bin ~equal ~compare
~undefined:(fun () ->
( undefined' "Unboxed.encode_bin",
undefined' "Unboxed.decode_bin",
undefined' "Unboxed.size_of" ))
unimplemented_size_of ))
~structural:(fun () ->
( Type_binary.Unboxed.encode_bin t,
Type_binary.Unboxed.decode_bin t,
Expand Down Expand Up @@ -421,7 +422,56 @@ let encode_bin, decode_bin, to_bin_string, of_bin_string =
Type_binary.(encode_bin, decode_bin, to_bin_string, of_bin_string)

let random, random_state = Type_random.(of_global, of_state)
let size_of = Type_size.t

let size_of t =
match (Type_size.t t).of_value with
| Size.Static n -> stage (fun _ -> Some n)
| Size.Dynamic f -> stage (fun x -> Some (f x))
| Size.Unknown -> stage (fun _ -> None)

module Size = struct
type 'a t = 'a Size.t = Static of int | Dynamic of 'a | Unknown

(* The [Size] module defines _scanning_ length decoders that return
[initial_offset + length] rather than just [length]. These functions
convert these to decoders that return the [length] directly. *)

let to_scanning : type a. (a -> int -> int) -> a -> Size.offset -> Size.offset
=
fun len_f buf (Size.Offset off) -> Size.Offset (off + len_f buf off)

let of_scanning : type a. (a -> Size.offset -> Size.offset) -> a -> int -> int
=
fun scan_f buf off ->
let (Size.Offset off') = scan_f buf (Size.Offset off) in
off' - off

let of_value t = (Type_size.t t).of_value
let of_encoding t = Size.map of_scanning (Type_size.unboxed t).of_encoding
let t t = Type_size.t t

type 'a sizer = 'a size_of

let using f t =
let of_value =
Size.map (fun sizer x -> sizer (f x)) t.Size.Sizer.of_value
in
{ t with of_value }

let custom_static n =
Size.Sizer.{ of_value = Static n; of_encoding = Static n }

let custom_dynamic ?of_value ?of_encoding () =
let of_value =
match of_value with Some f -> Dynamic f | None -> Unknown
in
let of_encoding =
match of_encoding with
| Some f -> Dynamic (to_scanning f)
| None -> Unknown
in
Size.Sizer.{ of_value; of_encoding }
end

module Unboxed = struct
include Type_binary.Unboxed
Expand Down
10 changes: 7 additions & 3 deletions src/repr/type_binary.ml
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,16 @@ module Unboxed = struct
let decode_bin = Decode.unboxed
end

let to_bin size_of encode_bin =
let size_of = unstage size_of in
let to_bin (size_of : _ Size.Sizer.t) encode_bin =
let encode_bin = unstage encode_bin in
stage (fun x ->
let seq = encode_bin x in
let len = match size_of x with None -> 1024 | Some n -> n in
let len =
match size_of.of_value with
| Static n -> n
| Dynamic f -> f x
| Unknown -> 1024
in
let buf = Buffer.create len in
seq (Buffer.add_string buf);
Buffer.contents buf)
Expand Down
9 changes: 6 additions & 3 deletions src/repr/type_core.ml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ let annotate t ~add ~data =
| Attributes t -> Attributes { t with attrs = add data t.attrs }
| t -> Attributes { attrs = add data Attribute.Map.empty; attr_type = t }

let unimplemented_size_of =
let f _ = failwith "`size_of` not implemented" in
Size.Sizer.{ of_value = Dynamic f; of_encoding = Dynamic f }

let partial ?(pp = fun _ -> failwith "`pp` not implemented")
?(of_string = fun _ -> failwith "`of_string` not implemented")
?(encode_json = fun _ -> failwith "`encode_json` not implemented")
Expand All @@ -61,13 +65,12 @@ let partial ?(pp = fun _ -> failwith "`pp` not implemented")
?(equal = stage (fun _ -> failwith "`equal` not implemented"))
?(encode_bin = stage (fun _ -> failwith "`encode_bin` not implemented"))
?(decode_bin = stage (fun _ -> failwith "`decode_bin` not implemented"))
?(size_of = stage (fun _ -> failwith "`size_of` not implemented"))
?(size_of = unimplemented_size_of)
?(unboxed_encode_bin =
stage (fun _ -> failwith "`unboxed_encode_bin` not implemented"))
?(unboxed_decode_bin =
stage (fun _ -> failwith "`unboxed_decode_bin` not implemented"))
?(unboxed_size_of =
stage (fun _ -> failwith "`unboxed_size_of` not implemented")) () =
?(unboxed_size_of = unimplemented_size_of) () =
Custom
{
cwit = `Witness (Witness.make ());
Expand Down
3 changes: 2 additions & 1 deletion src/repr/type_core_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ module Types = struct
type 'a pre_hash = 'a bin_seq staged
type 'a encode_bin = 'a bin_seq staged
type 'a decode_bin = (string -> int -> int * 'a) staged
type 'a size_of = ('a -> int option) staged
type 'a size_of = 'a Size.Sizer.t
type 'a compare = ('a -> 'a -> int) staged
type 'a equal = ('a -> 'a -> bool) staged
type 'a short_hash = (?seed:int -> 'a -> int) staged
Expand Down Expand Up @@ -136,6 +136,7 @@ module type Type_core = sig
include module type of Types
(** @inline *)

val unimplemented_size_of : 'a size_of
val fields : 'a record -> 'a a_field list

module Fields_folder (Acc : sig
Expand Down
37 changes: 35 additions & 2 deletions src/repr/type_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ module type DSL = sig
type 'a decode_bin = (string -> int -> int * 'a) staged
(** The type for binary decoders. *)

type 'a size_of = ('a -> int option) staged
type -'a size_of
(** The type for size function related to binary encoder/decoders. *)

type 'a short_hash := (?seed:int -> 'a -> int) staged
Expand Down Expand Up @@ -561,11 +561,44 @@ module type DSL = sig
{b NOTE:} When [t] is {!Type.string}, the result is [s] (without copy). *)

val size_of : 'a t -> 'a size_of
val size_of : 'a t -> ('a -> int option) staged
(** [size_of t x] is either the size of [encode_bin t x] or the binary
encoding of [x], if the backend is not able to pre-compute serialisation
lengths. *)

module Size : sig
(** A value representing information known about the length in bytes of
encodings produced by a particular binary codec:
- [Static n]: all encodings produced by this codec have length [n];
- [Dynamic f]: the length of binary encodings is dependent on the
specific value, but may be efficiently computed at run-time via the
function [f].
- [Unknown]: this codec may produce encodings that cannot be efficiently
pre-computed.*)
type 'a t = 'a Size.t = private Static of int | Dynamic of 'a | Unknown

val of_value : 'a ty -> ('a -> int) t
val of_encoding : 'a ty -> (string -> int -> int) t

(** Constructors for custom value sizers, for use with binary codecs that
are not structurally-defined. *)

type -'a sizer = 'a size_of

val t : 'a ty -> 'a sizer
val using : ('b -> 'a) -> 'a sizer -> 'b sizer
val custom_static : int -> _ sizer

val custom_dynamic :
?of_value:('a -> int) ->
?of_encoding:(string -> int -> int) ->
unit ->
'a sizer
end

module Unboxed : sig
(** Unboxed operations assumes that value being serialized is fully filling
the underlying buffer. When that's the case, it is not necessary to
Expand Down
Loading

0 comments on commit c39fb13

Please sign in to comment.