Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

irmin-pack: unify LRUs and add max memory config #2254

Merged
merged 6 commits into from
Jul 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
default. Add `allow_duplicate` optional argument to override. (@metanivek,
#2252)

- **irmin-pack**
- Add maximum memory as an alternative configuration option, `lru_max_memory`,
for setting LRU capacity. (@metanivek, #2254)

### Changed

- **irmin**
Expand All @@ -20,7 +24,6 @@

## 3.7.1 (2023-05-24)


### Fixed

- **irmin-pack**
Expand Down
14 changes: 12 additions & 2 deletions src/irmin-pack/conf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ end
module Default = struct
let fresh = false
let lru_size = 100_000
let lru_max_memory = None
let index_log_size = 2_500_000
let readonly = false
let merge_throttle = `Block_writes
Expand All @@ -53,9 +54,15 @@ module Key = struct
Default.fresh

let lru_size =
key ~spec ~doc:"Size of the LRU cache for pack entries." "lru-size"
key ~spec ~doc:"Maximum size of the LRU cache for pack entries." "lru-size"
Irmin.Type.int Default.lru_size

let lru_max_memory =
key ~spec ~doc:"Maximum memory in bytes of the LRU cache for pack entries."
"lru-max-memory"
Irmin.Type.(option int)
Default.lru_max_memory

let index_log_size =
key ~spec ~doc:"Size of index logs." "index-log-size" Irmin.Type.int
Default.index_log_size
Expand Down Expand Up @@ -109,6 +116,7 @@ end

let fresh config = get config Key.fresh
let lru_size config = get config Key.lru_size
let lru_max_memory config = get config Key.lru_max_memory
let readonly config = get config Key.readonly
let index_log_size config = get config Key.index_log_size
let merge_throttle config = get config Key.merge_throttle
Expand All @@ -132,7 +140,8 @@ let suffix_auto_flush_threshold config =
let no_migrate config = get config Key.no_migrate

let init ?(fresh = Default.fresh) ?(readonly = Default.readonly)
?(lru_size = Default.lru_size) ?(index_log_size = Default.index_log_size)
?(lru_size = Default.lru_size) ?(lru_max_memory = Default.lru_max_memory)
?(index_log_size = Default.index_log_size)
?(merge_throttle = Default.merge_throttle)
?(indexing_strategy = Default.indexing_strategy)
?(use_fsync = Default.use_fsync)
Expand All @@ -144,6 +153,7 @@ let init ?(fresh = Default.fresh) ?(readonly = Default.readonly)
let config = add config Key.lower_root lower_root in
let config = add config Key.fresh fresh in
let config = add config Key.lru_size lru_size in
let config = add config Key.lru_max_memory lru_max_memory in
let config = add config Key.index_log_size index_log_size in
let config = add config Key.readonly readonly in
let config = add config Key.merge_throttle merge_throttle in
Expand Down
9 changes: 8 additions & 1 deletion src/irmin-pack/conf.mli
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type merge_throttle = [ `Block_writes | `Overcommit_memory ] [@@deriving irmin]
module Key : sig
val fresh : bool Irmin.Backend.Conf.key
val lru_size : int Irmin.Backend.Conf.key
val lru_max_memory : int option Irmin.Backend.Conf.key
val index_log_size : int Irmin.Backend.Conf.key
val readonly : bool Irmin.Backend.Conf.key
val root : string Irmin.Backend.Conf.key
Expand All @@ -88,7 +89,12 @@ val fresh : Irmin.Backend.Conf.t -> bool
setting this to [true] will delete existing data. Default is [false]. *)

val lru_size : Irmin.Backend.Conf.t -> int
(** Size, in number of entries, of LRU cache. Default [100_000]. *)
(** Maximum size, in number of entries, of LRU cache. Default [100_000]. Unused
if {!lru_max_memory} is set. *)

val lru_max_memory : Irmin.Backend.Conf.t -> int option
(** Maximum memory, in bytes, for the LRU cache to use. Default [None], which
falls back to {!lru_size} for LRU limit. *)

val index_log_size : Irmin.Backend.Conf.t -> int
(** Size, in number of entries, of index log. Default [2_500_000]. *)
Expand Down Expand Up @@ -136,6 +142,7 @@ val init :
?fresh:bool ->
?readonly:bool ->
?lru_size:int ->
?lru_max_memory:int option ->
?index_log_size:int ->
?merge_throttle:merge_throttle ->
?indexing_strategy:Indexing_strategy.t ->
Expand Down
14 changes: 14 additions & 0 deletions src/irmin-pack/import.ml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,17 @@ module Int63 = struct
end

type int63 = Int63.t [@@deriving irmin]

module Mem = struct
let bytes_per_word = Sys.word_size / 8

let reachable_bytes o =
Obj.repr o |> Obj.reachable_words |> Int.mul bytes_per_word

let repr_size : type a. a Repr.t -> a -> int =
fun ty ->
match Irmin.Type.Size.of_value ty with
| Unknown -> Fun.const max_int
| Dynamic f -> fun v -> f v
| Static n -> Fun.const n
end
18 changes: 17 additions & 1 deletion src/irmin-pack/inode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1776,7 +1776,10 @@ struct
type key = Key.t
type t = T.key Bin.t [@@deriving irmin]
type metadata = T.metadata [@@deriving irmin]
type Pack_value.kinded += Node of t

let to_kinded t = Node t
let of_kinded = function Node n -> n | _ -> assert false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would prefer if this extensible type was hidden in the LRU implementation.. I think it should be possible to create behind the scene the fresh += Node new type constructor by keeping the LRU a functor to instantiate for each type of value that we want to store in it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting idea! I'll see what I can do. I do agree it would be nicer to hide this bookkeeping.

let depth = Bin.depth

exception Invalid_depth of { expected : int; got : int; v : t }
Expand All @@ -1786,7 +1789,20 @@ struct
if t.root then Pack_value.Kind.Inode_v2_root
else Pack_value.Kind.Inode_v2_nonroot

let weight _ = 1
let repr_size = Mem.repr_size t

(** [repr_size] undercounts the size of an inode by around this factor.

A value of 4.5 was empirically observed by averaging the ratio between
[Mem.reachable_bytes] and [repr_size] during a few runs of a trace
replay. This value is rounded to 5 to prevent float-int conversion
during weight calculation, at the expense of letting fewer objects into
the LRU. *)
let repr_size_adjustment = 5

let weight t =
Pack_value.Deferred (fun () -> repr_size_adjustment * repr_size t)

let hash t = Bin.hash t
let step_to_bin = T.step_to_bin_string
let step_of_bin = T.step_of_bin_string
Expand Down
1 change: 1 addition & 0 deletions src/irmin-pack/irmin_pack_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ module type Sigs = sig
?fresh:bool ->
?readonly:bool ->
?lru_size:int ->
?lru_max_memory:int option ->
?index_log_size:int ->
?merge_throttle:Conf.merge_throttle ->
?indexing_strategy:Indexing_strategy.t ->
Expand Down
23 changes: 12 additions & 11 deletions src/irmin-pack/pack_value.ml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,10 @@ struct
type t = Data.t [@@deriving irmin ~size_of]
type key = Key.t
type hash = Hash.t
type kinded += Contents of t

let to_kinded t = Contents t
let of_kinded = function Contents c -> c | _ -> assert false
let hash = Hash.hash
let kind = Kind.Contents
let length_header = Fun.const Conf.contents_length_header
Expand All @@ -138,16 +141,8 @@ struct
let kind _ = kind

let weight =
(* Normalise the weight of the blob, assuming that the 'average'
size for a blob is 1k bytes. *)
let normalise n = max 1 (n / 1_000) in
match Irmin.Type.Size.of_value t with
| Unknown ->
(* this should not happen unless the user has specified a very
weird content type. *)
Fun.const max_int
| Dynamic f -> fun v -> normalise (f v)
| Static n -> Fun.const (normalise n)
let size = Mem.repr_size t in
fun v -> Immediate (size v)
end

module Of_commit
Expand All @@ -162,10 +157,16 @@ struct
type t = Commit.t [@@deriving irmin]
type key = Key.t
type hash = Hash.t [@@deriving irmin ~encode_bin ~decode_bin]
type kinded += Commit of t

let to_kinded t = Commit t
let of_kinded = function Commit c -> c | _ -> assert false
let hash = Hash.hash
let kind _ = Kind.Commit_v2
let weight _ = 1

let weight =
let size = Mem.repr_size t in
fun v -> Deferred (fun () -> size v)

(* A commit implementation that uses integer offsets for addresses where possible. *)
module Commit_direct = struct
Expand Down
22 changes: 21 additions & 1 deletion src/irmin-pack/pack_value_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
open! Import

type length_header = [ `Varint ] option
type weight = Immediate of int | Deferred of (unit -> int)
type kinded = ..

module type S = sig
include Irmin.Type.S
Expand All @@ -32,7 +34,7 @@ module type S = sig
(** Describes the length header formats for the {i data} sections of pack
entries. *)

val weight : t -> int
val weight : t -> weight
(** [weight t] is the [t]'s LRU weight. *)

val encode_bin :
Expand All @@ -48,6 +50,15 @@ module type S = sig
t Irmin.Type.decode_bin

val decode_bin_length : string -> int -> int

val to_kinded : t -> kinded
(** [to_kinded t] returns a {!kinded} version of [t]. *)

val of_kinded : kinded -> t
(** [of_kinded k] is the inverse of [to_kinded t].

It is expected that an implementation only works for [k] that is returned
from [to_kinded t] and will raise an exception otherwise. *)
end

module type T = sig
Expand Down Expand Up @@ -84,6 +95,15 @@ module type Sigs = sig
header is user defined. *)
end

type nonrec weight = weight = Immediate of int | Deferred of (unit -> int)

type nonrec kinded = kinded = ..
(** [kinded] is an extenisble variant that each {!S} extends so that it can
define {!S.to_kinded} and {!S.of_kinded}. Its purpose is to allow
containers, such as {!Irmin_pack_unix.Lru}, to store and return all types
of {!S} and thus be usable by modules defined over {!S}, such as
{!Irmin_pack_unix.Pack_store}. *)

module type S = S with type kind := Kind.t
module type Config = Config

Expand Down
1 change: 1 addition & 0 deletions src/irmin-pack/unix/gc_args.ml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ module type S = sig
fm:Fm.t ->
dict:Dict.t ->
dispatcher:Dispatcher.t ->
lru:Lru.t ->
read t

val unsafe_find :
Expand Down
5 changes: 3 additions & 2 deletions src/irmin-pack/unix/gc_worker.ml
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,9 @@ module Make (Args : Gc_args.S) = struct
@@ fun () ->
let dict = Dict.v fm |> Errs.raise_if_error in
let dispatcher = Dispatcher.v fm |> Errs.raise_if_error in
let node_store = Node_store.v ~config ~fm ~dict ~dispatcher in
let commit_store = Commit_store.v ~config ~fm ~dict ~dispatcher in
let lru = Lru.create config in
let node_store = Node_store.v ~config ~fm ~dict ~dispatcher ~lru in
let commit_store = Commit_store.v ~config ~fm ~dict ~dispatcher ~lru in

(* Step 2. Load commit which will make [commit_key] [Direct] if it's not
already the case. *)
Expand Down
1 change: 1 addition & 0 deletions src/irmin-pack/unix/inode_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ module type Persistent = sig
fm:file_manager ->
dict:dict ->
dispatcher:dispatcher ->
lru:Lru.t ->
read t

include Irmin_pack.Checkable with type 'a t := 'a t and type hash := hash
Expand Down
1 change: 1 addition & 0 deletions src/irmin-pack/unix/irmin_pack_unix.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ module Sparse_file = Sparse_file
module File_manager = File_manager
module Lower = Lower
module Utils = Utils
module Lru = Lru
1 change: 1 addition & 0 deletions src/irmin-pack/unix/irmin_pack_unix.mli
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,4 @@ module Sparse_file = Sparse_file
module File_manager = File_manager
module Lower = Lower
module Utils = Utils
module Lru = Lru
91 changes: 91 additions & 0 deletions src/irmin-pack/unix/lru.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
(*
* Copyright (c) 2023 Tarides <contact@tarides.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)

open Import

module Internal = Irmin.Backend.Lru.Make (struct
include Int63

let hash = Hashtbl.hash
end)

type key = int63
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's a problem atm, but just to be sure: this works because we only use the LRU once per kind of objects, and the keys of the different kind of objects are naturally distinct since they are file offsets?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this is a good point to bring up!

A problem could arise if a particular offset is re-used for a different object type than what was originally cached, but that can't happen since it is correlated to the ever-growing file offsets of the pack file (as you say). The unified LRU, as it currently is written, is definitely tied to these implementation details. I was trying to avoid a functorized LRU for simplicity, but (thinking out loud) if we move to that, perhaps I can encode the type in the key as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah hm, but having an extensible key type that support custom hashing/equality is a whole new world of pain... which is how I ended up thinking about "not sharing the hashtables" to sidestep the issue :P

type value = Irmin_pack.Pack_value.kinded
type weighted_value = { v : value; weight : int }

type t = {
lru : weighted_value Internal.t;
weight_limit : int option;
mutable total_weight : int;
}

let create config =
let lru_max_memory = Irmin_pack.Conf.lru_max_memory config in
let lru_size, weight_limit =
match lru_max_memory with
| None -> (Irmin_pack.Conf.lru_size config, None)
| Some b -> (-42, Some b)
in
let lru = Internal.create lru_size in
{ lru; weight_limit; total_weight = 0 }

let lru_enabled t = match t.weight_limit with None -> true | Some x -> x > 0

(** [exceeds_entry_weight_limit] attempts to filter out entries that are "too
large".

Since we do not necessarily want to incur a cost for calculating the weight
for every entry when [lru_max_memory] is not configured, the control for
this is in the caller of [add]. Only [Irmin_pack.Pack_value.Immediate]
weight's are checked.

The current entry weight limit is hard-coded to 20kB. *)
let exceeds_entry_weight_limit = function
| Irmin_pack.Pack_value.Immediate w -> w > 20_000
| Deferred _ -> false

let resolve_weight = function
| Irmin_pack.Pack_value.Immediate w -> w
| Deferred w -> w ()

let add t k w v =
if lru_enabled t = false then ()
else if exceeds_entry_weight_limit w then ()
else
let add t k v w =
let n = { v; weight = w } in
t.total_weight <- t.total_weight + w;
Internal.add t.lru k n
in
match t.weight_limit with
| None -> add t k v 0
| Some limit ->
add t k v (resolve_weight w);
while t.total_weight > limit do
match Internal.drop t.lru with
| None -> t.total_weight <- 0
| Some n -> t.total_weight <- t.total_weight - n.weight
done

let v v = v.v
let find { lru; _ } k = Internal.find lru k |> v
let mem { lru; _ } k = Internal.mem lru k

let clear t =
Internal.clear t.lru;
t.total_weight <- 0

let iter { lru; _ } f = Internal.iter lru (fun k wv -> f k (v wv))
Loading