diff --git a/bench/bench.ml b/bench/bench.ml
index 4c8e31ab..39273a97 100644
--- a/bench/bench.ml
+++ b/bench/bench.ml
@@ -214,7 +214,7 @@ module Index = struct
   let run ~with_metrics ~nb_entries ~log_size ~root ~name ~fresh ~readonly b =
     let index =
-      Index.v ~cache:(Index.empty_cache ()) ~fresh ~readonly ~log_size
+      Index.v ~io:() ~cache:(Index.empty_cache ()) ~fresh ~readonly ~log_size
         (root // name)
     in
     let result = Benchmark.run ~nb_entries (b ~with_metrics index) in
diff --git a/bench/replay.ml b/bench/replay.ml
index 86feb426..2363178d 100644
--- a/bench/replay.ml
+++ b/bench/replay.ml
@@ -164,7 +164,10 @@ module Index = struct
   include Index
 
   let cache = Index.empty_cache ()
-  let v root = Index.v ~cache ~readonly:false ~fresh:true ~log_size:500_000 root
+
+  let v root =
+    Index.v ~io:() ~cache ~readonly:false ~fresh:true ~log_size:500_000 root
+
   let close t = Index.close t
 end
diff --git a/src/checks.ml b/src/checks.ml
index 3d447025..2fe64660 100644
--- a/src/checks.ml
+++ b/src/checks.ml
@@ -42,6 +42,8 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct
     @@ pos 0 (some string) None
     @@ info ~doc:"Path to the Index store on disk" ~docv:"PATH" []
 
+  type io = Platform.IO.io
+
   module Stat = struct
     type io = {
       size : size;
@@ -62,16 +64,17 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct
 
     type t = { entry_size : size; files : files } [@@deriving repr]
 
-    let with_io : type a. string -> (IO.t -> a) -> a option =
-     fun path f ->
-      match IO.v path with
+    let with_io : type a. io:Platform.IO.io -> string -> (IO.t -> a) -> a option
+        =
+     fun ~io path f ->
+      match IO.v ~io path with
       | Error `No_file_on_disk -> None
       | Ok io ->
          let a = f io in
          IO.close io;
          Some a
 
-    let io path =
-      with_io path @@ fun io ->
+    let run_io ~io path =
+      with_io ~io path @@ fun io ->
       let IO.Header.{ offset; generation } = IO.Header.get io in
       let fanout_size = Bytes (IO.get_fanout_size io) in
@@ -80,12 +83,12 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct
       let generation = Int63.to_int64 generation in
       { size; offset; generation; fanout_size }
 
-    let run ~root =
+    let run ~io ~root =
       Logs.app (fun f -> f "Getting statistics for store: `%s'@," root);
-      let data = io (Layout.data ~root) in
-      let log = io (Layout.log ~root) in
-      let log_async = io (Layout.log_async ~root) in
-      let merge = io (Layout.merge ~root) in
+      let data = run_io ~io (Layout.data ~root) in
+      let log = run_io ~io (Layout.log ~root) in
+      let log_async = run_io ~io (Layout.log_async ~root) in
+      let merge = run_io ~io (Layout.merge ~root) in
       let lock =
         IO.Lock.pp_dump (Layout.lock ~root)
         |> Option.map (fun f ->
@@ -99,7 +102,7 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct
       }
       |> Repr.pp_json ~minify:false t Fmt.stdout
 
-    let term = Cmdliner.Term.(const (fun root () -> run ~root) $ path)
+    let term ~io = Cmdliner.Term.(const (fun root () -> run ~io ~root) $ path)
   end
 
   module Integrity_check = struct
@@ -120,9 +123,9 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct
             highlight (fun ppf () -> (Repr.pp Entry.t) ppf entry))
       |> Fmt.(concat ~sep:cut)
 
-    let run ~root =
+    let run ~io ~root =
       let context = 2 in
-      match IO.v (Layout.data ~root) with
+      match IO.v ~io (Layout.data ~root) with
       | Error `No_file_on_disk -> Fmt.failwith "No data file in %s" root
       | Ok io ->
           let io_offset = IO.offset io in
@@ -151,7 +154,7 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct
               ());
           previous := e)
 
-    let term =
Cmdliner.Term.(const (fun root () -> run ~root) $ path) + let term ~io = Cmdliner.Term.(const (fun root () -> run ~io ~root) $ path) end module Cli = struct @@ -166,7 +169,7 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct in Logs_fmt.reporter ~pp_header () - let main () : empty = + let main ~io () : empty = let default = Term.(ret (const (`Help (`Auto, None)))) in let info = let doc = "Check and repair Index data-stores." in @@ -175,12 +178,12 @@ module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct let commands = [ ( Term.( - Stat.term + Stat.term ~io $ Log.setup_term ~reporter (module Clock) (module Fmt_tty)), Cmd.info ~doc:"Print high-level statistics about the store." "stat" ); ( Term.( - Integrity_check.term + Integrity_check.term ~io $ Log.setup_term ~reporter (module Clock) (module Fmt_tty)), Cmd.info ~doc:"Search the store for integrity faults and corruption." diff --git a/src/checks_intf.ml b/src/checks_intf.ml index f90d836a..820cc5c5 100644 --- a/src/checks_intf.ml +++ b/src/checks_intf.ml @@ -3,24 +3,26 @@ open! Import type empty = | module type S = sig + type io + module Stat : sig - val run : root:string -> unit + val run : io:io -> root:string -> unit (** Read basic metrics from an existing store. *) - val term : (unit -> unit) Cmdliner.Term.t + val term : io:io -> (unit -> unit) Cmdliner.Term.t (** A pre-packaged [Cmdliner] term for executing {!run}. *) end module Integrity_check : sig - val run : root:string -> unit + val run : io:io -> root:string -> unit (** Check that the integrity invariants of a store are preserved, and display any broken invariants. *) - val term : (unit -> unit) Cmdliner.Term.t + val term : io:io -> (unit -> unit) Cmdliner.Term.t (** A pre-packaged [Cmdliner] term for executing {!run}. *) end - val cli : unit -> empty + val cli : io:io -> unit -> empty (** Run a [Cmdliner] binary containing tools for running offline integrity checks. *) end @@ -38,5 +40,6 @@ module type Checks = sig module type S = S module type Platform_args = Platform_args - module Make (K : Data.Key) (V : Data.Value) (_ : Platform_args) : S + module Make (K : Data.Key) (V : Data.Value) (P : Platform_args) : + S with type io = P.IO.io end diff --git a/src/eio/buffer.ml b/src/eio/buffer.ml new file mode 100644 index 00000000..f6e45df0 --- /dev/null +++ b/src/eio/buffer.ml @@ -0,0 +1,54 @@ +(* The MIT License + + Copyright (c) 2021 Clément Pascutto + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. *) + +open! Import + +type t = { mutable buffer : bytes; mutable position : int } + +external unsafe_blit_string : string -> int -> bytes -> int -> int -> unit + = "caml_blit_string" +[@@noalloc] +(** Bytes.unsafe_blit_string not available in OCaml 4.08. 
*) + +let create n = { buffer = Bytes.create n; position = 0 } + +let write_with (write : string -> int -> int -> unit) b = + write (Bytes.unsafe_to_string b.buffer) 0 b.position + +let length b = b.position +let is_empty b = b.position = 0 +let clear b = b.position <- 0 + +let resize b more = + let old_pos = b.position in + let old_len = Bytes.length b.buffer in + let new_len = ref old_len in + while old_pos + more > !new_len do + new_len := 2 * !new_len + done; + let new_buffer = Bytes.create !new_len in + Bytes.blit b.buffer 0 new_buffer 0 b.position; + b.buffer <- new_buffer + +let add_substring b s ~off ~len = + let new_position = b.position + len in + if new_position > Bytes.length b.buffer then resize b len; + unsafe_blit_string s off b.buffer b.position len; + b.position <- new_position + +let blit ~src ~src_off ~dst ~dst_off ~len = + assert (src_off + len <= src.position); + Bytes.blit src.buffer src_off dst dst_off len + +let add_string b s = add_substring b s ~off:0 ~len:(String.length s) diff --git a/src/eio/buffer.mli b/src/eio/buffer.mli new file mode 100644 index 00000000..6275a380 --- /dev/null +++ b/src/eio/buffer.mli @@ -0,0 +1,46 @@ +(* The MIT License + + Copyright (c) 2021 Clément Pascutto + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. *) + +(** Extensible buffers with non-allocating access to the buffer's contents. *) + +type t +(** The type of buffers. *) + +val create : int -> t +(** [create n] is a fresh buffer with initial size [n]. *) + +val length : t -> int +(** [length b] is the number of bytes contained in the buffer. *) + +val is_empty : t -> bool +(** [is_empty t] iff [t] contains 0 characters. *) + +val clear : t -> unit +(** [clear t] clears the data contained in [t]. It does not reset the buffer to + its initial size. *) + +val add_substring : t -> string -> off:int -> len:int -> unit +(** [add_substring t s ~off ~len] appends the substring + [s.(off) .. s.(off + len - 1)] at the end of [t], resizing [t] if necessary. *) + +val add_string : t -> string -> unit +(** [add_string t s] appends [s] at the end of [t], resizing [t] if necessary. *) + +val write_with : (string -> int -> int -> unit) -> t -> unit +(** [write_with writer t] uses [writer] to write the contents of [t]. [writer] + takes a string to write, an offset and a length. *) + +val blit : src:t -> src_off:int -> dst:bytes -> dst_off:int -> len:int -> unit +(** [blit] copies [len] bytes from the buffer [src], starting at offset + [src_off], into the supplied bytes [dst], starting at offset [dst_off]. 
*) diff --git a/src/eio/dune b/src/eio/dune new file mode 100644 index 00000000..768d3e6b --- /dev/null +++ b/src/eio/dune @@ -0,0 +1,6 @@ +(library + (public_name index.eio) + (name index_eio) + (optional) + (libraries fmt fmt.tty index logs logs.threaded threads.posix unix eio + eio.core cstruct mtime mtime.clock.os optint progress)) diff --git a/src/eio/import.ml b/src/eio/import.ml new file mode 100644 index 00000000..bbdb78fa --- /dev/null +++ b/src/eio/import.ml @@ -0,0 +1,10 @@ +module Int63 = Optint.Int63 + +type int63 = Int63.t + +module Mtime = struct + include Mtime + + let span_to_s span = Mtime.Span.to_float_ns span *. 1e-9 + let span_to_us span = Mtime.Span.to_float_ns span *. 1e-3 +end diff --git a/src/eio/index_eio.ml b/src/eio/index_eio.ml new file mode 100644 index 00000000..5bb8b584 --- /dev/null +++ b/src/eio/index_eio.ml @@ -0,0 +1,429 @@ +(* The MIT License + + Copyright (c) 2019 Craig Ferguson + Thomas Gazagnaire + Ioana Cristescu + Clément Pascutto + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. *) + +open! Import + +let src = Logs.Src.create "index_eio" ~doc:"Index_eio" + +module Log = (val Logs.src_log src : Logs.LOG) + +exception RO_not_allowed + +let current_version = "00000001" + +module Stats = Index.Stats + +type io = { switch : Eio.Switch.t; root : Eio.Fs.dir_ty Eio.Path.t } + +module IO : Index.Platform.IO with type io = io = struct + type nonrec io = io + + let ( ++ ) = Int63.add + let ( -- ) = Int63.sub + + type t = { + mutable file : Eio.Fs.dir_ty Eio.Path.t; + mutable header : int63; + mutable raw : Raw.t; + mutable offset : int63; + mutable flushed : int63; + mutable fan_size : int63; + readonly : bool; + buf : Buffer.t; + flush_callback : unit -> unit; + sw : Eio.Switch.t; + } + + let flush ?no_callback ?(with_fsync = false) t = + if t.readonly then raise RO_not_allowed; + if not (Buffer.is_empty t.buf) then ( + let buf_len = Buffer.length t.buf in + let offset = t.offset in + (match no_callback with Some () -> () | None -> t.flush_callback ()); + Log.debug (fun l -> l "[%a] flushing %d bytes" Eio.Path.pp t.file buf_len); + Buffer.write_with (Raw.unsafe_write t.raw ~off:t.flushed) t.buf; + Buffer.clear t.buf; + Raw.Offset.set t.raw offset; + assert (t.flushed ++ Int63.of_int buf_len = t.header ++ offset); + t.flushed <- offset ++ t.header); + if with_fsync then Raw.fsync t.raw + + let rename ~src ~dst = + flush ~with_fsync:true src; + Raw.close dst.raw; + Eio.Path.rename src.file dst.file; + Buffer.clear dst.buf; + src.file <- dst.file; + dst.header <- src.header; + dst.fan_size <- src.fan_size; + dst.offset <- src.offset; + dst.flushed <- src.flushed; + dst.raw <- src.raw + + let close t = + if not t.readonly then Buffer.clear t.buf; + Raw.close t.raw + + let auto_flush_limit = Int63.of_int 1_000_000 + + let append_substring t buf ~off ~len = + if t.readonly then raise RO_not_allowed; + Buffer.add_substring t.buf buf ~off ~len; + let len = Int63.of_int len in + t.offset <- t.offset ++ len; + if t.offset -- t.flushed > 
auto_flush_limit then flush t
+
+  let append t buf = append_substring t buf ~off:0 ~len:(String.length buf)
+
+  let read t ~off ~len buf =
+    let off = t.header ++ off in
+    let end_of_value = off ++ Int63.of_int len in
+    if not t.readonly then
+      assert (
+        let total_length = t.flushed ++ Int63.of_int (Buffer.length t.buf) in
+        (* NOTE: we don't require that [end_of_value <= total_length] in order
+           to support short reads on read-write handles (see comment about this
+           case below). *)
+        off <= total_length);
+
+    if t.readonly || end_of_value <= t.flushed then
+      (* Value is entirely on disk *)
+      Raw.unsafe_read t.raw ~off ~len buf
+    else
+      (* Must read some data not yet flushed to disk *)
+      let requested_from_disk = max 0 (Int63.to_int (t.flushed -- off)) in
+      let requested_from_buffer = len - requested_from_disk in
+      let read_from_disk =
+        if requested_from_disk > 0 then (
+          let read = Raw.unsafe_read t.raw ~off ~len:requested_from_disk buf in
+          assert (read = requested_from_disk);
+          read)
+        else 0
+      in
+      let read_from_buffer =
+        let src_off = max 0 (Int63.to_int (off -- t.flushed)) in
+        let len =
+          (* The user may request more bytes than actually exist, in which case
+             we read to the end of the write buffer and return a size less than
+             [len]. *)
+          let available_length = Buffer.length t.buf - src_off in
+          min available_length requested_from_buffer
+        in
+        Buffer.blit ~src:t.buf ~src_off ~dst:buf ~dst_off:requested_from_disk
+          ~len;
+        len
+      in
+      read_from_disk + read_from_buffer
+
+  let offset t = t.offset
+
+  let get_generation t =
+    let i = Raw.Generation.get t.raw in
+    Log.debug (fun m -> m "get_generation: %a" Int63.pp i);
+    i
+
+  let get_fanout t = Raw.Fan.get t.raw
+  let get_fanout_size t = Raw.Fan.get_size t.raw
+
+  let set_fanout t buf =
+    assert (Int63.(equal (of_int (String.length buf)) t.fan_size));
+    Raw.Fan.set t.raw buf
+
+  module Header = struct
+    type header = { offset : int63; generation : int63 }
+
+    let pp ppf { offset; generation } =
+      Format.fprintf ppf "{ offset = %a; generation = %a }" Int63.pp offset
+        Int63.pp generation
+
+    let get t =
+      let Raw.Header.{ offset; generation; _ } = Raw.Header.get t.raw in
+      t.offset <- offset;
+      let headers = { offset; generation } in
+      Log.debug (fun m ->
+          m "[%a] get_headers: %a" Eio.Path.pp t.file pp headers);
+      headers
+
+    let set t { offset; generation } =
+      let version = current_version in
+      Log.debug (fun m ->
+          m "[%a] set_header %a" Eio.Path.pp t.file pp { offset; generation });
+      Raw.Header.(set t.raw { offset; version; generation })
+  end
+
+  let mkdir dirname = Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dirname
+
+  let mkdir_of file =
+    match Eio.Path.split file with
+    | None -> ()
+    | Some (dirname, _) -> mkdir dirname
+
+  let raw_file ~sw ~version ~offset ~generation file =
+    Eio.Switch.run @@ fun _sw ->
+    let x = Eio.Path.open_out ~sw ~create:(`If_missing 0o644) file in
+    let raw = Raw.v x in
+    let header = { Raw.Header.offset; version; generation } in
+    Log.debug (fun m ->
+        m "[%a] raw set_header %a" Eio.Path.pp file Header.pp
+          { offset; generation });
+    Raw.Header.set raw header;
+    Raw.Fan.set raw "";
+    Raw.fsync raw;
+    raw
+
+  let clear ~generation ?(hook = fun () -> ()) ~reopen t =
+    t.offset <- Int63.zero;
+    t.flushed <- t.header;
+    Buffer.clear t.buf;
+    let old = t.raw in
+
+    if reopen then (
+      (* Open a fresh file and rename it to ensure atomicity:
+         concurrent readers should never see the file disappearing. *)
+      let tmp_file =
+        let dir, path = t.file in
+        (dir, path ^ "_tmp")
+      in
+      t.raw <-
+        raw_file ~sw:t.sw ~version:current_version ~generation
+          ~offset:Int63.zero tmp_file;
+      Eio.Path.rename tmp_file t.file)
+    else
+      (* Remove the current file. This allows a fresh file to be
+         created, before writing the new generation in the old file. *)
+      Eio.Path.unlink t.file;
+
+    hook ();
+
+    (* Set new generation in the old file. *)
+    Raw.Header.set old
+      { Raw.Header.offset = Int63.zero; generation; version = current_version };
+    Raw.close old
+
+  let () = assert (String.length current_version = 8)
+
+  let v_instance ~sw ?(flush_callback = fun () -> ()) ~readonly ~fan_size
+      ~offset file raw =
+    let eight = Int63.of_int 8 in
+    let header = eight ++ eight ++ eight ++ eight ++ fan_size in
+    {
+      header;
+      file;
+      offset;
+      raw;
+      readonly;
+      fan_size;
+      buf = Buffer.create (if readonly then 0 else 4 * 1024);
+      flushed = header ++ offset;
+      flush_callback;
+      sw;
+    }
+
+  let v ~io ?flush_callback ~fresh ~generation ~fan_size filename =
+    let file = Eio.Path.(io.root / filename) in
+    let v = v_instance ~sw:io.switch ?flush_callback ~readonly:false file in
+    mkdir_of file;
+    let header =
+      { Raw.Header.offset = Int63.zero; version = current_version; generation }
+    in
+    match Eio.Path.is_file file with
+    | false ->
+        let x =
+          Eio.Path.open_out ~sw:io.switch file ~create:(`Exclusive 0o644)
+        in
+        let raw = Raw.v x in
+        Raw.Header.set raw header;
+        Raw.Fan.set_size raw fan_size;
+        v ~fan_size ~offset:Int63.zero raw
+    | true ->
+        let x = Eio.Path.open_out ~sw:io.switch file ~create:`Never in
+        let raw = Raw.v x in
+        if fresh then (
+          Raw.Header.set raw header;
+          Raw.Fan.set_size raw fan_size;
+          Raw.fsync raw;
+          v ~fan_size ~offset:Int63.zero raw)
+        else
+          let version = Raw.Version.get raw in
+          if version <> current_version then
+            Fmt.failwith "Io.v: unsupported version %s (current version is %s)"
+              version current_version;
+
+          let offset = Raw.Offset.get raw in
+          let fan_size = Raw.Fan.get_size raw in
+          v ~fan_size ~offset raw
+
+  let v_readonly ~io filename =
+    let file = Eio.Path.(io.root / filename) in
+    let v = v_instance ~sw:io.switch ~readonly:true file in
+    try
+      let x = Eio.Path.open_out ~sw:io.switch ~create:`Never file in
+      let raw = Raw.v x in
+      try
+        let version = Raw.Version.get raw in
+        if version <> current_version then
+          Fmt.failwith "Io.v: unsupported version %s (current version is %s)"
+            version current_version;
+        let offset = Raw.Offset.get raw in
+        let fan_size = Raw.Fan.get_size raw in
+        Ok (v ~fan_size ~offset raw)
+      with Raw.Not_written ->
+        (* The readonly instance cannot read a file that does not have a
+           header.*)
+        Raw.close raw;
+        Error `No_file_on_disk
+    with
+    | Eio.Io (Eio.Fs.E (Not_found _), _) -> Error `No_file_on_disk
+    | e -> raise e
+
+  let exists = Sys.file_exists
+  let size { raw; _ } = (Raw.fstat raw).Eio.File.Stat.size |> Int63.to_int
+  let size_header t = t.header |> Int63.to_int
+
+  module Lock = struct
+    type t = { path : string; fd : Eio.File.rw_ty Eio.Resource.t }
+
+    exception Locked of string
+
+    let single_write fd str =
+      let cstruct = Cstruct.string str in
+      Eio.File.pwrite_single fd ~file_offset:Int63.zero [ cstruct ]
+
+    let unsafe_lock ~io _op filename =
+      let f = Eio.Path.(io.root / filename) in
+      mkdir_of f;
+      let fd = Eio.Path.open_out ~sw:io.switch ~create:(`If_missing 0o600) f in
+      let pid = string_of_int (Unix.getpid ()) in
+      let pid_len = String.length pid in
+      try
+        (* TODO: Unix.lockf fd op 0; *)
+        if single_write fd pid <> pid_len then (
Eio.Resource.close fd; + failwith "Unable to write PID to lock file") + else Some fd + with + (* TODO: + | Unix.Unix_error (Unix.EAGAIN, _, _) -> + Eio.Resource.close fd; + None *) + | e -> + Eio.Resource.close fd; + raise e + + let with_ic path f = + let ic = open_in path in + let a = f ic in + close_in ic; + a + + let err_rw_lock path = + let line = with_ic path input_line in + let pid = int_of_string line in + Log.err (fun l -> + l + "Cannot lock %s: index is already opened in write mode by PID %d. \ + Current PID is %d." + path pid (Unix.getpid ())); + raise (Locked path) + + let lock ~io path = + Log.debug (fun l -> l "Locking %s" path); + match unsafe_lock ~io Unix.F_TLOCK path with + | Some fd -> { path; fd } + | None -> err_rw_lock path + + let unlock { path; fd } = + Log.debug (fun l -> l "Unlocking %s" path); + Eio.Resource.close fd + + let pp_dump path = + match Sys.file_exists path with + | false -> None + | true -> + let contents = + with_ic path (fun ic -> + really_input_string ic (in_channel_length ic)) + in + Some (fun ppf -> Fmt.string ppf contents) + end +end + +module Semaphore = struct + module S = Eio.Mutex + + type t = S.t + + let is_held t = + let acquired = S.try_lock t in + if acquired then S.unlock t; + not acquired + + let make b = + let t = S.create () in + if not b then S.lock t; + t + + let release t = S.unlock t + + let acquire n t = + let x = Mtime_clock.counter () in + S.lock t; + let y = Mtime_clock.count x in + if Mtime.span_to_s y > 1. then + Log.warn (fun l -> l "Semaphore %s was blocked for %a" n Mtime.Span.pp y) + + let with_acquire n t f = + acquire n t; + Fun.protect ~finally:(fun () -> S.unlock t) f +end + +module Thread = struct + type io = IO.io + type 'a t = 'a Eio.Promise.or_exn + + let async ~io f = Eio.Fiber.fork_promise ~sw:io.switch f + let yield = Eio.Fiber.yield + let return a = Eio.Promise.create_resolved (Ok a) + + let await t = + match Eio.Promise.await t with + | Ok v -> Ok v + | Error exn -> Error (`Async_exn exn) +end + +module Platform = struct + type nonrec io = io + + module IO = IO + module Semaphore = Semaphore + module Thread = Thread + module Clock = Mtime_clock + module Progress = Progress + module Fmt_tty = Fmt_tty +end + +module Make (K : Index.Key.S) (V : Index.Value.S) (C : Index.Cache.S) = + Index.Make (K) (V) (Platform) (C) + +module Private = struct + module Platform = Platform + module IO = IO + module Raw = Raw + + module Make (K : Index.Key.S) (V : Index.Value.S) = + Index.Private.Make (K) (V) (Platform) +end diff --git a/src/eio/index_eio.mli b/src/eio/index_eio.mli new file mode 100644 index 00000000..79c25160 --- /dev/null +++ b/src/eio/index_eio.mli @@ -0,0 +1,34 @@ +(* The MIT License + + Copyright (c) 2019 Craig Ferguson + Thomas Gazagnaire + Ioana Cristescu + Clément Pascutto + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. *) + +open! 
Import + +type io = { switch : Eio.Switch.t; root : Eio.Fs.dir_ty Eio.Path.t } + +module Make (K : Index.Key.S) (V : Index.Value.S) (C : Index.Cache.S) : + Index.S with type key = K.t and type value = V.t and type io = io + +(** These modules should not be used. They are exposed purely for testing + purposes. *) +module Private : sig + module Platform : Index.Platform.S with type io = io + module IO : Index.Platform.IO with type io = io + module Raw = Raw + + module Make (K : Index.Key.S) (V : Index.Value.S) (C : Index.Cache.S) : + Index.Private.S with type key = K.t and type value = V.t and type io = io +end diff --git a/src/eio/raw.ml b/src/eio/raw.ml new file mode 100644 index 00000000..a0d6b7dc --- /dev/null +++ b/src/eio/raw.ml @@ -0,0 +1,203 @@ +open! Import +module Stats = Index.Stats + +let ( ++ ) = Int63.add + +type t = Rw : _ Eio.File.rw -> t [@@unboxed] + +let v fd = Rw fd + +let really_write fd fd_offset buffer buffer_offset length = + let cstruct = Cstruct.string ~off:buffer_offset ~len:length buffer in + Eio.File.pwrite_all fd ~file_offset:fd_offset [ cstruct ] +(* + let rec aux fd_offset buffer_offset length = + let w = Syscalls.pwrite ~fd ~fd_offset ~buffer ~buffer_offset ~length in + if w = 0 || w = length then () + else + (aux [@tailcall]) + (fd_offset ++ Int63.of_int w) + (buffer_offset + w) (length - w) + in + aux fd_offset buffer_offset length + *) + +let really_read fd fd_offset length buffer = + let cstruct = Cstruct.create length in + Eio.File.pread_exact fd ~file_offset:fd_offset [ cstruct ]; + Cstruct.blit_to_bytes cstruct 0 buffer 0 length; + length +(* + let rec aux fd_offset buffer_offset length = + let r = Syscalls.pread ~fd ~fd_offset ~buffer ~buffer_offset ~length in + if r = 0 then buffer_offset (* end of file *) + else if r = length then buffer_offset + r + else + (aux [@tailcall]) + (fd_offset ++ Int63.of_int r) + (buffer_offset + r) (length - r) + in + aux fd_offset 0 length + *) + +let fsync (Rw t) = Eio.File.sync t +let close (Rw t) = Eio.Resource.close t +let fstat (Rw t) = Eio.File.stat t + +let unsafe_write (Rw t) ~off buffer buffer_offset length = + really_write t off buffer buffer_offset length; + Stats.add_write length + +let unsafe_read (Rw t) ~off ~len buf = + match really_read t off len buf with + | n -> + Stats.add_read n; + n + | exception End_of_file -> 0 + +let encode_int63 n = + let buf = Bytes.create Int63.encoded_size in + Int63.encode buf ~off:0 n; + Bytes.unsafe_to_string buf + +let decode_int63 buf = Int63.decode ~off:0 buf + +exception Not_written + +let assert_read ~len n = + if n = 0 && n <> len then raise Not_written; + assert ( + if Int.equal n len then true + else ( + Printf.eprintf "Attempted to read %d bytes, but got %d bytes instead!\n%!" 
+ len n; + false)) +[@@inline always] + +module Offset = struct + let off = Int63.zero + let set t n = unsafe_write t ~off (encode_int63 n) 0 8 + + let get t = + let len = 8 in + let buf = Bytes.create len in + let n = unsafe_read t ~off ~len buf in + assert_read ~len n; + decode_int63 (Bytes.unsafe_to_string buf) +end + +module Version = struct + let off = Int63.of_int 8 + + let get t = + let len = 8 in + let buf = Bytes.create len in + let n = unsafe_read t ~off ~len buf in + assert_read ~len n; + Bytes.unsafe_to_string buf + + let set t v = unsafe_write t ~off v 0 8 +end + +module Generation = struct + let off = Int63.of_int 16 + + let get t = + let len = 8 in + let buf = Bytes.create len in + let n = unsafe_read t ~off ~len buf in + assert_read ~len n; + decode_int63 (Bytes.unsafe_to_string buf) + + let set t gen = unsafe_write t ~off (encode_int63 gen) 0 8 +end + +module Fan = struct + let off = Int63.of_int 24 + + let set t buf = + let buf_len = String.length buf in + let size = encode_int63 (Int63.of_int buf_len) in + unsafe_write t ~off size 0 8; + if buf <> "" then unsafe_write t ~off:(off ++ Int63.of_int 8) buf 0 buf_len + + let get_size t = + let len = 8 in + let size_buf = Bytes.create len in + let n = unsafe_read t ~off ~len size_buf in + assert_read ~len n; + decode_int63 (Bytes.unsafe_to_string size_buf) + + let set_size t size = + let buf = encode_int63 size in + unsafe_write t ~off buf 0 8 + + let get t = + let size = Int63.to_int (get_size t) in + let buf = Bytes.create size in + let n = unsafe_read t ~off:(off ++ Int63.of_int 8) ~len:size buf in + assert_read ~len:size n; + Bytes.unsafe_to_string buf +end + +module Header = struct + type t = { offset : int63; version : string; generation : int63 } + + (** NOTE: These functions must be equivalent to calling the above [set] / + [get] functions individually. *) + + let total_header_length = 8 + 8 + 8 + + let read_word buf off = + let result = Bytes.create 8 in + Bytes.blit buf off result 0 8; + Bytes.unsafe_to_string result + + let get t = + let header = Bytes.create total_header_length in + let n = unsafe_read t ~off:Int63.zero ~len:total_header_length header in + assert_read ~len:total_header_length n; + let offset = read_word header 0 |> decode_int63 in + let version = read_word header 8 in + let generation = read_word header 16 |> decode_int63 in + { offset; version; generation } + + let set t { offset; version; generation } = + assert (String.length version = 8); + let b = Bytes.create total_header_length in + Bytes.blit_string (encode_int63 offset) 0 b 0 8; + Bytes.blit_string version 0 b 8 8; + Bytes.blit_string (encode_int63 generation) 0 b 16 8; + unsafe_write t ~off:Int63.zero (Bytes.unsafe_to_string b) 0 + total_header_length +end + +module Header_prefix = struct + type t = { offset : int63; version : string } + + (** NOTE: These functions must be equivalent to calling the above [set] / + [get] functions individually. 
*) + + let total_header_length = 8 + 8 + + let read_word buf off = + let result = Bytes.create 8 in + Bytes.blit buf off result 0 8; + Bytes.unsafe_to_string result + + let get t = + let header = Bytes.create total_header_length in + let n = unsafe_read t ~off:Int63.zero ~len:total_header_length header in + assert_read ~len:total_header_length n; + let offset = read_word header 0 |> decode_int63 in + let version = read_word header 8 in + { offset; version } + + let set t { offset; version } = + assert (String.length version = 8); + let b = Bytes.create total_header_length in + Bytes.blit_string (encode_int63 offset) 0 b 0 8; + Bytes.blit_string version 0 b 8 8; + unsafe_write t ~off:Int63.zero (Bytes.unsafe_to_string b) 0 + total_header_length +end diff --git a/src/eio/raw.mli b/src/eio/raw.mli new file mode 100644 index 00000000..6d6d3691 --- /dev/null +++ b/src/eio/raw.mli @@ -0,0 +1,76 @@ +(** [Raw] wraps a file-descriptor with an file-format used internally by Index. + The format contains the following header fields: + + - {b offset}: a 64-bit integer, denoting the length of the file containing + valid data; + - {b version}: an 8-byte version string; + - {b generation}: a 64-bit integer denoting the generation number; + - {b fan}: a 64-bit length field, followed by a string containing that many + bytes. *) + +open! Import + +type t +(** The type of [raw] file handles. *) + +val v : _ Eio.File.rw -> t +(** Construct a [raw] value from a file descriptor. *) + +val unsafe_write : t -> off:int63 -> string -> int -> int -> unit +val unsafe_read : t -> off:int63 -> len:int -> bytes -> int +val fsync : t -> unit +val close : t -> unit +val fstat : t -> Eio.File.Stat.t + +exception Not_written + +module Version : sig + val get : t -> string + val set : t -> string -> unit +end + +module Offset : sig + val get : t -> int63 + val set : t -> int63 -> unit +end + +module Generation : sig + val get : t -> int63 + val set : t -> int63 -> unit +end + +module Fan : sig + val get : t -> string + val set : t -> string -> unit + val get_size : t -> int63 + val set_size : t -> int63 -> unit +end + +module Header : sig + type raw + + type t = { + offset : int63; (** The length of the file containing valid data *) + version : string; (** Format version *) + generation : int63; (** Generation number *) + } + + val get : raw -> t + val set : raw -> t -> unit +end +with type raw := t + +(** Functions for interacting with the header format {i without} the generation + number, provided for use in [irmin-pack]. *) +module Header_prefix : sig + type raw + + type t = { + offset : int63; (** The length of the file containing valid data *) + version : string; (** Format version *) + } + + val get : raw -> t + val set : raw -> t -> unit +end +with type raw := t diff --git a/src/index.ml b/src/index.ml index 6dde17a4..b01e91a6 100644 --- a/src/index.ml +++ b/src/index.ml @@ -149,8 +149,11 @@ struct header of [io]. *) } + type io = Platform.IO.io + type instance = { config : config; + io : io; root : string; (** The root location of the index *) mutable generation : int63; (** The generation is a counter of rewriting operations (e.g. [clear] @@ -281,7 +284,7 @@ struct the temporary [log_async], or to fill the [log] field when they have been created before their RW counterpart. *) let try_load_log t path = - match IO.v_readonly path with + match IO.v_readonly ~io:t.io path with | Error `No_file_on_disk -> None | Ok io -> let log = Log_file.create io in @@ -335,7 +338,7 @@ struct changed after a merge. 
*) Option.iter (fun (i : index) -> IO.close i.io) t.index; let index_path = Layout.data ~root:t.root in - match IO.v_readonly index_path with + match IO.v_readonly ~io:t.io index_path with | Error `No_file_on_disk -> t.index <- None | Ok io -> let fan_out = Fan.import ~hash_size:K.hash_size (IO.get_fanout io) in @@ -517,13 +520,13 @@ struct Log_file.flush ~with_fsync:true log; IO.clear ~generation ~reopen:false log_async - let v_no_cache ?(flush_callback = fun () -> ()) ~throttle ~fresh ~readonly + let v_no_cache ~io ?(flush_callback = fun () -> ()) ~throttle ~fresh ~readonly ~lru_size ~log_size root = Log.debug (fun l -> l "[%s] not found in cache, creating a new instance" (Filename.basename root)); let writer_lock = - if not readonly then Some (IO.Lock.lock (Layout.lock ~root)) else None + if not readonly then Some (IO.Lock.lock ~io (Layout.lock ~root)) else None in let config = { @@ -541,7 +544,7 @@ struct if readonly then if fresh then raise RO_not_allowed else None else let io = - IO.v ~flush_callback ~fresh ~generation:Int63.zero + IO.v ~io ~flush_callback ~fresh ~generation:Int63.zero ~fan_size:Int63.zero log_path in let entries = Int63.(to_int_exn (IO.offset io / Entry.encoded_sizeL)) in @@ -562,7 +565,7 @@ struct during sync_log so there is no need to do it here. *) if (not readonly) && IO.exists log_async_path then let io = - IO.v ~flush_callback ~fresh ~generation ~fan_size:Int63.zero + IO.v ~io ~flush_callback ~fresh ~generation ~fan_size:Int63.zero log_async_path in (* in fresh mode, we need to wipe the existing [log_async] file. *) @@ -582,8 +585,8 @@ struct (* NOTE: No [flush_callback] on the Index IO as we maintain the invariant that any bindings it contains were previously persisted in either [log] or [log_async]. *) - IO.v ?flush_callback:None ~fresh ~generation ~fan_size:Int63.zero - index_path + IO.v ~io ?flush_callback:None ~fresh ~generation + ~fan_size:Int63.zero index_path in let entries = Int63.div (IO.offset io) Entry.encoded_sizeL in if entries = Int63.zero then None @@ -602,6 +605,7 @@ struct in { config; + io; generation; log; log_async = None; @@ -620,12 +624,12 @@ struct let empty_cache = Cache.create - let v ?(flush_callback = fun () -> ()) ?(cache = empty_cache ()) + let v ~io ?(flush_callback = fun () -> ()) ?(cache = empty_cache ()) ?(fresh = false) ?(readonly = false) ?(throttle = `Block_writes) ?(lru_size = 30_000) ~log_size root = let new_instance () = let instance = - v_no_cache ~flush_callback ~fresh ~readonly ~log_size ~lru_size + v_no_cache ~io ~flush_callback ~fresh ~readonly ~log_size ~lru_size ~throttle root in if readonly then sync_instance instance; @@ -802,7 +806,7 @@ struct in let merge = let merge_path = Layout.merge ~root:t.root in - IO.v ~fresh:true ~generation + IO.v ~io:t.io ~fresh:true ~generation ~fan_size:(Int63.of_int (Fan.exported_size fan_out)) merge_path in @@ -813,7 +817,7 @@ struct | `Abort -> `Aborted | `Continue -> let io = - IO.v ~fresh:true ~generation ~fan_size:Int63.zero + IO.v ~io:t.io ~fresh:true ~generation ~fan_size:Int63.zero (Layout.data ~root:t.root) in append_remaining_log fan_out sorted_log_bindings merge; @@ -872,7 +876,7 @@ struct let reset_log_async t = let io = let log_async_path = Layout.log_async ~root:t.root in - IO.v ~flush_callback:t.config.flush_callback ~fresh:true + IO.v ~io:t.io ~flush_callback:t.config.flush_callback ~fresh:true ~generation:(Int63.succ t.generation) ~fan_size:Int63.zero log_async_path in @@ -928,7 +932,7 @@ struct merge_lock_wait Mtime.Span.pp rename_lock_wait); merge_result in - if 
blocking then go () |> Thread.return else Thread.async go + if blocking then go () |> Thread.return else Thread.async ~io:t.io go let is_empty t = (* A read-only instance may have not yet loaded the [log], if no explicit diff --git a/src/index_intf.ml b/src/index_intf.ml index 0ed8d11e..951fcea4 100644 --- a/src/index_intf.ml +++ b/src/index_intf.ml @@ -43,7 +43,11 @@ module type S = sig val empty_cache : unit -> cache (** Construct a new empty cache of index instances. *) + type io + (** The type of IO context required by the backend. *) + val v : + io:io -> ?flush_callback:(unit -> unit) -> ?cache:cache -> ?fresh:bool -> @@ -147,7 +151,7 @@ module type S = sig (** Offline [fsck]-like utility for checking the integrity of Index stores built using this module. *) module Checks : sig - include Checks.S + include Checks.S with type io = io (** @inline *) end end @@ -290,8 +294,8 @@ module type Index = sig (** The exception raised when any operation is attempted on a closed index, except for [close], which is idempotent. *) - module Make (K : Key.S) (V : Value.S) (_ : Platform.S) (C : Cache.S) : - S with type key = K.t and type value = V.t + module Make (K : Key.S) (V : Value.S) (P : Platform.S) (C : Cache.S) : + S with type key = K.t and type value = V.t and type io = P.io (** Run-time metric tracking for index instances. *) module Stats : sig @@ -335,7 +339,7 @@ module type Index = sig module type S = Private with type 'a hook := 'a Hook.t - module Make (K : Key) (V : Value) (_ : Platform.S) (C : Cache.S) : - S with type key = K.t and type value = V.t + module Make (K : Key) (V : Value) (P : Platform.S) (C : Cache.S) : + S with type key = K.t and type value = V.t and type io = P.io end end diff --git a/src/io_intf.ml b/src/io_intf.ml index 2dce8ded..ee6df5e0 100644 --- a/src/io_intf.ml +++ b/src/io_intf.ml @@ -1,9 +1,11 @@ open! Import module type S = sig + type io type t val v : + io:io -> ?flush_callback:(unit -> unit) -> fresh:bool -> generation:int63 -> @@ -11,7 +13,7 @@ module type S = sig string -> t - val v_readonly : string -> (t, [ `No_file_on_disk ]) result + val v_readonly : io:io -> string -> (t, [ `No_file_on_disk ]) result val offset : t -> int63 val read : t -> off:int63 -> len:int -> bytes -> int @@ -34,7 +36,7 @@ module type S = sig module Lock : sig type t - val lock : string -> t + val lock : io:io -> string -> t val unlock : t -> unit val pp_dump : string -> (Format.formatter -> unit) option @@ -64,7 +66,7 @@ module type Io = sig module type S = S module Extend (S : S) : sig - include S with type t = S.t + include S with type t = S.t and type io = S.io val iter : page_size:int63 -> diff --git a/src/platform.ml b/src/platform.ml index 99a49395..3437d05f 100644 --- a/src/platform.ml +++ b/src/platform.ml @@ -40,10 +40,12 @@ end module type THREAD = sig (** Cooperative threads. *) + type io + type 'a t (** The type of thread handles. *) - val async : (unit -> 'a) -> 'a t + val async : io:io -> (unit -> 'a) -> 'a t (** [async f] creates a new thread of control which executes [f ()] and returns the corresponding thread handle. The thread terminates whenever [f ()] returns a value or raises an exception. 
*) @@ -64,9 +66,11 @@ module type FMT_TTY = sig end module type S = sig - module IO : IO + type io + + module IO : IO with type io = io module Semaphore : SEMAPHORE - module Thread : THREAD + module Thread : THREAD with type io = io module Clock : CLOCK module Progress : Progress_engine.S module Fmt_tty : FMT_TTY diff --git a/src/unix/index_unix.ml b/src/unix/index_unix.ml index 37953031..777ebecc 100644 --- a/src/unix/index_unix.ml +++ b/src/unix/index_unix.ml @@ -27,7 +27,9 @@ let current_version = "00000001" module Stats = Index.Stats -module IO : Index.Platform.IO = struct +module IO : Index.Platform.IO with type io = unit = struct + type io = unit + let ( ++ ) = Int63.add let ( -- ) = Int63.sub @@ -237,7 +239,7 @@ module IO : Index.Platform.IO = struct flush_callback; } - let v ?flush_callback ~fresh ~generation ~fan_size file = + let v ~io:() ?flush_callback ~fresh ~generation ~fan_size file = let v = v_instance ?flush_callback ~readonly:false file in mkdir (Filename.dirname file); let header = @@ -268,7 +270,7 @@ module IO : Index.Platform.IO = struct let fan_size = Raw.Fan.get_size raw in v ~fan_size ~offset raw - let v_readonly file = + let v_readonly ~io:() file = let v = v_instance ~readonly:true file in mkdir (Filename.dirname file); try @@ -338,7 +340,7 @@ module IO : Index.Platform.IO = struct path pid (Unix.getpid ())); raise (Locked path) - let lock path = + let lock ~io:() path = Log.debug (fun l -> l "Locking %s" path); match unsafe_lock Unix.F_TLOCK path with | Some fd -> { path; fd } @@ -383,11 +385,13 @@ module Semaphore = struct end module Thread = struct + type nonrec io = IO.io + type 'a t = | Async of { thread : Thread.t; result : ('a, exn) result option ref } | Value of 'a - let async f = + let async ~io:() f = let result = ref None in let protected_f x = try result := Some (Ok (f x)) @@ -413,6 +417,8 @@ module Thread = struct end module Platform = struct + type io = unit + module IO = IO module Semaphore = Semaphore module Thread = Thread diff --git a/src/unix/index_unix.mli b/src/unix/index_unix.mli index a6d8ffec..0326cb4b 100644 --- a/src/unix/index_unix.mli +++ b/src/unix/index_unix.mli @@ -18,7 +18,7 @@ open! Import module Make (K : Index.Key.S) (V : Index.Value.S) (C : Index.Cache.S) : - Index.S with type key = K.t and type value = V.t + Index.S with type key = K.t and type value = V.t and type io = unit module Syscalls = Syscalls (** Bindings to Unix system calls. *) @@ -26,10 +26,10 @@ module Syscalls = Syscalls (** These modules should not be used. They are exposed purely for testing purposes. 
*) module Private : sig - module Platform : Index.Platform.S - module IO : Index.Platform.IO + module Platform : Index.Platform.S with type io = unit + module IO : Index.Platform.IO with type io = unit module Raw = Raw module Make (K : Index.Key.S) (V : Index.Value.S) (C : Index.Cache.S) : - Index.Private.S with type key = K.t and type value = V.t + Index.Private.S with type key = K.t and type value = V.t and type io = unit end diff --git a/test/cli/generate.ml b/test/cli/generate.ml index 84e937a4..f8d5e9f6 100644 --- a/test/cli/generate.ml +++ b/test/cli/generate.ml @@ -12,7 +12,7 @@ module Index = (Index.Cache.Noop) let random () = - let index = Index.v ~fresh:true ~log_size:100 "data/random" in + let index = Index.v ~io:() ~fresh:true ~log_size:100 "data/random" in for _ = 1 to 1001 do Index.replace index (random ()) (random ()) done; diff --git a/test/cli/index_fsck.ml b/test/cli/index_fsck.ml index aca35eb8..07972cfa 100644 --- a/test/cli/index_fsck.ml +++ b/test/cli/index_fsck.ml @@ -7,4 +7,4 @@ module Index = (Index.Key.String_fixed (Size)) (Index.Value.String_fixed (Size)) (Index.Cache.Noop) -let () = match Index.Checks.cli () with _ -> . +let () = match Index.Checks.cli ~io:() () with _ -> . diff --git a/test/dune b/test/dune index 9ff58460..050e6d85 100644 --- a/test/dune +++ b/test/dune @@ -1,4 +1,4 @@ -(test - (name main) +(tests + (names main test_unix test_eio) (package index) - (libraries index alcotest optint)) + (libraries index alcotest optint test_io index.unix index.eio eio eio.core eio.unix eio_main)) diff --git a/test/unix/common.ml b/test/io/common.ml similarity index 76% rename from test/unix/common.ml rename to test/io/common.ml index f4fc3f7a..3b5f75d0 100644 --- a/test/unix/common.ml +++ b/test/io/common.ml @@ -64,8 +64,14 @@ module Tbl = struct let check_binding tbl = check_entry (Hashtbl.find tbl) "table" end -module Index = struct - include Index_unix.Private.Make (Key) (Value) (Index.Cache.Unbounded) +module type Platform = sig + include Index.Platform.S + + val name : string +end + +module Index (Platform : Platform) = struct + include Index.Private.Make (Key) (Value) (Platform) (Index.Cache.Unbounded) let replace_random ?hook t = let ((key, value) as binding) = (Key.v (), Value.v ()) in @@ -88,10 +94,14 @@ let check_completed = function Alcotest.failf "Unexpected asynchronous exception: %s" (Printexc.to_string exn) -module Make_context (Config : sig - val root : string -end) = +module Make_context + (Platform : Platform) + (Config : sig + val root : string + end) = struct + module Index = Index (Platform) + let fresh_name = let c = ref 0 in fun object_type -> @@ -110,19 +120,19 @@ struct let ignore (_ : t) = () - let empty_index ?(log_size = Default.log_size) ?(lru_size = Default.lru_size) - ?flush_callback ?throttle () = + let empty_index ~io ?(log_size = Default.log_size) + ?(lru_size = Default.lru_size) ?flush_callback ?throttle () = let name = fresh_name "empty_index" in let cache = Index.empty_cache () in let rw = - Index.v ?flush_callback ?throttle ~cache ~fresh:true ~log_size ~lru_size - name + Index.v ~io ?flush_callback ?throttle ~cache ~fresh:true ~log_size + ~lru_size name in let close_all = ref (fun () -> Index.close rw) in let tbl = Hashtbl.create 0 in let clone ?(fresh = false) ~readonly () = let t = - Index.v ?flush_callback ?throttle ~cache ~fresh ~log_size ~lru_size + Index.v ~io ?flush_callback ?throttle ~cache ~fresh ~log_size ~lru_size ~readonly name in (close_all := !close_all >> fun () -> Index.close t); @@ -130,7 +140,7 @@ struct in 
{ rw; tbl; clone; close_all = (fun () -> !close_all ()) } - let full_index ?(size = Default.size) ?(log_size = Default.log_size) + let full_index ~io ?(size = Default.size) ?(log_size = Default.log_size) ?(lru_size = Default.lru_size) ?(flush_callback = fun () -> ()) ?throttle () = let f = @@ -140,7 +150,7 @@ struct let name = fresh_name "full_index" in let cache = Index.empty_cache () in let rw = - Index.v + Index.v ~io ~flush_callback:(fun () -> !f ()) ?throttle ~cache ~fresh:true ~log_size ~lru_size name in @@ -157,7 +167,7 @@ struct f := flush_callback (* Enable [flush_callback] *); let clone ?(fresh = false) ~readonly () = let t = - Index.v ~flush_callback ?throttle ~cache ~fresh ~log_size ~lru_size + Index.v ~io ~flush_callback ?throttle ~cache ~fresh ~log_size ~lru_size ~readonly name in (close_all := !close_all >> fun () -> Index.close t); @@ -170,38 +180,38 @@ struct t.close_all (); a - let with_empty_index ?log_size ?lru_size ?flush_callback ?throttle () f = + let with_empty_index ~io ?log_size ?lru_size ?flush_callback ?throttle () f = call_then_close - (empty_index ?log_size ?lru_size ?flush_callback ?throttle ()) + (empty_index ~io ?log_size ?lru_size ?flush_callback ?throttle ()) f - let with_full_index ?log_size ?lru_size ?flush_callback ?throttle ?size () f = + let with_full_index ~io ?log_size ?lru_size ?flush_callback ?throttle ?size () + f = call_then_close - (full_index ?log_size ?lru_size ?flush_callback ?throttle ?size ()) + (full_index ~io ?log_size ?lru_size ?flush_callback ?throttle ?size ()) f + + let check_equivalence index htbl = + Hashtbl.iter (Index.check_binding index) htbl; + Index.iter (Tbl.check_binding htbl) index + + let check_disjoint index htbl = + Hashtbl.iter + (fun k v -> + match Index.find index k with + | exception Not_found -> () + | v' when Value.equal v v' -> + Alcotest.failf "Binding %a should not be present" pp_binding (k, v) + | v' -> + Alcotest.failf "Found value %a when checking for the absence of %a" + (Repr.pp Value.t) v' pp_binding (k, v)) + htbl end let ( let* ) f k = f k let uncurry f (x, y) = f x y let ignore_value (_ : Value.t) = () let ignore_bool (_ : bool) = () -let ignore_index (_ : Index.t) = () - -let check_equivalence index htbl = - Hashtbl.iter (Index.check_binding index) htbl; - Index.iter (Tbl.check_binding htbl) index - -let check_disjoint index htbl = - Hashtbl.iter - (fun k v -> - match Index.find index k with - | exception Not_found -> () - | v' when Value.equal v v' -> - Alcotest.failf "Binding %a should not be present" pp_binding (k, v) - | v' -> - Alcotest.failf "Found value %a when checking for the absence of %a" - (Repr.pp Value.t) v' pp_binding (k, v)) - htbl let get_open_fd root = let ( >>? ) x f = match x with `Ok x -> f x | `Skip err -> `Skip err in diff --git a/test/unix/common.mli b/test/io/common.mli similarity index 83% rename from test/unix/common.mli rename to test/io/common.mli index a45c0264..35b2d0b1 100644 --- a/test/unix/common.mli +++ b/test/io/common.mli @@ -33,9 +33,17 @@ module Tbl : sig (** Check that a binding exists in the table. 
*) end -module Index : sig +module type Platform = sig + include Index.Platform.S + + val name : string +end + +module Index (Platform : Platform) : sig open Index.Private - include S with type key = Key.t and type value = Value.t + + include + S with type key = Key.t and type value = Value.t and type io = Platform.io val replace_random : ?hook:[ `Merge of merge_stages ] Hook.t -> @@ -51,9 +59,13 @@ module Index : sig end (** Helper constructors for fresh pre-initialised indices *) -module Make_context (Config : sig - val root : string -end) : sig +module Make_context + (Platform : Platform) + (Config : sig + val root : string + end) : sig + module Index : module type of Index (Platform) + type t = private { rw : Index.t; tbl : (string, string) Hashtbl.t; @@ -67,6 +79,7 @@ end) : sig (** [fresh_name typ] is a clean directory for a resource of type [typ]. *) val with_empty_index : + io:Platform.io -> ?log_size:int -> ?lru_size:int -> ?flush_callback:(unit -> unit) -> @@ -78,6 +91,7 @@ end) : sig index and any clones are closed. *) val with_full_index : + io:Platform.io -> ?log_size:int -> ?lru_size:int -> ?flush_callback:(unit -> unit) -> @@ -90,6 +104,9 @@ end) : sig key/value pairs. [f] also gets a constructor for opening clones of the index at the same location. Afterwards, the index and any clones are closed. *) + + val check_equivalence : Index.t -> (Key.t, Value.t) Hashtbl.t -> unit + val check_disjoint : Index.t -> (Key.t, Value.t) Hashtbl.t -> unit end val ( let* ) : ('a -> 'b) -> 'a -> 'b @@ -99,7 +116,6 @@ val ( >> ) : ('a -> 'b) -> ('b -> 'c) -> 'a -> 'c val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c val ignore_value : Value.t -> unit val ignore_bool : bool -> unit -val ignore_index : Index.t -> unit type binding = Key.t * Value.t @@ -108,7 +124,5 @@ val pp_binding : binding Fmt.t val check_completed : ([ `Aborted | `Completed ], [ `Async_exn of exn ]) result -> unit -val check_equivalence : Index.t -> (Key.t, Value.t) Hashtbl.t -> unit -val check_disjoint : Index.t -> (Key.t, Value.t) Hashtbl.t -> unit val get_open_fd : string -> [> `Ok of string list | `Skip of string ] val partition : string -> string list -> string list * string list diff --git a/test/io/dune b/test/io/dune new file mode 100644 index 00000000..028cf460 --- /dev/null +++ b/test/io/dune @@ -0,0 +1,4 @@ +(library + (name test_io) + (libraries index index.unix unix alcotest fmt logs logs.fmt re stdlib-shims + threads.posix repr semaphore-compat optint mtime.clock.os)) diff --git a/test/io/flush_callback.ml b/test/io/flush_callback.ml new file mode 100644 index 00000000..314d0ad8 --- /dev/null +++ b/test/io/flush_callback.ml @@ -0,0 +1,207 @@ +module I = Index +open Common + +module Make (Platform : Common.Platform) = struct + module Context = + Common.Make_context + (Platform) + (struct + let root = Filename.concat "_tests" (Platform.name ^ ".flush_callback") + end) + + module Index = Context.Index + module Semaphore = Platform.Semaphore + + module Mutable_callback = struct + type t = { + flush_callback : unit -> unit; + require_callback : + 'a. + ?at_least_once:unit -> ?callback:(unit -> unit) -> (unit -> 'a) -> 'a; + (** Locally override the definition of [flush_callback] inside a + continuation. The default [callback] is the identity function. + + - The continuation must trigger the callback exactly once (unless + [~at_least_once:()] is passed). + - Any callbacks not scoped inside [require_callback] result in + failure. 
*) + } + + let v () : t = + let unexpected () = Alcotest.fail "Callback call not expected" in + let top = ref unexpected in + let require_callback ?at_least_once ?(callback = fun () -> ()) (type a) + (f : unit -> a) : a = + let called = ref false in + let prev_top = !top in + (top := + fun () -> + match (at_least_once, !called) with + | None, true -> Alcotest.fail "flush_callback already triggered" + | _, _ -> + called := true; + (* Ensure the callback does not recursively invoke an auto-flush. *) + let saved_top = !top in + top := unexpected; + callback (); + top := saved_top); + let a = f () in + if not !called then Alcotest.fail "flush_callback was not called"; + top := prev_top; + a + in + let flush_callback () = !top () in + { require_callback; flush_callback } + end + + let check_no_merge binding = function + | None -> binding + | Some _merge_promise -> + Alcotest.failf "New binding %a triggered an unexpected merge operation" + pp_binding binding + + (** Tests that [close] does not trigger the [flush_callback] *) + let test_close ~io () = + let fail typ () = + Alcotest.failf "Closing <%s> should not trigger the flush_callback" typ + in + Context.with_empty_index ~io ~flush_callback:(fail "empty index") () + Context.ignore; + + Context.with_full_index ~io ~flush_callback:(fail "fresh index") () + Context.ignore; + + let calls = ref 0 in + Context.with_empty_index ~io + ~flush_callback:(fun () -> incr calls) + () + (fun Context.{ rw; _ } -> + Index.replace_random rw + |> uncurry check_no_merge + |> (ignore : binding -> unit)); + Alcotest.(check int) + "Closing a dirty index should trigger the flush_callback once" 1 !calls + + (** Test that [flush] triggers the [flush_callback] when necessary. *) + let test_flush ~io () = + let Mutable_callback.{ require_callback; flush_callback } = + Mutable_callback.v () + in + let* Context.{ rw; clone; _ } = + Context.with_empty_index ~io ~flush_callback () + in + let ro = clone ~readonly:true () in + + Index.flush rw (* No callback, since there are no bindings to persist *); + let binding = Index.replace_random rw |> uncurry check_no_merge in + require_callback + ~callback:(fun () -> + Log.app (fun m -> + m "Checking that newly-added binding %a is not yet visible" + pp_binding binding); + Index.sync ro; + Index.check_not_found ro (fst binding)) + (fun () -> Index.flush rw); + + Log.app (fun m -> + m "After the flush, binding %a should be visible" pp_binding binding); + Index.sync ro; + uncurry (Index.check_binding ro) binding; + + let _ = Index.replace_random rw |> uncurry check_no_merge in + Index.flush ~no_callback:() rw (* No callback, by user request *); + + () + + (** Test that flushes due to [replace] operations trigger the + [flush_callback]: + + - 1. Initial flush of [log] before an automatic merge. + - 2. Flushing of [log_async] while a merge is ongoing. *) + let test_replace ~io () = + let log_size = 8 in + let bindings = Tbl.v ~size:log_size in + let binding_list = bindings |> Hashtbl.to_seq |> List.of_seq in + let Mutable_callback.{ require_callback; flush_callback } = + Mutable_callback.v () + in + let* Context.{ rw; clone; _ } = + Context.with_empty_index ~io ~log_size ~flush_callback () + in + let ro = clone ~readonly:true () in + + (* The first [log_size]-many replaces don't trigger the callback. (Provided + the [auto_flush_limit] is not reached, which it is not.) 
*) + let replace_no_merge binding = + Index.replace' rw (fst binding) (snd binding) + |> check_no_merge binding + |> (ignore : Key.t * Value.t -> unit) + in + binding_list |> List.iter replace_no_merge; + + (* The next replace overflows the log, causing the bindings to be persisted *) + let do_merge = Semaphore.make false in + let overflow_binding, merge_promise = + require_callback + ~callback:(fun () -> + Log.app (fun m -> + m + "Checking newly-added bindings are not visible from a synced \ + RO instance until [flush_callback] is called"); + Index.sync ro; + Context.check_disjoint ro bindings) + (fun () -> + Index.replace_random + ~hook: + (I.Private.Hook.v (function + | `Merge `Before -> Semaphore.acquire "do_merge" do_merge + | _ -> ())) + rw) + in + + Log.app (fun m -> m "Checking merged bindings are now visible"); + Hashtbl.add bindings (fst overflow_binding) (snd overflow_binding); + Index.sync ro; + Context.check_equivalence ro bindings; + + (* New values added during the merge go into [log_async] *) + let async_binding = Index.replace_random rw |> uncurry check_no_merge in + Log.app (fun m -> + m "Added new binding %a while merge is ongoing" pp_binding async_binding); + + (* We could implicitly cause an automatic flush of [log_async], but it's + simpler to just explicitly force one. *) + Index.sync ro; + require_callback ~at_least_once:() + ~callback:(fun () -> + Log.app (fun m -> + m + "Checking async_binding %a is not yet visible from a synced RO \ + instance" + pp_binding async_binding); + Context.check_equivalence ro bindings) + (fun () -> Index.flush rw); + + (* The merge triggers the callback when flushing [log_async] entries into + [log]. (Not necessary here, since [log_async] values were already flushed.) *) + require_callback (fun () -> + Semaphore.release do_merge; + merge_promise |> Option.get |> Index.await |> check_completed); + + Log.app (fun m -> + m + "Checking that all added bindings are now visible from a synced RO \ + instance"); + Hashtbl.add bindings (fst async_binding) (snd async_binding); + Index.sync ro; + Context.check_equivalence ro bindings + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("close", `Quick, test_close); + ("flush", `Quick, test_flush); + ("replace", `Quick, test_replace); + ] +end diff --git a/test/io/flush_callback.mli b/test/io/flush_callback.mli new file mode 100644 index 00000000..f114e0b7 --- /dev/null +++ b/test/io/flush_callback.mli @@ -0,0 +1,3 @@ +module Make (Platform : Common.Platform) : sig + val tests : io:Platform.io -> unit Alcotest.test_case list +end diff --git a/test/io/force_merge.ml b/test/io/force_merge.ml new file mode 100644 index 00000000..192adba8 --- /dev/null +++ b/test/io/force_merge.ml @@ -0,0 +1,448 @@ +module Hook = Index.Private.Hook +open Common + +module Make (Platform : Common.Platform) = struct + let root = Filename.concat "_tests" (Platform.name ^ ".force_merge") + + module Context = + Common.Make_context + (Platform) + (struct + let root = root + end) + + module Index = Context.Index + module Semaphore = Platform.Semaphore + + let after f = Hook.v (function `After -> f () | _ -> ()) + let after_clear f = Hook.v (function `After_clear -> f () | _ -> ()) + let before f = Hook.v (function `Before -> f () | _ -> ()) + + let before_offset_read f = + Hook.v (function `Before_offset_read -> f () | _ -> ()) + + let test_find_present t tbl = + Hashtbl.iter + (fun k v -> + match Index.find t k with + | res -> + if not (res = v) then + Alcotest.fail "Replacing existing 
value failed." + | exception Not_found -> + Alcotest.failf "Inserted value is not present anymore: %s." k) + tbl + + let test_one_entry r k v = + match Index.find r k with + | res -> + if not (res = v) then Alcotest.fail "Replacing existing value failed." + | exception Not_found -> + Alcotest.failf "Inserted value is not present anymore: %s." k + + let test_fd () = + match Common.get_open_fd root with + | `Ok lines -> ( + let contains sub s = + try + ignore (Re.Str.search_forward (Re.Str.regexp sub) s 0); + true + with Not_found -> false + in + let result = + let data, rs = List.partition (contains "data") lines in + if List.length data > 2 then + Alcotest.fail "Too many file descriptors opened for data files"; + let log, rs = List.partition (contains "log") rs in + if List.length log > 2 then + Alcotest.fail "Too many file descriptors opened for log files"; + let lock, rs = List.partition (contains "lock") rs in + if List.length lock > 2 then + Alcotest.fail "Too many file descriptors opened for lock files"; + if List.length rs > 0 then + Alcotest.fail "Unknown file descriptors opened"; + `Ok () + in + match result with + | `Ok () -> () + | `Skip err -> Log.warn (fun m -> m "`test_fd` was skipped: %s" err)) + | `Skip err -> Log.warn (fun m -> m "`test_fd` was skipped: %s" err) + + let readonly_s ~io () = + let* { Context.tbl; clone; _ } = Context.with_full_index ~io () in + let r1 = clone ~readonly:true () in + let r2 = clone ~readonly:true () in + let r3 = clone ~readonly:true () in + test_find_present r1 tbl; + test_find_present r2 tbl; + test_find_present r3 tbl; + test_fd () + + let readonly ~io () = + let* { Context.tbl; clone; _ } = Context.with_full_index ~io () in + let r1 = clone ~readonly:true () in + let r2 = clone ~readonly:true () in + let r3 = clone ~readonly:true () in + Hashtbl.iter + (fun k v -> + test_one_entry r1 k v; + test_one_entry r2 k v; + test_one_entry r3 k v) + tbl; + test_fd () + + let readonly_and_merge ~io () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let w = rw in + let r1 = clone ~readonly:true () in + let r2 = clone ~readonly:true () in + let r3 = clone ~readonly:true () in + let interleave () = + let k1 = Key.v () in + let v1 = Value.v () in + Index.replace w k1 v1; + Index.flush w; + let t1 = Index.try_merge_aux ~force:true w in + Index.sync r1; + Index.sync r2; + Index.sync r3; + test_one_entry r1 k1 v1; + test_one_entry r2 k1 v1; + test_one_entry r3 k1 v1; + + let k2 = Key.v () in + let v2 = Value.v () in + Index.replace w k2 v2; + Index.flush w; + Index.sync r1; + Index.sync r2; + Index.sync r3; + test_one_entry r1 k1 v1; + let t2 = Index.try_merge_aux ~force:true w in + test_one_entry r2 k2 v2; + test_one_entry r3 k1 v1; + + let k2 = Key.v () in + let v2 = Value.v () in + let k3 = Key.v () in + let v3 = Value.v () in + test_one_entry r1 k1 v1; + Index.replace w k2 v2; + Index.flush w; + Index.sync r1; + let t3 = Index.try_merge_aux ~force:true w in + test_one_entry r1 k1 v1; + Index.replace w k3 v3; + Index.flush w; + Index.sync r3; + let t4 = Index.try_merge_aux ~force:true w in + test_one_entry r3 k3 v3; + + let k2 = Key.v () in + let v2 = Value.v () in + Index.replace w k2 v2; + Index.flush w; + Index.sync r2; + Index.sync r3; + test_one_entry w k2 v2; + let t5 = Index.try_merge_aux ~force:true w in + test_one_entry w k2 v2; + test_one_entry r2 k2 v2; + test_one_entry r3 k1 v1; + + let k2 = Key.v () in + let v2 = Value.v () in + Index.replace w k2 v2; + Index.flush w; + Index.sync r2; + Index.sync r3; + test_one_entry 
r2 k1 v1; + let t6 = Index.try_merge_aux ~force:true w in + test_one_entry w k2 v2; + test_one_entry r2 k2 v2; + test_one_entry r3 k2 v2; + Index.await t1 |> check_completed; + Index.await t2 |> check_completed; + Index.await t3 |> check_completed; + Index.await t4 |> check_completed; + Index.await t5 |> check_completed; + Index.await t6 |> check_completed + in + + for _ = 1 to 10 do + interleave () + done; + test_fd () + + (* A force merge has an implicit flush, however, if the replace occurs at the end of the merge, the value is not flushed *) + let write_after_merge ~io () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let w = rw in + let r1 = clone ~readonly:true () in + let k1 = Key.v () in + let v1 = Value.v () in + let k2 = Key.v () in + let v2 = Value.v () in + Index.replace w k1 v1; + let hook = after (fun () -> Index.replace w k2 v2) in + let t = Index.try_merge_aux ~force:true ~hook w in + Index.await t |> check_completed; + Index.sync r1; + test_one_entry r1 k1 v1; + Alcotest.check_raises (Printf.sprintf "Absent value was found: %s." k2) + Not_found (fun () -> ignore_value (Index.find r1 k2)) + + let replace_while_merge ~io () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let w = rw in + let r1 = clone ~readonly:true () in + let k1 = Key.v () in + let v1 = Value.v () in + let k2 = Key.v () in + let v2 = Value.v () in + Index.replace w k1 v1; + let hook = + before (fun () -> + Index.replace w k2 v2; + test_one_entry w k2 v2) + in + let t = Index.try_merge_aux ~force:true ~hook w in + Index.sync r1; + test_one_entry r1 k1 v1; + Index.await t |> check_completed + + (* note that here we cannot do + `test_one_entry r1 k2 v2` + as there is no way to guarantee that the latests value + added by a RW instance is found by a RO instance + *) + + let find_while_merge ~io () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let w = rw in + let k1 = Key.v () in + let v1 = Value.v () in + Index.replace w k1 v1; + let f () = test_one_entry w k1 v1 in + let t1 = Index.try_merge_aux ~force:true ~hook:(after f) w in + let t2 = Index.try_merge_aux ~force:true ~hook:(after f) w in + let r1 = clone ~readonly:true () in + let f () = test_one_entry r1 k1 v1 in + let t3 = Index.try_merge_aux ~force:true ~hook:(before f) w in + let t4 = Index.try_merge_aux ~force:true ~hook:(before f) w in + Index.await t1 |> check_completed; + Index.await t2 |> check_completed; + Index.await t3 |> check_completed; + Index.await t4 |> check_completed + + let find_in_async_generation_change ~io () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let w = rw in + let r1 = clone ~readonly:true () in + let k1 = Key.v () in + let v1 = Value.v () in + let f () = + Index.replace w k1 v1; + Index.flush w; + Index.sync r1; + test_one_entry r1 k1 v1 + in + let t1 = Index.try_merge_aux ~force:true ~hook:(before f) w in + Index.await t1 |> check_completed + + let find_in_async_same_generation ~io () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let w = rw in + let r1 = clone ~readonly:true () in + let k1 = Key.v () in + let v1 = Value.v () in + let k2 = Key.v () in + let v2 = Value.v () in + let f () = + Index.replace w k1 v1; + Index.flush w; + Index.sync r1; + test_one_entry r1 k1 v1; + Index.replace w k2 v2; + Index.flush w; + Index.sync r1; + test_one_entry r1 k2 v2 + in + let t1 = Index.try_merge_aux ~force:true ~hook:(before f) w in + Index.await t1 |> check_completed + + let 
sync_before_and_after_clearing_async ~io () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let w = rw in + let ro = clone ~readonly:true () in + let k1 = Key.v () in + let v1 = Value.v () in + let k2 = Key.v () in + let v2 = Value.v () in + let add_in_async () = + Index.replace w k1 v1; + Index.replace w k2 v2; + Index.flush w; + Log.debug (fun l -> l "RO updates async's offset"); + Index.sync ro + in + let sync_before_clear_async () = + Log.debug (fun l -> l "RO updates instance's generation"); + Index.sync ro + in + let hook = + Hook.v (function + | `Before -> add_in_async () + | `After_clear -> sync_before_clear_async () + | _ -> ()) + in + let t1 = Index.try_merge_aux ~force:true ~hook w in + Index.await t1 |> check_completed; + Index.sync ro; + test_one_entry ro k1 v1; + test_one_entry ro k2 v2 + + (** RW adds a value in log and flushes it, so every subsequent RO sync should + find that value. But if the RO sync occurs during a merge, after a clear + but before a generation change, then the value is missed. Also test ro + find at this point. *) + let sync_after_clear_log ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.flush rw; + let hook = after_clear (fun () -> Index.sync ro) in + let t = Index.try_merge_aux ~force:true ~hook rw in + Index.await t |> check_completed; + test_one_entry ro k1 v1; + let k2, v2 = (Key.v (), Value.v ()) in + Index.replace rw k2 v2; + Index.flush rw; + Index.sync ro; + let hook = after_clear (fun () -> test_one_entry ro k1 v1) in + let t = Index.try_merge_aux ~force:true ~hook rw in + Index.await t |> check_completed + + (** during a merge RO sync can miss a value if it reads the generation before + the generation is updated. *) + let merge_during_sync ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.flush rw; + let hook = + before_offset_read (fun () -> + let t = Index.try_merge_aux ~force:true rw in + Index.await t |> check_completed) + in + Index.sync' ~hook ro; + test_one_entry ro k1 v1 + + let test_is_merging ~io () = + let* Context.{ rw; _ } = Context.with_empty_index ~io () in + let add_binding_and_merge ~hook = + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + let t = Index.try_merge_aux ~force:true ~hook rw in + Index.await t |> check_completed + in + let f msg b () = Alcotest.(check bool) msg (Index.is_merging rw) b in + f "before merge" false (); + add_binding_and_merge ~hook:(before (f "before" true)); + f "between merge" false (); + add_binding_and_merge ~hook:(after (f "after" true)); + add_binding_and_merge ~hook:(after_clear (f "after clear" true)) + + let add_bindings index = + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace index k1 v1 + + (** Test that a clear aborts the merge. 
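+ The merge thread parks on the [merge] semaphore in its [`Before] hook, and [clear'] releases it only from its [`Abort_signalled] hook, so the abort is requested while the merge is still in progress.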
*) + let test_non_blocking_clear ~io () = + let* Context.{ rw; _ } = Context.with_empty_index ~io () in + let merge_started = Semaphore.make false and merge = Semaphore.make false in + let merge_hook = + Hook.v @@ function + | `Before -> + Semaphore.release merge_started; + Semaphore.acquire "merge" merge + | `After -> Alcotest.fail "Merge should have been aborted by clear" + | _ -> () + in + let clear_hook = + Hook.v @@ function + | `Abort_signalled -> Semaphore.release merge + | `IO_clear -> () + in + add_bindings rw; + let thread = Index.try_merge_aux ~force:true ~hook:merge_hook rw in + Semaphore.acquire "merge_started" merge_started; + add_bindings rw; + Index.clear' ~hook:clear_hook rw; + match Index.await thread with + | Ok `Aborted -> () + | _ -> Alcotest.fail "merge should have aborted" + + (** The test consists of aborting a first merge after one entry is added in + the ./merge file and checking that a second merge succeeds. Regression + test for PR 211 in which the second merge was triggering an assert + failure. *) + let test_abort_merge ~io ~abort_merge () = + let* { Context.rw; clone; _ } = Context.with_full_index ~io () in + let merge_started = Semaphore.make false and merge = Semaphore.make false in + let merge_hook = + Hook.v @@ function + | `After_first_entry -> + Semaphore.release merge_started; + Semaphore.acquire "merge" merge + | `After | `After_clear -> + Alcotest.fail "Merge should have been aborted by clear" + | `Before -> () + in + let abort_hook = + Hook.v @@ function + | `Abort_signalled -> Semaphore.release merge + | `IO_clear -> () + in + let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in + Semaphore.acquire "merge_started" merge_started; + abort_merge ~hook:abort_hook rw; + (match Index.await t with + | Ok `Aborted -> () + | _ -> Alcotest.fail "Merge should have aborted"); + let rw = clone ~readonly:false ~fresh:false () in + add_bindings rw; + let t = Index.try_merge_aux ~force:true rw in + Index.await t |> check_completed + + let test_clear_aborts_merge = test_abort_merge ~abort_merge:Index.clear' + + let test_close_immediately_aborts_merge = + test_abort_merge ~abort_merge:(Index.close' ~immediately:()) + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("readonly in sequence", `Quick, readonly_s); + ("readonly interleaved", `Quick, readonly); + ("interleaved merge", `Quick, readonly_and_merge); + ("write at the end of merge", `Quick, write_after_merge); + ("write in log_async", `Quick, replace_while_merge); + ("find while merging", `Quick, find_while_merge); + ("find in async without log", `Quick, find_in_async_generation_change); + ("find in async with log", `Quick, find_in_async_same_generation); + ( "sync before and after clearing the async", + `Quick, + sync_before_and_after_clearing_async ); + ("sync and find after log cleared", `Quick, sync_after_clear_log); + ("merge during ro sync", `Quick, merge_during_sync); + ("is_merging", `Quick, test_is_merging); + ("clear is not blocking", `Quick, test_non_blocking_clear); + ("`clear` aborts merge", `Quick, test_clear_aborts_merge); + ( "`close ~immediately` aborts merge", + `Quick, + test_close_immediately_aborts_merge ); + ] +end diff --git a/test/io/force_merge.mli b/test/io/force_merge.mli new file mode 100644 index 00000000..f114e0b7 --- /dev/null +++ b/test/io/force_merge.mli @@ -0,0 +1,3 @@ +module Make (Platform : Common.Platform) : sig + val tests : io:Platform.io -> unit Alcotest.test_case list +end diff --git a/test/io/io_array.ml 
b/test/io/io_array.ml new file mode 100644 index 00000000..acb5b828 --- /dev/null +++ b/test/io/io_array.ml @@ -0,0 +1,87 @@ +module Int63 = Optint.Int63 + +module Make (Platform : Common.Platform) = struct + module IO = Platform.IO + + let ( // ) = Filename.concat + let root = "_tests" // (Platform.name ^ ".io_array") + + module Entry = struct + module Key = Common.Key + module Value = Common.Value + + type t = Key.t * Value.t + + let encoded_size = Key.encoded_size + Value.encoded_size + + let decode string off = + let key = Key.decode string off in + let value = Value.decode string (off + Key.encoded_size) in + (key, value) + + let append_io io (key, value) = + let encoded_key = Key.encode key in + let encoded_value = Value.encode value in + IO.append io encoded_key; + IO.append io encoded_value + end + + module IOArray = Index.Private.Io_array.Make (IO) (Entry) + + let entry = Alcotest.(pair string string) + + let fresh_io ~io name = + IO.v ~io ~fresh:true ~generation:Int63.zero ~fan_size:Int63.zero + (root // name) + + (* Append a random sequence of [size] keys to an IO instance and return + a pair of an IOArray and an equivalent in-memory array. *) + let populate_random ~size io = + let rec loop acc = function + | 0 -> acc + | n -> + let e = (Common.Key.v (), Common.Value.v ()) in + Entry.append_io io e; + loop (e :: acc) (n - 1) + in + let mem_arr = Array.of_list (List.rev (loop [] size)) in + let io_arr = IOArray.v io in + IO.flush io; + (mem_arr, io_arr) + + (* Tests *) + let read_sequential ~io () = + let size = 1000 in + let fio = fresh_io ~io "read_sequential" in + let mem_arr, io_arr = populate_random ~size fio in + for i = 0 to size - 1 do + let expected = mem_arr.(i) in + let actual = IOArray.get io_arr (Int63.of_int i) in + Alcotest.(check entry) + (Fmt.str "Inserted key at index %i is accessible" i) + expected actual + done + + let read_sequential_prefetch ~io () = + let size = 1000 in + let io = fresh_io ~io "read_sequential_prefetch" in + let mem_arr, io_arr = populate_random ~size io in + IOArray.pre_fetch io_arr ~low:Int63.zero ~high:(Int63.of_int 999); + + (* Read the arrays backwards *) + for i = size - 1 downto 0 do + let expected = mem_arr.(i) in + let actual = IOArray.get io_arr (Int63.of_int i) in + Alcotest.(check entry) + (Fmt.str "Inserted key at index %i is accessible" i) + expected actual + done + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("fresh", `Quick, read_sequential); + ("prefetch", `Quick, read_sequential_prefetch); + ] +end diff --git a/test/io/io_array.mli b/test/io/io_array.mli new file mode 100644 index 00000000..f114e0b7 --- /dev/null +++ b/test/io/io_array.mli @@ -0,0 +1,3 @@ +module Make (Platform : Common.Platform) : sig + val tests : io:Platform.io -> unit Alcotest.test_case list +end diff --git a/test/unix/log.ml b/test/io/log.ml similarity index 100% rename from test/unix/log.ml rename to test/io/log.ml diff --git a/test/unix/log.mli b/test/io/log.mli similarity index 100% rename from test/unix/log.mli rename to test/io/log.mli diff --git a/test/io/test_io.ml b/test/io/test_io.ml new file mode 100644 index 00000000..93f10688 --- /dev/null +++ b/test/io/test_io.ml @@ -0,0 +1,1115 @@ +module Hook = Index.Private.Hook +module Layout = Index.Private.Layout +module I = Index +open Common + +module Make (Platform : Common.Platform) = struct + let ( // ) = Filename.concat + let root = "_tests" // (Platform.name ^ ".main") + + module Context = + Common.Make_context + (Platform) + (struct + let root = root +
end) + + module Index = Context.Index + module Semaphore = Platform.Semaphore + + type index = Index.t + + (* Helper functions *) + + (** [random_new_key tbl] returns a random key which is not in [tbl]. *) + let rec random_new_key tbl = + let r = Key.v () in + if Hashtbl.mem tbl r then random_new_key tbl else r + + exception Found of string + + (** [random_existing_key tbl] returns a random key from [tbl]. *) + let random_existing_key tbl = + try + Hashtbl.iter (fun k _ -> raise (Found k)) tbl; + Alcotest.fail "Provided table contains no keys." + with Found k -> k + + let test_replace t = + let k = Key.v () in + let v = Value.v () in + let v' = Value.v () in + Index.replace t k v; + Index.replace t k v'; + Index.check_binding t k v' + + let test_find_absent t tbl = + let rec loop i = + if i = 0 then () + else + let k = random_new_key tbl in + Alcotest.check_raises (Printf.sprintf "Absent value was found: %s." k) + Not_found (fun () -> ignore_value (Index.find t k)); + loop (i - 1) + in + loop 100 + + let mem_entry f k _ = + if not (f k) then Alcotest.failf "Wrong insertion: %s key is missing." k + + let mem_index_entry index = mem_entry (Index.mem index) + let mem_tbl_entry tbl = mem_entry (Hashtbl.mem tbl) + + let check_equivalence_mem index tbl = + Hashtbl.iter (mem_index_entry index) tbl; + Index.iter (mem_tbl_entry tbl) index + + (* Basic tests of find/replace on a live index *) + module Live = struct + let find_present_live ~io () = + let* Context.{ rw; tbl; _ } = Context.with_full_index ~io () in + Context.check_equivalence rw tbl + + let find_absent_live ~io () = + let* Context.{ rw; tbl; _ } = Context.with_full_index ~io () in + test_find_absent rw tbl + + let replace_live ~io () = + let* Context.{ rw; _ } = Context.with_full_index ~io () in + test_replace rw + + let different_size_for_key ~io () = + let* Context.{ rw; _ } = Context.with_empty_index ~io () in + let k = String.init 2 (fun _i -> random_char ()) in + let v = Value.v () in + let exn = I.Private.Data.Invalid_size k in + Alcotest.check_raises + "Cannot add a key of a different size than string_size." exn (fun () -> + Index.replace rw k v) + + let different_size_for_value ~io () = + let* Context.{ rw; _ } = Context.with_empty_index ~io () in + let k = Key.v () in + let v = String.init 200 (fun _i -> random_char ()) in + let exn = I.Private.Data.Invalid_size v in + Alcotest.check_raises + "Cannot add a value of a different size than string_size." 
exn + (fun () -> Index.replace rw k v) + + let membership ~io () = + let* Context.{ rw; tbl; _ } = Context.with_full_index ~io () in + check_equivalence_mem rw tbl + + let iter_after_clear ~io () = + let* Context.{ rw; _ } = Context.with_full_index ~io () in + let () = Index.clear rw in + Index.iter (fun _ _ -> Alcotest.fail "Indexed not cleared.") rw + + let find_after_clear ~io () = + let* Context.{ rw; tbl; _ } = Context.with_full_index ~io () in + let () = Index.clear rw in + Hashtbl.fold + (fun k _ () -> + match Index.find rw k with + | exception Not_found -> () + | _ -> Alcotest.fail "Indexed not cleared.") + tbl () + + let open_after_clear ~io () = + let* Context.{ clone; rw; _ } = Context.with_full_index ~io () in + Index.clear rw; + let rw2 = clone ~fresh:false ~readonly:false () in + Alcotest.check_raises "Finding absent should raise Not_found" Not_found + (fun () -> Key.v () |> Index.find rw2 |> ignore_value) + + let files_on_disk_after_clear ~io () = + let root = Context.fresh_name "full_index" in + let rw = Index.v ~io ~fresh:true ~log_size:Default.log_size root in + for _ = 1 to Default.size do + let k = Key.v () in + let v = Value.v () in + Index.replace rw k v + done; + Index.flush rw; + Index.clear rw; + Index.close rw; + let module I = Platform.IO in + let test_there path = + match I.v_readonly ~io path with + | Error `No_file_on_disk -> Alcotest.failf "expected file: %s" path + | Ok data -> + Alcotest.(check int) path (I.size data) (I.size_header data); + I.close data + in + let test_not_there path = + match I.v_readonly ~io path with + | Error `No_file_on_disk -> () + | Ok _ -> Alcotest.failf "do not expect file: %s" path + in + test_there (Layout.log ~root); + test_not_there (Layout.log_async ~root); + test_not_there (Layout.data ~root) + + let duplicate_entries ~io () = + let* Context.{ rw; _ } = Context.with_empty_index ~io () in + let k1, v1, v2, v3 = (Key.v (), Value.v (), Value.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.replace rw k1 v2; + Index.replace rw k1 v3; + let thread = Index.try_merge_aux ~force:true rw in + Index.await thread |> check_completed; + let once = ref true in + Index.iter + (fun k v -> + if !once && k = k1 && v = v3 then once := false + else Alcotest.fail "Index should contain a single entry.") + rw + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("find (present)", `Quick, find_present_live); + ("find (absent)", `Quick, find_absent_live); + ("replace", `Quick, replace_live); + ("fail add (key)", `Quick, different_size_for_key); + ("fail add (value)", `Quick, different_size_for_value); + ("membership", `Quick, membership); + ("clear and iter", `Quick, iter_after_clear); + ("clear and find", `Quick, find_after_clear); + ("open after clear", `Quick, open_after_clear); + ("files on disk after clear", `Quick, files_on_disk_after_clear); + ("duplicate entries", `Quick, duplicate_entries); + ] + end + + (* Tests of behaviour after restarting the index *) + module DuplicateInstance = struct + let find_present ~io () = + let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~io () in + let (_ : index) = clone ~readonly:false () in + Context.check_equivalence rw tbl + + let find_absent ~io () = + let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~io () in + let (_ : index) = clone ~readonly:false () in + test_find_absent rw tbl + + let replace ~io () = + let* Context.{ rw; clone; _ } = Context.with_full_index ~io ~size:5 () in + let (_ : index) = clone ~readonly:false () in + 
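+ (* The clone is only opened to check that a second handle on the same store does not disturb replaces made through [rw]. *)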
test_replace rw + + let membership ~io () = + let* Context.{ tbl; clone; _ } = Context.with_full_index ~io () in + let rw' = clone ~readonly:false () in + check_equivalence_mem rw' tbl + + let fail_restart_fresh ~io () = + let reuse_name = Context.fresh_name "empty_index" in + let cache = Index.empty_cache () in + let rw = + Index.v ~io ~cache ~fresh:true ~readonly:false ~log_size:4 reuse_name + in + let exn = I.RO_not_allowed in + Alcotest.check_raises "Index readonly cannot be fresh." exn (fun () -> + let (_ : index) = + Index.v ~io ~cache ~fresh:true ~readonly:true ~log_size:4 reuse_name + in + ()); + Index.close rw + + let sync ~io () = + let* Context.{ rw; clone; _ } = Context.with_full_index ~io () in + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + let rw2 = clone ~readonly:false () in + let k2, v2 = (Key.v (), Value.v ()) in + Index.replace rw2 k2 v2; + Index.check_binding rw k2 v2; + Index.check_binding rw2 k1 v1 + + let duplicate_entries ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let k1, v1, v2 = (Key.v (), Value.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.replace rw k1 v2; + Index.close rw; + let rw2 = clone ~readonly:false () in + let once = ref true in + Index.iter + (fun k v -> + if !once && k = k1 && v = v2 then once := false + else Alcotest.fail "Index should contain a single entry.") + rw2 + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("find (present)", `Quick, find_present); + ("find (absent)", `Quick, find_absent); + ("replace", `Quick, replace); + ("membership", `Quick, membership); + ("fail restart readonly fresh", `Quick, fail_restart_fresh); + ("in sync", `Quick, sync); + ("duplicate entries in log", `Quick, duplicate_entries); + ] + end + + (* Tests of read-only indices *) + module Readonly = struct + let readonly ~io () = + let* Context.{ rw; clone; tbl; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let tbl2 = + let h = Hashtbl.create 0 in + Hashtbl.iter (fun k _ -> Hashtbl.add h k (Value.v ())) tbl; + h + in + Hashtbl.iter (fun k v -> Index.replace rw k v) tbl2; + Index.flush rw; + Index.sync ro; + Context.check_equivalence ro tbl2 + + let readonly_v_after_replace ~io () = + let* Context.{ rw; clone; _ } = Context.with_full_index ~io () in + let k = Key.v () in + let v = Value.v () in + Index.replace rw k v; + let ro = clone ~readonly:true () in + Index.close rw; + Index.close ro; + let rw = clone ~readonly:false () in + Index.check_binding rw k v + + let readonly_clear ~io () = + let check_no_index_entry index k = + Alcotest.check_raises (Fmt.str "Find %s key after clearing." 
k) + Not_found (fun () -> ignore_value (Index.find index k)) + in + let* Context.{ rw; tbl; clone; _ } = + (* Ensure that the clear also wipes the LRU *) + let lru_size = 10 in + Context.with_full_index ~io ~lru_size () + in + let ro = clone ~readonly:true () in + Index.clear rw; + Index.sync ro; + Log.info (fun m -> m "Checking that RO observes the empty index"); + Hashtbl.iter (fun k _ -> check_no_index_entry ro k) tbl; + Index.close rw; + Index.close ro; + let rw = clone ~readonly:false () in + let ro = clone ~readonly:true () in + let k, v = (Key.v (), Value.v ()) in + Index.replace rw k v; + Index.check_binding rw k v; + check_no_index_entry ro k; + Index.flush rw; + Index.sync ro; + Index.check_binding rw k v; + Index.check_binding ro k v; + Index.clear rw; + check_no_index_entry rw k; + Index.check_binding ro k v; + Index.sync ro; + check_no_index_entry rw k; + check_no_index_entry ro k + + (* If sync is called right after the generation is set, and before the old + file is removed, the readonly instance reopens the old file. It does not + try to reopen the file until the next generation change occurs. *) + let readonly_io_clear ~io () = + let* Context.{ rw; clone; _ } = Context.with_full_index ~io () in + let ro = clone ~readonly:true () in + let hook = + Hook.v @@ function `IO_clear -> Index.sync ro | `Abort_signalled -> () + in + Index.clear' ~hook rw; + let k, v = (Key.v (), Value.v ()) in + Index.replace rw k v; + Index.flush rw; + Index.sync ro; + Index.check_binding rw k v; + Index.check_binding ro k v + + let hashtbl_pick tbl = + match Hashtbl.fold (fun k v acc -> (k, v) :: acc) tbl [] with + | h :: _ -> h + | _ -> assert false + + let before l m = + I.Private.Hook.v @@ function + | `Before -> Semaphore.acquire "before" l + | `After -> Semaphore.release m + | _ -> () + + let after l m = + I.Private.Hook.v @@ function + | `After_offset_read -> + Semaphore.release l; + Semaphore.acquire "after_offset_read" m + | _ -> () + + (* check that the ro instance is "snapshot isolated", e.g. it can read old + values, even when rw overwrites them. *) + let readonly_snapshot ~io () = + let* Context.{ rw; clone; tbl; _ } = + Context.with_full_index ~io ~throttle:`Overcommit_memory () + in + let ro = clone ~readonly:true () in + let tbl2 = + let h = Hashtbl.create 0 in + Hashtbl.iter (fun k _ -> Hashtbl.add h k (Value.v ())) tbl; + h + in + let merge = Semaphore.make false and sync = Semaphore.make false in + let k, v = hashtbl_pick tbl2 in + + Index.clear rw; + Index.replace rw k v; + let thread = + Index.try_merge_aux ~force:true ~hook:(before merge sync) rw + in + Hashtbl.iter (Index.replace rw) tbl2; + Index.flush rw; + Context.check_equivalence rw tbl2; + Context.check_equivalence ro tbl; + Index.sync' ~hook:(after merge sync) ro; + Context.check_equivalence ro tbl2; + Semaphore.release merge; + Index.await thread |> check_completed + + let fail_readonly_add ~io () = + let* Context.{ clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let exn = I.RO_not_allowed in + Alcotest.check_raises "Index readonly cannot write." exn (fun () -> + Index.replace ro (Key.v ()) (Value.v ())) + + (** Tests that the entries that are not flushed cannot be read by a readonly + index. The test relies on the fact that, for log_size > 0, adding one + entry into an empty index does not lead to flush/merge. 
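+ The binding therefore only lives in the writer's in-memory buffer, so a synced RO instance must still raise [Not_found] for it.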
*) + let fail_readonly_read ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.sync ro; + Alcotest.check_raises "Index readonly cannot read if data is not flushed." + Not_found (fun () -> ignore_value (Index.find ro k1)) + + let readonly_v_in_sync ~io () = + let* Context.{ rw; clone; _ } = Context.with_full_index ~io () in + let k, v = (Key.v (), Value.v ()) in + Index.replace rw k v; + Index.flush rw; + let ro = clone ~readonly:true () in + Log.info (fun m -> + m "Checking that RO observes the flushed binding %a" pp_binding (k, v)); + Index.check_binding ro k v + + (** Readonly finds value in log before and after clear. Before sync the + deleted value is still found. *) + let readonly_add_log_before_clear ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.flush rw; + Index.sync ro; + Index.check_binding ro k1 v1; + Index.clear rw; + Index.check_binding ro k1 v1; + Index.sync ro; + Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) + Not_found (fun () -> ignore_value (Index.find ro k1)) + + (** Readonly finds value in index before and after clear. Before sync the + deleted value is still found. *) + let readonly_add_index_before_clear ~io () = + let* Context.{ rw; clone; _ } = Context.with_full_index ~io () in + let ro = clone ~readonly:true () in + Index.clear rw; + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + let thread = Index.try_merge_aux ~force:true rw in + Index.await thread |> check_completed; + Index.sync ro; + Index.check_binding ro k1 v1; + Index.clear rw; + Index.check_binding ro k1 v1; + Index.sync ro; + Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) + Not_found (fun () -> ignore_value (Index.find ro k1)) + + (** Readonly finds old value in log after clear and after new values are + added, before a sync. *) + let readonly_add_after_clear ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.flush rw; + Index.sync ro; + Index.check_binding ro k1 v1; + Index.clear rw; + let k2, v2 = (Key.v (), Value.v ()) in + Index.replace rw k2 v2; + Index.flush rw; + Index.check_binding ro k1 v1; + Index.sync ro; + Index.check_binding ro k2 v2; + Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) + Not_found (fun () -> ignore_value (Index.find rw k1)); + Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) + Not_found (fun () -> ignore_value (Index.find ro k1)) + + (** Readonly finds old value in index after clear and after new values are + added, before a sync. This is because the readonly instance still uses + the old index file, before being replaced by the merge. 
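+ Only the final [sync] makes the RO instance pick up the new generation, at which point the stale binding disappears and the newer one becomes visible.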
*) + let readonly_add_index_after_clear ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + Index.clear rw; + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + let t = Index.try_merge_aux ~force:true rw in + Index.await t |> check_completed; + Index.sync ro; + Index.clear rw; + let k2, v2 = (Key.v (), Value.v ()) in + Index.replace rw k2 v2; + let t = Index.try_merge_aux ~force:true rw in + Index.await t |> check_completed; + Index.check_binding ro k1 v1; + Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) + Not_found (fun () -> ignore_value (Index.find rw k1)); + Index.sync ro; + Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) + Not_found (fun () -> ignore_value (Index.find ro k1)); + Index.check_binding ro k2 v2 + + let readonly_open_after_clear ~io () = + let* Context.{ clone; rw; _ } = Context.with_full_index ~io () in + Index.clear rw; + let ro = clone ~fresh:false ~readonly:true () in + Alcotest.check_raises "Finding absent should raise Not_found" Not_found + (fun () -> Key.v () |> Index.find ro |> ignore_value) + + let readonly_sync_and_merge ~io () = + let* Context.{ clone; rw; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let replace = Semaphore.make false + and merge = Semaphore.make false + and sync = Semaphore.make false in + let merge_hook = + I.Private.Hook.v @@ function + | `Before -> + Semaphore.release replace; + Semaphore.acquire "merge" merge + | `After -> Semaphore.release sync + | _ -> () + in + let sync_hook = + I.Private.Hook.v @@ function + | `After_offset_read -> + Semaphore.release merge; + Semaphore.acquire "sync" sync + | _ -> () + in + let gen i = (String.make Key.encoded_size i, Value.v ()) in + let k1, v1 = gen '1' in + let k2, v2 = gen '2' in + let k3, v3 = gen '3' in + + Index.replace rw k1 v1; + let thread = Index.try_merge_aux ~force:true ~hook:merge_hook rw in + Semaphore.acquire "replace" replace; + Index.replace rw k2 v2; + Index.replace rw k3 v3; + Semaphore.release replace; + Index.flush rw; + Index.sync' ~hook:sync_hook ro; + Index.await thread |> check_completed; + Semaphore.release sync; + Index.check_binding ro k2 v2; + Index.check_binding ro k3 v3 + + let readonly_sync_and_merge_clear ~io () = + let* Context.{ clone; rw; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let merge = Semaphore.make false and sync = Semaphore.make false in + let merge_hook = + I.Private.Hook.v @@ function + | `Before -> + Semaphore.release sync; + Semaphore.acquire "merge" merge + | `After_clear -> Semaphore.release sync + | _ -> () + in + let sync_hook = + I.Private.Hook.v @@ function + | `After_offset_read -> + Semaphore.release merge; + Semaphore.acquire "sync" sync + | _ -> () + in + let gen i = (String.make Key.encoded_size i, Value.v ()) in + let k1, v1 = gen '1' in + let k2, v2 = gen '2' in + + Index.replace rw k1 v1; + Index.flush rw; + let thread = Index.try_merge_aux ~force:true ~hook:merge_hook rw in + Index.replace rw k2 v2; + Semaphore.acquire "sync" sync; + Index.sync' ~hook:sync_hook ro; + Index.await thread |> check_completed; + Semaphore.release sync; + Index.check_binding ro k1 v1 + + let reload_log_async ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let ro = clone ~readonly:true () in + let reload_log = ref 0 in + let reload_log_async = ref 0 in + let merge = Semaphore.make false in + let sync = Semaphore.make false in 
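+ (* [merge_hook] parks the merge in its [`Before] hook: the second binding below goes to [log_async] while the merge is in flight, and the repeated [sync'] calls let [sync_hook] count how often each log file is actually reloaded. *)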
+ let merge_hook = + I.Private.Hook.v @@ function + | `Before -> + Semaphore.release sync; + Semaphore.acquire "merge" merge + | `After_clear -> Semaphore.release sync + | _ -> () + in + let sync_hook = + I.Private.Hook.v (function + | `Reload_log -> reload_log := succ !reload_log + | `Reload_log_async -> reload_log_async := succ !reload_log_async + | _ -> ()) + in + let k1, v1 = (Key.v (), Value.v ()) in + let k2, v2 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.flush rw; + let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in + Index.replace rw k2 v2; + Index.flush rw; + Semaphore.acquire "sync" sync; + Index.sync' ~hook:sync_hook ro; + Index.sync' ~hook:sync_hook ro; + Index.sync' ~hook:sync_hook ro; + Index.sync' ~hook:sync_hook ro; + Semaphore.release merge; + Index.check_binding ro k1 v1; + Index.check_binding ro k2 v2; + Alcotest.(check int) "reloadings of log per merge" 0 !reload_log; + Alcotest.(check int) + "reloadings of log async per merge" 1 !reload_log_async; + Index.await t |> check_completed + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("add", `Quick, readonly); + ("read after clear", `Quick, readonly_clear); + ("snapshot isolation", `Quick, readonly_snapshot); + ("Readonly v after replace", `Quick, readonly_v_after_replace); + ("add not allowed", `Quick, fail_readonly_add); + ("fail read if no flush", `Quick, fail_readonly_read); + ("readonly v is in sync", `Quick, readonly_v_in_sync); + ( "read values added in log before clear", + `Quick, + readonly_add_log_before_clear ); + ( "read values added in index before clear", + `Quick, + readonly_add_index_before_clear ); + ( "read old values in log after clear", + `Quick, + readonly_add_after_clear ); + ( "read old values in index after clear", + `Quick, + readonly_add_index_after_clear ); + ("readonly open after clear", `Quick, readonly_open_after_clear); + ("race between sync and merge", `Quick, readonly_sync_and_merge); + ("race between sync and clear", `Quick, readonly_io_clear); + ( "race between sync and end of merge", + `Quick, + readonly_sync_and_merge_clear ); + ("reload log and log async", `Quick, reload_log_async); + ] + end + + (* Tests of {Index.close} *) + module Close = struct + exception Stop + + let check_logs msg rw ~log_size ~log_async_size = + let log = Index.log rw in + let log_async = Index.log_async rw in + let len = Option.map List.length in + Alcotest.(check (option int)) (msg ^ ": log entries") log_size (len log); + Alcotest.(check (option int)) + (msg ^ ": log_async entries") + log_async_size (len log_async) + + let force_merge rw = + let thread = Index.try_merge_aux ~force:true rw in + Index.await thread |> check_completed; + check_logs "force_merge" rw ~log_size:(Some 0) ~log_async_size:None + + let close_reopen_rw ~io () = + let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~io () in + Index.close rw; + let w = clone ~readonly:false () in + Context.check_equivalence w tbl + + let close_reopen_rw_more ~io () = + let merge = Semaphore.make false in + let merge_hook = + I.Private.Hook.v @@ function + | `After_clear -> + Semaphore.release merge; + raise Stop + | _ -> () + in + let* Context.{ rw; clone; tbl; _ } = + Context.with_full_index ~io ~log_size:2 () + in + let k1, v1 = (Key.v (), Value.v ()) in + let k2, v2 = (Key.v (), Value.v ()) in + let k3, v3 = (Key.v (), Value.v ()) in + + (* await a cancelled merge and close *) + let close rw t ~log_size ~log_async_size = + Semaphore.acquire "merge" merge; + check_logs 
"close" rw ~log_size ~log_async_size; + Index.close ~immediately:() rw; + match Index.await t with + | Error (`Async_exn Stop) -> () + | _ -> Alcotest.fail "the merge thread should have been killed" + in + + (* Add k1, start a merge and crash just after the index is + renamed *) + Index.replace rw k1 v1; + check_logs __LOC__ rw ~log_size:(Some 1) ~log_async_size:None; + let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in + (* Check that the log entries have been merged with index *) + close rw t ~log_size:(Some 0) ~log_async_size:(Some 0); + + (* k1 should be there (thx to the log file) *) + let rw = clone ~readonly:false ~fresh:false () in + Hashtbl.add tbl k1 v1; + Context.check_equivalence rw tbl; + + (* Add k2 in log and k3 in log_async and crash just after the + index is renamed. Log is merged but log_async should still be + present. *) + Index.replace rw k2 v2; + check_logs __LOC__ rw ~log_size:(Some 1) ~log_async_size:None; + let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in + Index.replace rw k3 v3; + close rw t ~log_size:(Some 0) ~log_async_size:(Some 1); + + (* Reopen, k2 and k3 should be there. *) + let rw = clone ~readonly:false ~fresh:false () in + Hashtbl.add tbl k2 v2; + Hashtbl.add tbl k3 v3; + Context.check_equivalence rw tbl + + let crash_and_continue ~io () = + let merge = Semaphore.make false in + let merge_hook = + Log.debug (fun m -> m "crash_and_continue: merge_hook"); + I.Private.Hook.v @@ function + | `After_clear -> + Semaphore.release merge; + raise Stop + | _ -> () + in + let* Context.{ rw; tbl; _ } = + Context.with_full_index ~io ~log_size:2 () + in + let k1, v1 = (Key.v (), Value.v ()) in + let k2, v2 = (Key.v (), Value.v ()) in + let k3, v3 = (Key.v (), Value.v ()) in + + let wait t = + Semaphore.acquire "merge" merge; + match Index.await t with + | Error (`Async_exn Stop) -> () + | _ -> Alcotest.fail "the merge thread should have been killed" + in + + (* empty log and log_async *) + force_merge rw; + + (* Add k1, start a merge and crash just after the index is + renamed *) + Index.replace rw k1 v1; + check_logs __LOC__ rw ~log_size:(Some 1) ~log_async_size:None; + Index.try_merge_aux ~force:true ~hook:merge_hook rw |> wait; + (* Check that the log entries have been merged with index *) + check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 0); + + (* k1 should be in data *) + Hashtbl.add tbl k1 v1; + Context.check_equivalence rw tbl; + + (* Add k2 in log_async, as the file is present from the previous + crash. *) + Index.replace rw k2 v2; + check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 1); + + (* k2 should be there. *) + Hashtbl.add tbl k2 v2; + Context.check_equivalence rw tbl; + + (* merge should merge pre-existing log_async entries *) + Index.try_merge_aux ~force:true ~hook:merge_hook rw |> wait; + check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 0); + Context.check_equivalence rw tbl; + + (* Add k3 in log_async as it already exists *) + Index.replace rw k3 v3; + check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 1); + + (* k3 should be there. 
*) + Hashtbl.add tbl k3 v3; + Context.check_equivalence rw tbl; + + (* full merge and check *) + force_merge rw; + Context.check_equivalence rw tbl; + Index.close rw + + let find_absent ~io () = + let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~io () in + Index.close rw; + let rw = clone ~readonly:false () in + test_find_absent rw tbl + + let replace ~io () = + let* Context.{ rw; clone; _ } = Context.with_full_index ~io ~size:5 () in + Index.close rw; + let rw = clone ~readonly:false () in + test_replace rw + + let open_readonly_close_rw ~io () = + let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~io () in + let ro = clone ~readonly:true () in + Index.close rw; + Context.check_equivalence ro tbl + + let close_reopen_readonly ~io () = + let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~io () in + Index.close rw; + let ro = clone ~readonly:true () in + Context.check_equivalence ro tbl + + let fail_api_after_close ~io () = + let k = Key.v () in + let v = Value.v () in + let calls t = + [ + ("clear", fun () -> Index.clear t); + ("find", fun () -> ignore_value (Index.find t k : string)); + ("mem", fun () -> ignore_bool (Index.mem t k : bool)); + ("replace", fun () -> Index.replace t k v); + ("iter", fun () -> Index.iter (fun _ _ -> ()) t); + ( "try_merge ~force:true", + fun () -> + let thread = Index.try_merge_aux ~force:true t in + Index.await thread |> function + | Ok `Completed -> () + | Ok `Aborted | Error _ -> + Alcotest.fail + "Unexpected return status from [try_merge ~force:true] \ + after close" ); + ("flush", fun () -> Index.flush t); + ] + in + let check_calls ~readonly instance = + Index.close instance; + List.iter + (fun (name, call) -> + Alcotest.check_raises + (Printf.sprintf "%s after close with readonly=%b raises Closed" + name readonly) + I.Closed call) + (calls instance) + in + let* Context.{ rw; _ } = Context.with_full_index ~io () in + check_calls ~readonly:true rw; + check_calls ~readonly:false rw + + let check_double_close ~io () = + let* Context.{ rw; _ } = Context.with_full_index ~io () in + Index.close rw; + Index.close rw; + Alcotest.check_raises "flush after double close with raises Closed" + I.Closed (fun () -> Index.flush rw) + + let restart_twice ~io () = + let* Context.{ rw; clone; _ } = Context.with_empty_index ~io () in + let k1, v1 = (Key.v (), Value.v ()) in + Index.replace rw k1 v1; + Index.close rw; + let rw = clone ~fresh:true ~readonly:false () in + Alcotest.check_raises "Index restarts fresh cannot read data." Not_found + (fun () -> ignore_value (Index.find rw k1)); + Index.close rw; + let rw = clone ~fresh:false ~readonly:false () in + Alcotest.check_raises "Index restarted fresh once cannot read data." + Not_found (fun () -> ignore_value (Index.find rw k1)) + + (** [close] terminates an ongoing merge operation *) + let aborted_merge ~io () = + let* Context.{ rw; _ } = + Context.with_full_index ~io ~throttle:`Block_writes ~size:100 () + in + let close_request, abort_signalled = + (* Both semaphores are initially held. + - [close_request] is dropped by the merge thread in the [`Before] hook + as a signal to the parent thread to issue the [close] operation. + + - [abort_signalled] is dropped by the parent thread to signal to the + child to continue the merge operation (which must then abort prematurely). 
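+ Reaching the [`After] or [`After_clear] hook would mean the merge completed despite the concurrent close, so those branches fail the test immediately.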
+ *) + (Semaphore.make false, Semaphore.make false) + in + let hook = function + | `Before -> + Log.app (fun f -> + f "Child (pid = %d): issuing request to close the index" + Thread.(id (self ()))); + Semaphore.release close_request + | `After_first_entry -> + Semaphore.acquire "abort_signalled" abort_signalled + | `After_clear | `After -> + Alcotest.failf + "Child (pid = %d): merge completed despite concurrent close" + Thread.(id (self ())) + in + let merge_promise : _ Index.async = + Index.try_merge_aux ~force:true ~hook:(I.Private.Hook.v hook) rw + in + Log.app (fun f -> f "Parent: waiting for request to close the index"); + Semaphore.acquire "close_request" close_request; + Log.app (fun f -> f "Parent: closing the index"); + Index.close' + ~hook: + (I.Private.Hook.v (fun `Abort_signalled -> + Semaphore.release abort_signalled)) + ~immediately:() rw; + Log.app (fun f -> f "Parent: awaiting merge result"); + Index.await merge_promise |> function + | Ok `Completed -> + Alcotest.fail + "try_merge ~force:true returned `Completed despite concurrent close" + | Error (`Async_exn exn) -> + Alcotest.failf + "Asynchronous exception occurred during try_merge ~force:true: %s" + (Printexc.to_string exn) + | Ok `Aborted -> ( + match Common.get_open_fd root with + | `Ok ofd -> + let merge, _ = Common.partition "merge" ofd in + if List.length merge > 0 then + Alcotest.fail "Too many file descriptors opened for merge files" + | `Skip err -> + Log.warn (fun m -> m "`aborted_fd` was skipped: %s" err)) + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("close and reopen", `Quick, close_reopen_rw); + ("close and reopen more", `Quick, close_reopen_rw_more); + ("crash and continue", `Quick, crash_and_continue); + ("find (absent)", `Quick, find_absent); + ("replace", `Quick, replace); + ("open two instances, close one", `Quick, open_readonly_close_rw); + ("close and reopen on readonly", `Quick, close_reopen_readonly); + ("non-close operations fail after close", `Quick, fail_api_after_close); + ("double close", `Quick, check_double_close); + ("double restart", `Quick, restart_twice); + ("aborted merge", `Quick, aborted_merge); + ] + end + + (* Tests of {Index.filter} *) + module Filter = struct + (* Filtering should also affect the in-memory LRU. *) + let lru_size = 10 + + (** Test that all bindings are kept when using [filter] with a true + predicate. *) + let filter_none ~io () = + let* Context.{ rw; tbl; _ } = Context.with_full_index ~io ~lru_size () in + Index.filter rw (fun _ -> true); + Context.check_equivalence rw tbl + + (** Test that all bindings are removed when using [filter] with a false + predicate. *) + let filter_all ~io () = + let* Context.{ rw; _ } = Context.with_full_index ~io ~lru_size () in + Index.filter rw (fun _ -> false); + Context.check_equivalence rw (Hashtbl.create 0) + + (** Test that [filter] can be used to remove exactly a single binding. *) + let filter_one ~io () = + let* Context.{ rw; tbl; _ } = Context.with_full_index ~io ~lru_size () in + let k = random_existing_key tbl in + (* Ensure the key is cached in the LRU: [filter] must remove it from there too. *) + let (_ : Value.t) = Index.find rw k in + Hashtbl.remove tbl k; + Index.filter rw (fun (k', _) -> not (String.equal k k')); + Index.check_not_found rw k; + Context.check_equivalence rw tbl + + (** Test that the results of [filter] are propagated to a clone which was + created before. 
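+ Both handles point at the same store on disk, so once [filter] drops the binding through [rw] it must no longer be observable through [rw2] either.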
*) + let clone_then_filter ~io () = + let* Context.{ rw; tbl; clone; _ } = + Context.with_full_index ~io ~lru_size () + in + let k = random_existing_key tbl in + Hashtbl.remove tbl k; + let rw2 = clone ~readonly:false () in + Index.filter rw (fun (k', _) -> not (String.equal k k')); + Context.check_equivalence rw tbl; + Context.check_equivalence rw2 tbl + + (** Test that the results of [filter] are propagated to a clone which was + created after. *) + let filter_then_clone ~io () = + let* Context.{ rw; tbl; clone; _ } = + Context.with_full_index ~io ~lru_size () + in + let k = random_existing_key tbl in + Hashtbl.remove tbl k; + Index.filter rw (fun (k', _) -> not (String.equal k k')); + let rw2 = clone ~readonly:false () in + Context.check_equivalence rw tbl; + Context.check_equivalence rw2 tbl + + (** Test that using [filter] doesn't affect fresh clones created later at + the same path. *) + let empty_after_filter_and_fresh ~io () = + let* Context.{ rw; tbl; clone; _ } = + Context.with_full_index ~io ~lru_size () + in + let k = random_existing_key tbl in + Hashtbl.remove tbl k; + Index.filter rw (fun (k', _) -> not (String.equal k k')); + let rw2 = clone ~fresh:true ~readonly:false () in + (* rw2 should be empty since it is fresh. *) + Context.check_equivalence rw2 (Hashtbl.create 0) + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("filter none", `Quick, filter_none); + ("filter all", `Quick, filter_all); + ("filter one", `Quick, filter_one); + ("clone then filter", `Quick, clone_then_filter); + ("filter then clone", `Quick, filter_then_clone); + ("empty after filter+fresh", `Quick, empty_after_filter_and_fresh); + ] + end + + (** Tests of [Index.v ~throttle]*) + module Throttle = struct + let add_binding ?hook t = + match Index.replace_random ?hook t with + | binding, None -> binding + | binding, Some _ -> + Alcotest.failf + "New binding %a triggered an unexpected merge operation" pp_binding + binding + + let add_overflow_binding ?hook t = + match Index.replace_random ?hook t with + | binding, Some merge_result -> (binding, merge_result) + | binding, None -> + Alcotest.failf "Expected new binding %a to trigger a merge operation" + pp_binding binding + + let merge ~io () = + let* Context.{ rw; tbl; _ } = + Context.with_full_index ~io ~throttle:`Overcommit_memory () + in + let m = Semaphore.make false in + let hook = + Hook.v @@ function `Before -> Semaphore.acquire "m" m | _ -> () + in + let (_ : binding) = add_binding rw in + let merge_result = Index.try_merge_aux ~force:true ~hook rw in + Hashtbl.iter (fun k v -> Index.replace rw k v) tbl; + Semaphore.release m; + Index.await merge_result |> check_completed + + let implicit_merge ~io () = + let log_size = 4 in + let* Context.{ rw; _ } = + Context.with_empty_index ~io ~log_size ~throttle:`Overcommit_memory () + in + let m = Semaphore.make false in + let hook = + Hook.v @@ function `Merge `Before -> Semaphore.acquire "m" m | _ -> () + in + for _ = 1 to log_size do + ignore (add_binding rw : binding) + done; + Log.app (fun m -> m "Triggering an implicit merge"); + let _, merge_result = add_overflow_binding ~hook rw in + Log.app (fun m -> m "Overcommitting to the log while a merge is ongoing"); + for _ = 1 to log_size + 1 do + ignore (add_binding rw : binding) + done; + Semaphore.release m; + Index.await merge_result |> check_completed; + Log.app (fun m -> + m "Triggering a second implicit merge (with an overcommitted log)"); + let _, merge_result = add_overflow_binding rw in + Index.await 
merge_result |> check_completed; + () + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("force merge", `Quick, merge); + ("implicit merge", `Quick, implicit_merge); + ] + end + + module Io_array = Io_array.Make (Platform) + module Force_merge = Force_merge.Make (Platform) + module Test_lru = Test_lru.Make (Platform) + module Flush_callback = Flush_callback.Make (Platform) + + let tests ~io = + Common.report (); + List.map + (fun (name, test) -> (name, test ~io)) + [ + ("io_array", Io_array.tests); + ("merge", Force_merge.tests); + ("live", Live.tests); + ("lru", Test_lru.tests); + ("on restart", DuplicateInstance.tests); + ("readonly", Readonly.tests); + ("close", Close.tests); + ("filter", Filter.tests); + ("flush_callback", Flush_callback.tests); + ("throttle", Throttle.tests); + ] +end diff --git a/test/io/test_lru.ml b/test/io/test_lru.ml new file mode 100644 index 00000000..0629c364 --- /dev/null +++ b/test/io/test_lru.ml @@ -0,0 +1,135 @@ +(** Tests of the in-memory LRU used by the Index implementation. + + NOTE: most other tests in the suite set an LRU size of 0 for simplicity. *) + +module Stats = Index.Stats +open Common + +module Make (Platform : Common.Platform) = struct + module Context = + Common.Make_context + (Platform) + (struct + let root = + Filename.concat "_tests" (Platform.name ^ ".test_log_with_lru") + end) + + module Index = Context.Index + + let check_bool pos ~expected act = Alcotest.(check ~pos bool) "" expected act + let check_int pos ~expected act = Alcotest.(check ~pos int) "" expected act + + let check_value pos ~expected act = + let key = Alcotest.testable Key.pp Key.equal in + Alcotest.(check ~pos key) "" expected act + + let check_lru_stats pos ~hits ~misses = + Alcotest.(check ~pos int) "LRU hits" hits Stats.((get ()).lru_hits); + Alcotest.(check ~pos int) "LRU misses" misses Stats.((get ()).lru_misses) + + let get_new_cached_binding idx = + let k, v = (Key.v (), Value.v ()) in + Index.replace idx k v; + check_value __POS__ ~expected:v (Index.find idx k); + (k, v) + + let test_replace_and_find ~io () = + let lru_size = 1 in + let* { rw = idx; _ } = Context.with_empty_index ~io ~lru_size () in + + (* Check that [replace] populates the LRU: *) + let k1, v1 = (Key.v (), Value.v ()) in + let () = + Stats.reset_stats (); + Index.replace idx k1 v1; + (* [k1] is now in the LRU: *) + check_value __POS__ ~expected:v1 (Index.find idx k1); + check_lru_stats __POS__ ~hits:1 ~misses:0 + in + + (* Check that a second [replace] updates the LRU contents: *) + let k2, v2 = (Key.v (), Value.v ()) in + let () = + assert (k1 <> k2); + Stats.reset_stats (); + Index.replace idx k2 v2; + (* [k2] has replaced [k1] in the LRU, so we miss on [find k1]: *) + check_value __POS__ ~expected:v1 (Index.find idx k1); + check_lru_stats __POS__ ~hits:0 ~misses:1; + (* [k1] is now in the LRU: *) + check_value __POS__ ~expected:v1 (Index.find idx k1); + check_lru_stats __POS__ ~hits:1 ~misses:1 + in + () + + let test_mem ~io () = + let lru_size = 1 in + let* { rw = idx; _ } = Context.with_empty_index ~io ~lru_size () in + + (* Initially, [k2] is in the LRU and [k1] is not: *) + let k1, k2, v1, v2 = (Key.v (), Key.v (), Value.v (), Value.v ()) in + Index.replace idx k1 v1; + Index.replace idx k2 v2; + + (* [mem k2] hits in the LRU: *) + let () = + Stats.reset_stats (); + check_bool __POS__ ~expected:true (Index.mem idx k2); + check_lru_stats __POS__ ~hits:1 ~misses:0 + in + + (* [mem k1] initially misses in the LRU, but subsequent calls hit in the LRU + 
(because the [k2] binding is replaced by [k1] on the miss). *) + let () = + Stats.reset_stats (); + check_bool __POS__ ~expected:true (Index.mem idx k1); + check_lru_stats __POS__ ~hits:0 ~misses:1; + check_bool __POS__ ~expected:true (Index.mem idx k1); + check_lru_stats __POS__ ~hits:1 ~misses:1 + in + () + + (* Check that the LRU is cleared on [clear]. *) + let test_clear ~io () = + let lru_size = 1 in + let* { rw = idx; _ } = Context.with_full_index ~io ~lru_size () in + + (* Add a binding and ensure that it's in the LRU: *) + let k, v = (Key.v (), Value.v ()) in + Index.replace idx k v; + check_value __POS__ ~expected:v (Index.find idx k); + + (* We should miss in the LRU when attempting to find [k] after [clear]: *) + Index.clear idx; + Stats.reset_stats (); + Alcotest.check_raises "find after clear" Not_found (fun () -> + ignore (Index.find idx k)); + check_lru_stats __POS__ ~hits:0 ~misses:1 + + (* Check that bindings in the LRU are properly removed by [filter]: *) + let test_filter ~io () = + let lru_size = 1 in + let* { rw = idx; _ } = Context.with_full_index ~io ~lru_size () in + + (* Add a binding and ensure that it's in the LRU: *) + let k, v = (Key.v (), Value.v ()) in + Index.replace idx k v; + check_value __POS__ ~expected:v (Index.find idx k); + + (* Remove [k] from the index via [filter], then try to [find] it: *) + Index.filter idx (fun (k', _) -> not (Key.equal k k')); + Stats.reset_stats (); + Alcotest.check_raises ~pos:__POS__ "find after filter-false" Not_found + (fun () -> ignore (Index.find idx k)); + check_lru_stats __POS__ ~hits:0 ~misses:1 + + let tests ~io = + List.map + (fun (name, mode, test) -> (name, mode, test ~io)) + [ + ("replace_and_find", `Quick, test_replace_and_find); + ("mem", `Quick, test_mem); + ("clear", `Quick, test_clear); + ("filter", `Quick, test_filter); + ] +end diff --git a/test/test_eio.ml b/test/test_eio.ml new file mode 100644 index 00000000..992ad12d --- /dev/null +++ b/test/test_eio.ml @@ -0,0 +1,11 @@ +module Test_eio = Test_io.Make (struct + include Index_eio.Private.Platform + + let name = "eio" +end) + +let () = + Eio_main.run @@ fun env -> + Eio.Switch.run @@ fun switch -> + let io = { Index_eio.switch; root = Eio.Stdenv.cwd env } in + Alcotest.run "index.eio" (Test_eio.tests ~io) diff --git a/test/test_unix.ml b/test/test_unix.ml new file mode 100644 index 00000000..315c8c90 --- /dev/null +++ b/test/test_unix.ml @@ -0,0 +1,7 @@ +module Test_unix = Test_io.Make (struct + include Index_unix.Private.Platform + + let name = "unix" +end) + +let () = Alcotest.run "index.unix" (Test_unix.tests ~io:()) diff --git a/test/unix/dune b/test/unix/dune deleted file mode 100644 index 65de8482..00000000 --- a/test/unix/dune +++ /dev/null @@ -1,5 +0,0 @@ -(tests - (names main force_merge io_array) - (package index) - (libraries index index.unix alcotest fmt logs logs.fmt re stdlib-shims - threads.posix repr semaphore-compat optint mtime.clock.os)) diff --git a/test/unix/flush_callback.ml b/test/unix/flush_callback.ml deleted file mode 100644 index b347a2be..00000000 --- a/test/unix/flush_callback.ml +++ /dev/null @@ -1,193 +0,0 @@ -module I = Index -open Common -module Semaphore = Semaphore_compat.Semaphore.Binary - -module Context = Common.Make_context (struct - let root = Filename.concat "_tests" "unix.flush_callback" -end) - -module Mutable_callback = struct - type t = { - flush_callback : unit -> unit; - require_callback : - 'a. 
?at_least_once:unit -> ?callback:(unit -> unit) -> (unit -> 'a) -> 'a; - (** Locally override the definition of [flush_callback] inside a - continuation. The default [callback] is the identity function. - - - The continuation must trigger the callback exactly once (unless - [~at_least_once:()] is passed). - - Any callbacks not scoped inside [require_callback] result in - failure. *) - } - - let v () : t = - let unexpected () = Alcotest.fail "Callback call not expected" in - let top = ref unexpected in - let require_callback ?at_least_once ?(callback = fun () -> ()) (type a) - (f : unit -> a) : a = - let called = ref false in - let prev_top = !top in - (top := - fun () -> - match (at_least_once, !called) with - | None, true -> Alcotest.fail "flush_callback already triggered" - | _, _ -> - called := true; - (* Ensure the callback does not recursively invoke an auto-flush. *) - let saved_top = !top in - top := unexpected; - callback (); - top := saved_top); - let a = f () in - if not !called then Alcotest.fail "flush_callback was not called"; - top := prev_top; - a - in - let flush_callback () = !top () in - { require_callback; flush_callback } -end - -let check_no_merge binding = function - | None -> binding - | Some _merge_promise -> - Alcotest.failf "New binding %a triggered an unexpected merge operation" - pp_binding binding - -(** Tests that [close] does not trigger the [flush_callback] *) -let test_close () = - let fail typ () = - Alcotest.failf "Closing <%s> should not trigger the flush_callback" typ - in - Context.with_empty_index ~flush_callback:(fail "empty index") () - Context.ignore; - - Context.with_full_index ~flush_callback:(fail "fresh index") () Context.ignore; - - let calls = ref 0 in - Context.with_empty_index - ~flush_callback:(fun () -> incr calls) - () - (fun Context.{ rw; _ } -> - Index.replace_random rw - |> uncurry check_no_merge - |> (ignore : binding -> unit)); - Alcotest.(check int) - "Closing a dirty index should trigger the flush_callback once" 1 !calls - -(** Test that [flush] triggers the [flush_callback] when necessary. *) -let test_flush () = - let Mutable_callback.{ require_callback; flush_callback } = - Mutable_callback.v () - in - let* Context.{ rw; clone; _ } = Context.with_empty_index ~flush_callback () in - let ro = clone ~readonly:true () in - - Index.flush rw (* No callback, since there are no bindings to persist *); - let binding = Index.replace_random rw |> uncurry check_no_merge in - require_callback - ~callback:(fun () -> - Log.app (fun m -> - m "Checking that newly-added binding %a is not yet visible" pp_binding - binding); - Index.sync ro; - Index.check_not_found ro (fst binding)) - (fun () -> Index.flush rw); - - Log.app (fun m -> - m "After the flush, binding %a should be visible" pp_binding binding); - Index.sync ro; - uncurry (Index.check_binding ro) binding; - - let _ = Index.replace_random rw |> uncurry check_no_merge in - Index.flush ~no_callback:() rw (* No callback, by user request *); - - () - -(** Test that flushes due to [replace] operations trigger the [flush_callback]: - - - 1. Initial flush of [log] before an automatic merge. - - 2. Flushing of [log_async] while a merge is ongoing. 
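     For reference, a minimal sketch of how such a callback is registered in
     these tests (assuming, as [test_close] above suggests, that
     [Context.with_empty_index] simply forwards the optional argument to
     [Index.v]):

     {[
       let calls = ref 0 in
       let* Context.{ rw; _ } =
         Context.with_empty_index ~flush_callback:(fun () -> incr calls) ()
       in
       Index.replace rw (Key.v ()) (Value.v ());
       Index.flush rw;
       (* the explicit flush persists the binding, so the callback ran once *)
       assert (!calls = 1)
     ]}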
*) -let test_replace () = - let log_size = 8 in - let bindings = Tbl.v ~size:log_size in - let binding_list = bindings |> Hashtbl.to_seq |> List.of_seq in - let Mutable_callback.{ require_callback; flush_callback } = - Mutable_callback.v () - in - let* Context.{ rw; clone; _ } = - Context.with_empty_index ~log_size ~flush_callback () - in - let ro = clone ~readonly:true () in - - (* The first [log_size]-many replaces don't trigger the callback. (Provided - the [auto_flush_limit] is not reached, which it is not.) *) - let replace_no_merge binding = - Index.replace' rw (fst binding) (snd binding) - |> check_no_merge binding - |> (ignore : Key.t * Value.t -> unit) - in - binding_list |> List.iter replace_no_merge; - - (* The next replace overflows the log, causing the bindings to be persisted *) - let do_merge = Semaphore.make false in - let overflow_binding, merge_promise = - require_callback - ~callback:(fun () -> - Log.app (fun m -> - m - "Checking newly-added bindings are not visible from a synced RO \ - instance until [flush_callback] is called"); - Index.sync ro; - check_disjoint ro bindings) - (fun () -> - Index.replace_random - ~hook: - (I.Private.Hook.v (function - | `Merge `Before -> Semaphore.acquire do_merge - | _ -> ())) - rw) - in - - Log.app (fun m -> m "Checking merged bindings are now visible"); - Hashtbl.add bindings (fst overflow_binding) (snd overflow_binding); - Index.sync ro; - check_equivalence ro bindings; - - (* New values added during the merge go into [log_async] *) - let async_binding = Index.replace_random rw |> uncurry check_no_merge in - Log.app (fun m -> - m "Added new binding %a while merge is ongoing" pp_binding async_binding); - - (* We could implicitly cause an automatic flush of [log_async], but it's - simpler to just explicitly force one. *) - Index.sync ro; - require_callback ~at_least_once:() - ~callback:(fun () -> - Log.app (fun m -> - m - "Checking async_binding %a is not yet visible from a synced RO \ - instance" - pp_binding async_binding); - check_equivalence ro bindings) - (fun () -> Index.flush rw); - - (* The merge triggers the callback when flushing [log_async] entries into - [log]. (Not necessary here, since [log_async] values were already flushed.) 
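     As an aside, the convention used throughout this file for the optional
     merge promise returned by [Index.replace_random] is, roughly:

     {[
       match Index.replace_random rw with
       | _binding, None -> ()  (* the replace did not trigger a merge *)
       | _binding, Some promise -> Index.await promise |> check_completed
     ]}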
*) - require_callback (fun () -> - Semaphore.release do_merge; - merge_promise |> Option.get |> Index.await |> check_completed); - - Log.app (fun m -> - m - "Checking that all added bindings are now visible from a synced RO \ - instance"); - Hashtbl.add bindings (fst async_binding) (snd async_binding); - Index.sync ro; - check_equivalence ro bindings - -let tests = - [ - ("close", `Quick, test_close); - ("flush", `Quick, test_flush); - ("replace", `Quick, test_replace); - ] diff --git a/test/unix/flush_callback.mli b/test/unix/flush_callback.mli deleted file mode 100644 index d38ba9a9..00000000 --- a/test/unix/flush_callback.mli +++ /dev/null @@ -1 +0,0 @@ -val tests : unit Alcotest.test_case list diff --git a/test/unix/force_merge.ml b/test/unix/force_merge.ml deleted file mode 100644 index 6498b35f..00000000 --- a/test/unix/force_merge.ml +++ /dev/null @@ -1,437 +0,0 @@ -module Hook = Index.Private.Hook -module Semaphore = Semaphore_compat.Semaphore.Binary -open Common - -let root = Filename.concat "_tests" "unix.force_merge" - -module Context = Common.Make_context (struct - let root = root -end) - -let after f = Hook.v (function `After -> f () | _ -> ()) -let after_clear f = Hook.v (function `After_clear -> f () | _ -> ()) -let before f = Hook.v (function `Before -> f () | _ -> ()) - -let before_offset_read f = - Hook.v (function `Before_offset_read -> f () | _ -> ()) - -let test_find_present t tbl = - Hashtbl.iter - (fun k v -> - match Index.find t k with - | res -> - if not (res = v) then Alcotest.fail "Replacing existing value failed." - | exception Not_found -> - Alcotest.failf "Inserted value is not present anymore: %s." k) - tbl - -let test_one_entry r k v = - match Index.find r k with - | res -> - if not (res = v) then Alcotest.fail "Replacing existing value failed." - | exception Not_found -> - Alcotest.failf "Inserted value is not present anymore: %s." 
k - -let test_fd () = - match Common.get_open_fd root with - | `Ok lines -> ( - let contains sub s = - try - ignore (Re.Str.search_forward (Re.Str.regexp sub) s 0); - true - with Not_found -> false - in - let result = - let data, rs = List.partition (contains "data") lines in - if List.length data > 2 then - Alcotest.fail "Too many file descriptors opened for data files"; - let log, rs = List.partition (contains "log") rs in - if List.length log > 2 then - Alcotest.fail "Too many file descriptors opened for log files"; - let lock, rs = List.partition (contains "lock") rs in - if List.length lock > 2 then - Alcotest.fail "Too many file descriptors opened for lock files"; - if List.length rs > 0 then - Alcotest.fail "Unknown file descriptors opened"; - `Ok () - in - match result with - | `Ok () -> () - | `Skip err -> Log.warn (fun m -> m "`test_fd` was skipped: %s" err)) - | `Skip err -> Log.warn (fun m -> m "`test_fd` was skipped: %s" err) - -let readonly_s () = - let* { Context.tbl; clone; _ } = Context.with_full_index () in - let r1 = clone ~readonly:true () in - let r2 = clone ~readonly:true () in - let r3 = clone ~readonly:true () in - test_find_present r1 tbl; - test_find_present r2 tbl; - test_find_present r3 tbl; - test_fd () - -let readonly () = - let* { Context.tbl; clone; _ } = Context.with_full_index () in - let r1 = clone ~readonly:true () in - let r2 = clone ~readonly:true () in - let r3 = clone ~readonly:true () in - Hashtbl.iter - (fun k v -> - test_one_entry r1 k v; - test_one_entry r2 k v; - test_one_entry r3 k v) - tbl; - test_fd () - -let readonly_and_merge () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let w = rw in - let r1 = clone ~readonly:true () in - let r2 = clone ~readonly:true () in - let r3 = clone ~readonly:true () in - let interleave () = - let k1 = Key.v () in - let v1 = Value.v () in - Index.replace w k1 v1; - Index.flush w; - let t1 = Index.try_merge_aux ~force:true w in - Index.sync r1; - Index.sync r2; - Index.sync r3; - test_one_entry r1 k1 v1; - test_one_entry r2 k1 v1; - test_one_entry r3 k1 v1; - - let k2 = Key.v () in - let v2 = Value.v () in - Index.replace w k2 v2; - Index.flush w; - Index.sync r1; - Index.sync r2; - Index.sync r3; - test_one_entry r1 k1 v1; - let t2 = Index.try_merge_aux ~force:true w in - test_one_entry r2 k2 v2; - test_one_entry r3 k1 v1; - - let k2 = Key.v () in - let v2 = Value.v () in - let k3 = Key.v () in - let v3 = Value.v () in - test_one_entry r1 k1 v1; - Index.replace w k2 v2; - Index.flush w; - Index.sync r1; - let t3 = Index.try_merge_aux ~force:true w in - test_one_entry r1 k1 v1; - Index.replace w k3 v3; - Index.flush w; - Index.sync r3; - let t4 = Index.try_merge_aux ~force:true w in - test_one_entry r3 k3 v3; - - let k2 = Key.v () in - let v2 = Value.v () in - Index.replace w k2 v2; - Index.flush w; - Index.sync r2; - Index.sync r3; - test_one_entry w k2 v2; - let t5 = Index.try_merge_aux ~force:true w in - test_one_entry w k2 v2; - test_one_entry r2 k2 v2; - test_one_entry r3 k1 v1; - - let k2 = Key.v () in - let v2 = Value.v () in - Index.replace w k2 v2; - Index.flush w; - Index.sync r2; - Index.sync r3; - test_one_entry r2 k1 v1; - let t6 = Index.try_merge_aux ~force:true w in - test_one_entry w k2 v2; - test_one_entry r2 k2 v2; - test_one_entry r3 k2 v2; - Index.await t1 |> check_completed; - Index.await t2 |> check_completed; - Index.await t3 |> check_completed; - Index.await t4 |> check_completed; - Index.await t5 |> check_completed; - Index.await t6 |> check_completed - in - - for _ 
= 1 to 10 do - interleave () - done; - test_fd () - -(* A force merge has an implicit flush, however, if the replace occurs at the end of the merge, the value is not flushed *) -let write_after_merge () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let w = rw in - let r1 = clone ~readonly:true () in - let k1 = Key.v () in - let v1 = Value.v () in - let k2 = Key.v () in - let v2 = Value.v () in - Index.replace w k1 v1; - let hook = after (fun () -> Index.replace w k2 v2) in - let t = Index.try_merge_aux ~force:true ~hook w in - Index.await t |> check_completed; - Index.sync r1; - test_one_entry r1 k1 v1; - Alcotest.check_raises (Printf.sprintf "Absent value was found: %s." k2) - Not_found (fun () -> ignore_value (Index.find r1 k2)) - -let replace_while_merge () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let w = rw in - let r1 = clone ~readonly:true () in - let k1 = Key.v () in - let v1 = Value.v () in - let k2 = Key.v () in - let v2 = Value.v () in - Index.replace w k1 v1; - let hook = - before (fun () -> - Index.replace w k2 v2; - test_one_entry w k2 v2) - in - let t = Index.try_merge_aux ~force:true ~hook w in - Index.sync r1; - test_one_entry r1 k1 v1; - Index.await t |> check_completed - -(* note that here we cannot do - `test_one_entry r1 k2 v2` - as there is no way to guarantee that the latests value - added by a RW instance is found by a RO instance -*) - -let find_while_merge () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let w = rw in - let k1 = Key.v () in - let v1 = Value.v () in - Index.replace w k1 v1; - let f () = test_one_entry w k1 v1 in - let t1 = Index.try_merge_aux ~force:true ~hook:(after f) w in - let t2 = Index.try_merge_aux ~force:true ~hook:(after f) w in - let r1 = clone ~readonly:true () in - let f () = test_one_entry r1 k1 v1 in - let t3 = Index.try_merge_aux ~force:true ~hook:(before f) w in - let t4 = Index.try_merge_aux ~force:true ~hook:(before f) w in - Index.await t1 |> check_completed; - Index.await t2 |> check_completed; - Index.await t3 |> check_completed; - Index.await t4 |> check_completed - -let find_in_async_generation_change () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let w = rw in - let r1 = clone ~readonly:true () in - let k1 = Key.v () in - let v1 = Value.v () in - let f () = - Index.replace w k1 v1; - Index.flush w; - Index.sync r1; - test_one_entry r1 k1 v1 - in - let t1 = Index.try_merge_aux ~force:true ~hook:(before f) w in - Index.await t1 |> check_completed - -let find_in_async_same_generation () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let w = rw in - let r1 = clone ~readonly:true () in - let k1 = Key.v () in - let v1 = Value.v () in - let k2 = Key.v () in - let v2 = Value.v () in - let f () = - Index.replace w k1 v1; - Index.flush w; - Index.sync r1; - test_one_entry r1 k1 v1; - Index.replace w k2 v2; - Index.flush w; - Index.sync r1; - test_one_entry r1 k2 v2 - in - let t1 = Index.try_merge_aux ~force:true ~hook:(before f) w in - Index.await t1 |> check_completed - -let sync_before_and_after_clearing_async () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let w = rw in - let ro = clone ~readonly:true () in - let k1 = Key.v () in - let v1 = Value.v () in - let k2 = Key.v () in - let v2 = Value.v () in - let add_in_async () = - Index.replace w k1 v1; - Index.replace w k2 v2; - Index.flush w; - Log.debug (fun l -> l "RO updates async's offset"); - Index.sync ro - in - let sync_before_clear_async () 
= - Log.debug (fun l -> l "RO updates instance's generation"); - Index.sync ro - in - let hook = - Hook.v (function - | `Before -> add_in_async () - | `After_clear -> sync_before_clear_async () - | _ -> ()) - in - let t1 = Index.try_merge_aux ~force:true ~hook w in - Index.await t1 |> check_completed; - Index.sync ro; - test_one_entry ro k1 v1; - test_one_entry ro k2 v2 - -(** RW adds a value in log and flushes it, so every subsequent RO sync should - find that value. But if the RO sync occurs during a merge, after a clear but - before a generation change, then the value is missed. Also test ro find at - this point. *) -let sync_after_clear_log () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.flush rw; - let hook = after_clear (fun () -> Index.sync ro) in - let t = Index.try_merge_aux ~force:true ~hook rw in - Index.await t |> check_completed; - test_one_entry ro k1 v1; - let k2, v2 = (Key.v (), Value.v ()) in - Index.replace rw k2 v2; - Index.flush rw; - Index.sync ro; - let hook = after_clear (fun () -> test_one_entry ro k1 v1) in - let t = Index.try_merge_aux ~force:true ~hook rw in - Index.await t |> check_completed - -(** during a merge RO sync can miss a value if it reads the generation before - the generation is updated. *) -let merge_during_sync () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.flush rw; - let hook = - before_offset_read (fun () -> - let t = Index.try_merge_aux ~force:true rw in - Index.await t |> check_completed) - in - Index.sync' ~hook ro; - test_one_entry ro k1 v1 - -let test_is_merging () = - let* Context.{ rw; _ } = Context.with_empty_index () in - let add_binding_and_merge ~hook = - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - let t = Index.try_merge_aux ~force:true ~hook rw in - Index.await t |> check_completed - in - let f msg b () = Alcotest.(check bool) msg (Index.is_merging rw) b in - f "before merge" false (); - add_binding_and_merge ~hook:(before (f "before" true)); - f "between merge" false (); - add_binding_and_merge ~hook:(after (f "after" true)); - add_binding_and_merge ~hook:(after_clear (f "after clear" true)) - -let add_bindings index = - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace index k1 v1 - -(** Test that a clear aborts the merge. *) -let test_non_blocking_clear () = - let* Context.{ rw; _ } = Context.with_empty_index () in - let merge_started = Semaphore.make false and merge = Semaphore.make false in - let merge_hook = - Hook.v @@ function - | `Before -> - Semaphore.release merge_started; - Semaphore.acquire merge - | `After -> Alcotest.fail "Merge should have been aborted by clear" - | _ -> () - in - let clear_hook = - Hook.v @@ function - | `Abort_signalled -> Semaphore.release merge - | `IO_clear -> () - in - add_bindings rw; - let thread = Index.try_merge_aux ~force:true ~hook:merge_hook rw in - Semaphore.acquire merge_started; - add_bindings rw; - Index.clear' ~hook:clear_hook rw; - match Index.await thread with - | Ok `Aborted -> () - | _ -> Alcotest.fail "merge should have aborted" - -(** The test consists of aborting a first merge after one entry is added in the - ./merge file and checking that a second merge succeeds. Regression test for - PR 211 in which the second merge was triggering an assert failure. 
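    The deterministic interleavings in this file all follow the same pattern:
    pause the merge thread at a chosen stage with a binary semaphore, run the
    racing operation, then let the merge resume. A condensed sketch, using the
    same [Hook] and [Semaphore] helpers as the surrounding tests:

    {[
      let merge_started = Semaphore.make false in
      let resume_merge = Semaphore.make false in
      let hook =
        Hook.v @@ function
        | `Before ->
            Semaphore.release merge_started;
            Semaphore.acquire resume_merge
        | _ -> ()
      in
      let t = Index.try_merge_aux ~force:true ~hook rw in
      Semaphore.acquire merge_started;
      (* ... the merge is paused here: run the racing operation ... *)
      Semaphore.release resume_merge;
      Index.await t |> check_completed
    ]}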
*) -let test_abort_merge ~abort_merge () = - let* { Context.rw; clone; _ } = Context.with_full_index () in - let merge_started = Semaphore.make false and merge = Semaphore.make false in - let merge_hook = - Hook.v @@ function - | `After_first_entry -> - Semaphore.release merge_started; - Semaphore.acquire merge - | `After | `After_clear -> - Alcotest.fail "Merge should have been aborted by clear" - | `Before -> () - in - let abort_hook = - Hook.v @@ function - | `Abort_signalled -> Semaphore.release merge - | `IO_clear -> () - in - let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in - Semaphore.acquire merge_started; - abort_merge ~hook:abort_hook rw; - (match Index.await t with - | Ok `Aborted -> () - | _ -> Alcotest.fail "Merge should have aborted"); - let rw = clone ~readonly:false ~fresh:false () in - add_bindings rw; - let t = Index.try_merge_aux ~force:true rw in - Index.await t |> check_completed - -let test_clear_aborts_merge = test_abort_merge ~abort_merge:Index.clear' - -let test_close_immediately_aborts_merge = - test_abort_merge ~abort_merge:(Index.close' ~immediately:()) - -let tests = - [ - ("readonly in sequence", `Quick, readonly_s); - ("readonly interleaved", `Quick, readonly); - ("interleaved merge", `Quick, readonly_and_merge); - ("write at the end of merge", `Quick, write_after_merge); - ("write in log_async", `Quick, replace_while_merge); - ("find while merging", `Quick, find_while_merge); - ("find in async without log", `Quick, find_in_async_generation_change); - ("find in async with log", `Quick, find_in_async_same_generation); - ( "sync before and after clearing the async", - `Quick, - sync_before_and_after_clearing_async ); - ("sync and find after log cleared", `Quick, sync_after_clear_log); - ("merge during ro sync", `Quick, merge_during_sync); - ("is_merging", `Quick, test_is_merging); - ("clear is not blocking", `Quick, test_non_blocking_clear); - ("`clear` aborts merge", `Quick, test_clear_aborts_merge); - ( "`close ~immediately` aborts merge", - `Quick, - test_close_immediately_aborts_merge ); - ] diff --git a/test/unix/force_merge.mli b/test/unix/force_merge.mli deleted file mode 100644 index d38ba9a9..00000000 --- a/test/unix/force_merge.mli +++ /dev/null @@ -1 +0,0 @@ -val tests : unit Alcotest.test_case list diff --git a/test/unix/io_array.ml b/test/unix/io_array.ml deleted file mode 100644 index e79e670f..00000000 --- a/test/unix/io_array.ml +++ /dev/null @@ -1,81 +0,0 @@ -module Int63 = Optint.Int63 -module IO = Index_unix.Private.IO - -let ( // ) = Filename.concat -let root = "_tests" // "unix.io_array" - -module Entry = struct - module Key = Common.Key - module Value = Common.Value - - type t = Key.t * Value.t - - let encoded_size = Key.encoded_size + Value.encoded_size - - let decode string off = - let key = Key.decode string off in - let value = Value.decode string (off + Key.encoded_size) in - (key, value) - - let append_io io (key, value) = - let encoded_key = Key.encode key in - let encoded_value = Value.encode value in - IO.append io encoded_key; - IO.append io encoded_value -end - -module IOArray = Index.Private.Io_array.Make (IO) (Entry) - -let entry = Alcotest.(pair string string) - -let fresh_io name = - IO.v ~fresh:true ~generation:Int63.zero ~fan_size:Int63.zero (root // name) - -(* Append a random sequence of [size] keys to an IO instance and return - a pair of an IOArray and an equivalent in-memory array. 
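   A single-entry version of the round trip, for orientation (same helpers as
   the surrounding code; [fresh_io] starts from an empty file):

   {[
     let io = fresh_io "example" in
     let e = (Common.Key.v (), Common.Value.v ()) in
     Entry.append_io io e;
     IO.flush io;
     let arr = IOArray.v io in
     assert (IOArray.get arr Int63.zero = e)
   ]}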
*) -let populate_random ~size io = - let rec loop acc = function - | 0 -> acc - | n -> - let e = (Common.Key.v (), Common.Value.v ()) in - Entry.append_io io e; - loop (e :: acc) (n - 1) - in - let mem_arr = Array.of_list (List.rev (loop [] size)) in - let io_arr = IOArray.v io in - IO.flush io; - (mem_arr, io_arr) - -(* Tests *) -let read_sequential () = - let size = 1000 in - let io = fresh_io "read_sequential" in - let mem_arr, io_arr = populate_random ~size io in - for i = 0 to size - 1 do - let expected = mem_arr.(i) in - let actual = IOArray.get io_arr (Int63.of_int i) in - Alcotest.(check entry) - (Fmt.str "Inserted key at index %i is accessible" i) - expected actual - done - -let read_sequential_prefetch () = - let size = 1000 in - let io = fresh_io "read_sequential_prefetch" in - let mem_arr, io_arr = populate_random ~size io in - IOArray.pre_fetch io_arr ~low:Int63.zero ~high:(Int63.of_int 999); - - (* Read the arrays backwards *) - for i = size - 1 to 0 do - let expected = mem_arr.(i) in - let actual = IOArray.get io_arr (Int63.of_int i) in - Alcotest.(check entry) - (Fmt.str "Inserted key at index %i is accessible" i) - expected actual - done - -let tests = - [ - ("fresh", `Quick, read_sequential); - ("prefetch", `Quick, read_sequential_prefetch); - ] diff --git a/test/unix/io_array.mli b/test/unix/io_array.mli deleted file mode 100644 index d38ba9a9..00000000 --- a/test/unix/io_array.mli +++ /dev/null @@ -1 +0,0 @@ -val tests : unit Alcotest.test_case list diff --git a/test/unix/main.ml b/test/unix/main.ml deleted file mode 100644 index 9393d90f..00000000 --- a/test/unix/main.ml +++ /dev/null @@ -1,1067 +0,0 @@ -module Hook = Index.Private.Hook -module Layout = Index.Private.Layout -module Semaphore = Semaphore_compat.Semaphore.Binary -module I = Index -open Common - -type index = Common.Index.t - -let ( // ) = Filename.concat -let root = "_tests" // "unix.main" - -module Context = Common.Make_context (struct - let root = root -end) - -(* Helper functions *) - -(** [random_new_key tbl] returns a random key which is not in [tbl]. *) -let rec random_new_key tbl = - let r = Key.v () in - if Hashtbl.mem tbl r then random_new_key tbl else r - -exception Found of string - -(** [random_existing_key tbl] returns a random key from [tbl]. *) -let random_existing_key tbl = - try - Hashtbl.iter (fun k _ -> raise (Found k)) tbl; - Alcotest.fail "Provided table contains no keys." - with Found k -> k - -let test_replace t = - let k = Key.v () in - let v = Value.v () in - let v' = Value.v () in - Index.replace t k v; - Index.replace t k v'; - Index.check_binding t k v' - -let test_find_absent t tbl = - let rec loop i = - if i = 0 then () - else - let k = random_new_key tbl in - Alcotest.check_raises (Printf.sprintf "Absent value was found: %s." k) - Not_found (fun () -> ignore_value (Index.find t k)); - loop (i - 1) - in - loop 100 - -let mem_entry f k _ = - if not (f k) then Alcotest.failf "Wrong insertion: %s key is missing." 
k - -let mem_index_entry index = mem_entry (Index.mem index) -let mem_tbl_entry tbl = mem_entry (Hashtbl.mem tbl) - -let check_equivalence_mem index tbl = - Hashtbl.iter (mem_index_entry index) tbl; - Index.iter (mem_tbl_entry tbl) index - -(* Basic tests of find/replace on a live index *) -module Live = struct - let find_present_live () = - let* Context.{ rw; tbl; _ } = Context.with_full_index () in - check_equivalence rw tbl - - let find_absent_live () = - let* Context.{ rw; tbl; _ } = Context.with_full_index () in - test_find_absent rw tbl - - let replace_live () = - let* Context.{ rw; _ } = Context.with_full_index () in - test_replace rw - - let different_size_for_key () = - let* Context.{ rw; _ } = Context.with_empty_index () in - let k = String.init 2 (fun _i -> random_char ()) in - let v = Value.v () in - let exn = I.Private.Data.Invalid_size k in - Alcotest.check_raises - "Cannot add a key of a different size than string_size." exn (fun () -> - Index.replace rw k v) - - let different_size_for_value () = - let* Context.{ rw; _ } = Context.with_empty_index () in - let k = Key.v () in - let v = String.init 200 (fun _i -> random_char ()) in - let exn = I.Private.Data.Invalid_size v in - Alcotest.check_raises - "Cannot add a value of a different size than string_size." exn (fun () -> - Index.replace rw k v) - - let membership () = - let* Context.{ rw; tbl; _ } = Context.with_full_index () in - check_equivalence_mem rw tbl - - let iter_after_clear () = - let* Context.{ rw; _ } = Context.with_full_index () in - let () = Index.clear rw in - Index.iter (fun _ _ -> Alcotest.fail "Indexed not cleared.") rw - - let find_after_clear () = - let* Context.{ rw; tbl; _ } = Context.with_full_index () in - let () = Index.clear rw in - Hashtbl.fold - (fun k _ () -> - match Index.find rw k with - | exception Not_found -> () - | _ -> Alcotest.fail "Indexed not cleared.") - tbl () - - let open_after_clear () = - let* Context.{ clone; rw; _ } = Context.with_full_index () in - Index.clear rw; - let rw2 = clone ~fresh:false ~readonly:false () in - Alcotest.check_raises "Finding absent should raise Not_found" Not_found - (fun () -> Key.v () |> Index.find rw2 |> ignore_value) - - let files_on_disk_after_clear () = - let root = Context.fresh_name "full_index" in - let rw = Index.v ~fresh:true ~log_size:Default.log_size root in - for _ = 1 to Default.size do - let k = Key.v () in - let v = Value.v () in - Index.replace rw k v - done; - Index.flush rw; - Index.clear rw; - Index.close rw; - let module I = Index_unix.Private.IO in - let test_there path = - match I.v_readonly path with - | Error `No_file_on_disk -> Alcotest.failf "expected file: %s" path - | Ok data -> - Alcotest.(check int) path (I.size data) (I.size_header data); - I.close data - in - let test_not_there path = - match I.v_readonly path with - | Error `No_file_on_disk -> () - | Ok _ -> Alcotest.failf "do not expect file: %s" path - in - test_there (Layout.log ~root); - test_not_there (Layout.log_async ~root); - test_not_there (Layout.data ~root) - - let duplicate_entries () = - let* Context.{ rw; _ } = Context.with_empty_index () in - let k1, v1, v2, v3 = (Key.v (), Value.v (), Value.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.replace rw k1 v2; - Index.replace rw k1 v3; - let thread = Index.try_merge_aux ~force:true rw in - Index.await thread |> check_completed; - let once = ref true in - Index.iter - (fun k v -> - if !once && k = k1 && v = v3 then once := false - else Alcotest.fail "Index should contain a single entry.") - rw - - let 
tests = - [ - ("find (present)", `Quick, find_present_live); - ("find (absent)", `Quick, find_absent_live); - ("replace", `Quick, replace_live); - ("fail add (key)", `Quick, different_size_for_key); - ("fail add (value)", `Quick, different_size_for_value); - ("membership", `Quick, membership); - ("clear and iter", `Quick, iter_after_clear); - ("clear and find", `Quick, find_after_clear); - ("open after clear", `Quick, open_after_clear); - ("files on disk after clear", `Quick, files_on_disk_after_clear); - ("duplicate entries", `Quick, duplicate_entries); - ] -end - -(* Tests of behaviour after restarting the index *) -module DuplicateInstance = struct - let find_present () = - let* Context.{ rw; tbl; clone; _ } = Context.with_full_index () in - let (_ : index) = clone ~readonly:false () in - check_equivalence rw tbl - - let find_absent () = - let* Context.{ rw; tbl; clone; _ } = Context.with_full_index () in - let (_ : index) = clone ~readonly:false () in - test_find_absent rw tbl - - let replace () = - let* Context.{ rw; clone; _ } = Context.with_full_index ~size:5 () in - let (_ : index) = clone ~readonly:false () in - test_replace rw - - let membership () = - let* Context.{ tbl; clone; _ } = Context.with_full_index () in - let rw' = clone ~readonly:false () in - check_equivalence_mem rw' tbl - - let fail_restart_fresh () = - let reuse_name = Context.fresh_name "empty_index" in - let cache = Index.empty_cache () in - let rw = - Index.v ~cache ~fresh:true ~readonly:false ~log_size:4 reuse_name - in - let exn = I.RO_not_allowed in - Alcotest.check_raises "Index readonly cannot be fresh." exn (fun () -> - ignore_index - (Index.v ~cache ~fresh:true ~readonly:true ~log_size:4 reuse_name)); - Index.close rw - - let sync () = - let* Context.{ rw; clone; _ } = Context.with_full_index () in - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - let rw2 = clone ~readonly:false () in - let k2, v2 = (Key.v (), Value.v ()) in - Index.replace rw2 k2 v2; - Index.check_binding rw k2 v2; - Index.check_binding rw2 k1 v1 - - let duplicate_entries () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let k1, v1, v2 = (Key.v (), Value.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.replace rw k1 v2; - Index.close rw; - let rw2 = clone ~readonly:false () in - let once = ref true in - Index.iter - (fun k v -> - if !once && k = k1 && v = v2 then once := false - else Alcotest.fail "Index should contain a single entry.") - rw2 - - let tests = - [ - ("find (present)", `Quick, find_present); - ("find (absent)", `Quick, find_absent); - ("replace", `Quick, replace); - ("membership", `Quick, membership); - ("fail restart readonly fresh", `Quick, fail_restart_fresh); - ("in sync", `Quick, sync); - ("duplicate entries in log", `Quick, duplicate_entries); - ] -end - -(* Tests of read-only indices *) -module Readonly = struct - let readonly () = - let* Context.{ rw; clone; tbl; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let tbl2 = - let h = Hashtbl.create 0 in - Hashtbl.iter (fun k _ -> Hashtbl.add h k (Value.v ())) tbl; - h - in - Hashtbl.iter (fun k v -> Index.replace rw k v) tbl2; - Index.flush rw; - Index.sync ro; - check_equivalence ro tbl2 - - let readonly_v_after_replace () = - let* Context.{ rw; clone; _ } = Context.with_full_index () in - let k = Key.v () in - let v = Value.v () in - Index.replace rw k v; - let ro = clone ~readonly:true () in - Index.close rw; - Index.close ro; - let rw = clone ~readonly:false () in - Index.check_binding rw 
k v - - let readonly_clear () = - let check_no_index_entry index k = - Alcotest.check_raises (Fmt.str "Find %s key after clearing." k) Not_found - (fun () -> ignore_value (Index.find index k)) - in - let* Context.{ rw; tbl; clone; _ } = - (* Ensure that the clear also wipes the LRU *) - let lru_size = 10 in - Context.with_full_index ~lru_size () - in - let ro = clone ~readonly:true () in - Index.clear rw; - Index.sync ro; - Log.info (fun m -> m "Checking that RO observes the empty index"); - Hashtbl.iter (fun k _ -> check_no_index_entry ro k) tbl; - Index.close rw; - Index.close ro; - let rw = clone ~readonly:false () in - let ro = clone ~readonly:true () in - let k, v = (Key.v (), Value.v ()) in - Index.replace rw k v; - Index.check_binding rw k v; - check_no_index_entry ro k; - Index.flush rw; - Index.sync ro; - Index.check_binding rw k v; - Index.check_binding ro k v; - Index.clear rw; - check_no_index_entry rw k; - Index.check_binding ro k v; - Index.sync ro; - check_no_index_entry rw k; - check_no_index_entry ro k - - (* If sync is called right after the generation is set, and before the old - file is removed, the readonly instance reopens the old file. It does not - try to reopen the file until the next generation change occurs. *) - let readonly_io_clear () = - let* Context.{ rw; clone; _ } = Context.with_full_index () in - let ro = clone ~readonly:true () in - let hook = - Hook.v @@ function `IO_clear -> Index.sync ro | `Abort_signalled -> () - in - Index.clear' ~hook rw; - let k, v = (Key.v (), Value.v ()) in - Index.replace rw k v; - Index.flush rw; - Index.sync ro; - Index.check_binding rw k v; - Index.check_binding ro k v - - let hashtbl_pick tbl = - match Hashtbl.fold (fun k v acc -> (k, v) :: acc) tbl [] with - | h :: _ -> h - | _ -> assert false - - let before l m = - I.Private.Hook.v @@ function - | `Before -> Semaphore.acquire l - | `After -> Semaphore.release m - | _ -> () - - let after l m = - I.Private.Hook.v @@ function - | `After_offset_read -> - Semaphore.release l; - Semaphore.acquire m - | _ -> () - - (* check that the ro instance is "snapshot isolated", e.g. it can read old - values, even when rw overwrites them. *) - let readonly_snapshot () = - let* Context.{ rw; clone; tbl; _ } = - Context.with_full_index ~throttle:`Overcommit_memory () - in - let ro = clone ~readonly:true () in - let tbl2 = - let h = Hashtbl.create 0 in - Hashtbl.iter (fun k _ -> Hashtbl.add h k (Value.v ())) tbl; - h - in - let merge = Semaphore.make false and sync = Semaphore.make false in - let k, v = hashtbl_pick tbl2 in - - Index.clear rw; - Index.replace rw k v; - let thread = Index.try_merge_aux ~force:true ~hook:(before merge sync) rw in - Hashtbl.iter (Index.replace rw) tbl2; - Index.flush rw; - check_equivalence rw tbl2; - check_equivalence ro tbl; - Index.sync' ~hook:(after merge sync) ro; - check_equivalence ro tbl2; - Semaphore.release merge; - Index.await thread |> check_completed - - let fail_readonly_add () = - let* Context.{ clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let exn = I.RO_not_allowed in - Alcotest.check_raises "Index readonly cannot write." exn (fun () -> - Index.replace ro (Key.v ()) (Value.v ())) - - (** Tests that the entries that are not flushed cannot be read by a readonly - index. The test relies on the fact that, for log_size > 0, adding one - entry into an empty index does not lead to flush/merge. 
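      The visibility contract exercised here and in [readonly] above can be
      summarised with the following sketch (same helpers as the surrounding
      tests):

      {[
        let k, v = (Key.v (), Value.v ()) in
        Index.replace rw k v;
        Index.sync ro;
        Index.check_not_found ro k;  (* not flushed yet: invisible to RO *)
        Index.flush rw;
        Index.sync ro;
        Index.check_binding ro k v   (* flushed and synced: now visible *)
      ]}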
*) - let fail_readonly_read () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.sync ro; - Alcotest.check_raises "Index readonly cannot read if data is not flushed." - Not_found (fun () -> ignore_value (Index.find ro k1)) - - let readonly_v_in_sync () = - let* Context.{ rw; clone; _ } = Context.with_full_index () in - let k, v = (Key.v (), Value.v ()) in - Index.replace rw k v; - Index.flush rw; - let ro = clone ~readonly:true () in - Log.info (fun m -> - m "Checking that RO observes the flushed binding %a" pp_binding (k, v)); - Index.check_binding ro k v - - (** Readonly finds value in log before and after clear. Before sync the - deleted value is still found. *) - let readonly_add_log_before_clear () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.flush rw; - Index.sync ro; - Index.check_binding ro k1 v1; - Index.clear rw; - Index.check_binding ro k1 v1; - Index.sync ro; - Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) - Not_found (fun () -> ignore_value (Index.find ro k1)) - - (** Readonly finds value in index before and after clear. Before sync the - deleted value is still found. *) - let readonly_add_index_before_clear () = - let* Context.{ rw; clone; _ } = Context.with_full_index () in - let ro = clone ~readonly:true () in - Index.clear rw; - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - let thread = Index.try_merge_aux ~force:true rw in - Index.await thread |> check_completed; - Index.sync ro; - Index.check_binding ro k1 v1; - Index.clear rw; - Index.check_binding ro k1 v1; - Index.sync ro; - Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) - Not_found (fun () -> ignore_value (Index.find ro k1)) - - (** Readonly finds old value in log after clear and after new values are - added, before a sync. *) - let readonly_add_after_clear () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.flush rw; - Index.sync ro; - Index.check_binding ro k1 v1; - Index.clear rw; - let k2, v2 = (Key.v (), Value.v ()) in - Index.replace rw k2 v2; - Index.flush rw; - Index.check_binding ro k1 v1; - Index.sync ro; - Index.check_binding ro k2 v2; - Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) - Not_found (fun () -> ignore_value (Index.find rw k1)); - Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) - Not_found (fun () -> ignore_value (Index.find ro k1)) - - (** Readonly finds old value in index after clear and after new values are - added, before a sync. This is because the readonly instance still uses the - old index file, before being replaced by the merge. 
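      The same staleness window exists for the log file, as
      [readonly_add_log_before_clear] above shows; in condensed form:

      {[
        Index.replace rw k v;
        Index.flush rw;
        Index.sync ro;
        Index.check_binding ro k v;
        Index.clear rw;
        Index.check_binding ro k v;  (* RO still reads the pre-clear state *)
        Index.sync ro;
        Index.check_not_found ro k   (* the clear becomes visible on sync *)
      ]}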
*) - let readonly_add_index_after_clear () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - Index.clear rw; - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - let t = Index.try_merge_aux ~force:true rw in - Index.await t |> check_completed; - Index.sync ro; - Index.clear rw; - let k2, v2 = (Key.v (), Value.v ()) in - Index.replace rw k2 v2; - let t = Index.try_merge_aux ~force:true rw in - Index.await t |> check_completed; - Index.check_binding ro k1 v1; - Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) - Not_found (fun () -> ignore_value (Index.find rw k1)); - Index.sync ro; - Alcotest.check_raises (Printf.sprintf "Found %s key after clearing." k1) - Not_found (fun () -> ignore_value (Index.find ro k1)); - Index.check_binding ro k2 v2 - - let readonly_open_after_clear () = - let* Context.{ clone; rw; _ } = Context.with_full_index () in - Index.clear rw; - let ro = clone ~fresh:false ~readonly:true () in - Alcotest.check_raises "Finding absent should raise Not_found" Not_found - (fun () -> Key.v () |> Index.find ro |> ignore_value) - - let readonly_sync_and_merge () = - let* Context.{ clone; rw; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let replace = Semaphore.make false - and merge = Semaphore.make false - and sync = Semaphore.make false in - let merge_hook = - I.Private.Hook.v @@ function - | `Before -> - Semaphore.release replace; - Semaphore.acquire merge - | `After -> Semaphore.release sync - | _ -> () - in - let sync_hook = - I.Private.Hook.v @@ function - | `After_offset_read -> - Semaphore.release merge; - Semaphore.acquire sync - | _ -> () - in - let gen i = (String.make Key.encoded_size i, Value.v ()) in - let k1, v1 = gen '1' in - let k2, v2 = gen '2' in - let k3, v3 = gen '3' in - - Index.replace rw k1 v1; - let thread = Index.try_merge_aux ~force:true ~hook:merge_hook rw in - Semaphore.acquire replace; - Index.replace rw k2 v2; - Index.replace rw k3 v3; - Semaphore.release replace; - Index.flush rw; - Index.sync' ~hook:sync_hook ro; - Index.await thread |> check_completed; - Semaphore.release sync; - Index.check_binding ro k2 v2; - Index.check_binding ro k3 v3 - - let readonly_sync_and_merge_clear () = - let* Context.{ clone; rw; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let merge = Semaphore.make false and sync = Semaphore.make false in - let merge_hook = - I.Private.Hook.v @@ function - | `Before -> - Semaphore.release sync; - Semaphore.acquire merge - | `After_clear -> Semaphore.release sync - | _ -> () - in - let sync_hook = - I.Private.Hook.v @@ function - | `After_offset_read -> - Semaphore.release merge; - Semaphore.acquire sync - | _ -> () - in - let gen i = (String.make Key.encoded_size i, Value.v ()) in - let k1, v1 = gen '1' in - let k2, v2 = gen '2' in - - Index.replace rw k1 v1; - Index.flush rw; - let thread = Index.try_merge_aux ~force:true ~hook:merge_hook rw in - Index.replace rw k2 v2; - Semaphore.acquire sync; - Index.sync' ~hook:sync_hook ro; - Index.await thread |> check_completed; - Semaphore.release sync; - Index.check_binding ro k1 v1 - - let reload_log_async () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let ro = clone ~readonly:true () in - let reload_log = ref 0 in - let reload_log_async = ref 0 in - let merge = Semaphore.make false in - let sync = Semaphore.make false in - let merge_hook = - I.Private.Hook.v @@ function - | `Before -> - Semaphore.release 
sync; - Semaphore.acquire merge - | `After_clear -> Semaphore.release sync - | _ -> () - in - let sync_hook = - I.Private.Hook.v (function - | `Reload_log -> reload_log := succ !reload_log - | `Reload_log_async -> reload_log_async := succ !reload_log_async - | _ -> ()) - in - let k1, v1 = (Key.v (), Value.v ()) in - let k2, v2 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.flush rw; - let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in - Index.replace rw k2 v2; - Index.flush rw; - Semaphore.acquire sync; - Index.sync' ~hook:sync_hook ro; - Index.sync' ~hook:sync_hook ro; - Index.sync' ~hook:sync_hook ro; - Index.sync' ~hook:sync_hook ro; - Semaphore.release merge; - Index.check_binding ro k1 v1; - Index.check_binding ro k2 v2; - Alcotest.(check int) "reloadings of log per merge" 0 !reload_log; - Alcotest.(check int) "reloadings of log async per merge" 1 !reload_log_async; - Index.await t |> check_completed - - let tests = - [ - ("add", `Quick, readonly); - ("read after clear", `Quick, readonly_clear); - ("snapshot isolation", `Quick, readonly_snapshot); - ("Readonly v after replace", `Quick, readonly_v_after_replace); - ("add not allowed", `Quick, fail_readonly_add); - ("fail read if no flush", `Quick, fail_readonly_read); - ("readonly v is in sync", `Quick, readonly_v_in_sync); - ( "read values added in log before clear", - `Quick, - readonly_add_log_before_clear ); - ( "read values added in index before clear", - `Quick, - readonly_add_index_before_clear ); - ("read old values in log after clear", `Quick, readonly_add_after_clear); - ( "read old values in index after clear", - `Quick, - readonly_add_index_after_clear ); - ("readonly open after clear", `Quick, readonly_open_after_clear); - ("race between sync and merge", `Quick, readonly_sync_and_merge); - ("race between sync and clear", `Quick, readonly_io_clear); - ( "race between sync and end of merge", - `Quick, - readonly_sync_and_merge_clear ); - ("reload log and log async", `Quick, reload_log_async); - ] -end - -(* Tests of {Index.close} *) -module Close = struct - exception Stop - - let check_logs msg rw ~log_size ~log_async_size = - let log = Index.log rw in - let log_async = Index.log_async rw in - let len = Option.map List.length in - Alcotest.(check (option int)) (msg ^ ": log entries") log_size (len log); - Alcotest.(check (option int)) - (msg ^ ": log_async entries") - log_async_size (len log_async) - - let force_merge rw = - let thread = Index.try_merge_aux ~force:true rw in - Index.await thread |> check_completed; - check_logs "force_merge" rw ~log_size:(Some 0) ~log_async_size:None - - let close_reopen_rw () = - let* Context.{ rw; tbl; clone; _ } = Context.with_full_index () in - Index.close rw; - let w = clone ~readonly:false () in - check_equivalence w tbl - - let close_reopen_rw_more () = - let merge = Semaphore.make false in - let merge_hook = - I.Private.Hook.v @@ function - | `After_clear -> - Semaphore.release merge; - raise Stop - | _ -> () - in - let* Context.{ rw; clone; tbl; _ } = - Context.with_full_index ~log_size:2 () - in - let k1, v1 = (Key.v (), Value.v ()) in - let k2, v2 = (Key.v (), Value.v ()) in - let k3, v3 = (Key.v (), Value.v ()) in - - (* await a cancelled merge and close *) - let close rw t ~log_size ~log_async_size = - Semaphore.acquire merge; - check_logs "close" rw ~log_size ~log_async_size; - Index.close ~immediately:() rw; - match Index.await t with - | Error (`Async_exn Stop) -> () - | _ -> Alcotest.fail "the merge thread should have been killed" - in - - (* Add 
k1, start a merge and crash just after the index is - renamed *) - Index.replace rw k1 v1; - check_logs __LOC__ rw ~log_size:(Some 1) ~log_async_size:None; - let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in - (* Check that the log entries have been merged with index *) - close rw t ~log_size:(Some 0) ~log_async_size:(Some 0); - - (* k1 should be there (thx to the log file) *) - let rw = clone ~readonly:false ~fresh:false () in - Hashtbl.add tbl k1 v1; - check_equivalence rw tbl; - - (* Add k2 in log and k3 in log_async and crash just after the - index is renamed. Log is merged but log_async should still be - present. *) - Index.replace rw k2 v2; - check_logs __LOC__ rw ~log_size:(Some 1) ~log_async_size:None; - let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in - Index.replace rw k3 v3; - close rw t ~log_size:(Some 0) ~log_async_size:(Some 1); - - (* Reopen, k2 and k3 should be there. *) - let rw = clone ~readonly:false ~fresh:false () in - Hashtbl.add tbl k2 v2; - Hashtbl.add tbl k3 v3; - check_equivalence rw tbl - - let crash_and_continue () = - let merge = Semaphore.make false in - let merge_hook = - I.Private.Hook.v @@ function - | `After_clear -> - Semaphore.release merge; - raise Stop - | _ -> () - in - let* Context.{ rw; tbl; _ } = Context.with_full_index ~log_size:2 () in - let k1, v1 = (Key.v (), Value.v ()) in - let k2, v2 = (Key.v (), Value.v ()) in - let k3, v3 = (Key.v (), Value.v ()) in - - let wait t = - Semaphore.acquire merge; - match Index.await t with - | Error (`Async_exn Stop) -> () - | _ -> Alcotest.fail "the merge thread should have been killed" - in - - (* empty log and log_async *) - force_merge rw; - - (* Add k1, start a merge and crash just after the index is - renamed *) - Index.replace rw k1 v1; - check_logs __LOC__ rw ~log_size:(Some 1) ~log_async_size:None; - Index.try_merge_aux ~force:true ~hook:merge_hook rw |> wait; - (* Check that the log entries have been merged with index *) - check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 0); - - (* k1 should be in data *) - Hashtbl.add tbl k1 v1; - check_equivalence rw tbl; - - (* Add k2 in log_async, as the file is present from the previous - crash. *) - Index.replace rw k2 v2; - check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 1); - - (* k2 should be there. *) - Hashtbl.add tbl k2 v2; - check_equivalence rw tbl; - - (* merge should merge pre-existing log_async entries *) - Index.try_merge_aux ~force:true ~hook:merge_hook rw |> wait; - check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 0); - check_equivalence rw tbl; - - (* Add k3 in log_async as it already exists *) - Index.replace rw k3 v3; - check_logs __LOC__ rw ~log_size:(Some 0) ~log_async_size:(Some 1); - - (* k3 should be there. 
*) - Hashtbl.add tbl k3 v3; - check_equivalence rw tbl; - - (* full merge and check *) - force_merge rw; - check_equivalence rw tbl; - Index.close rw - - let find_absent () = - let* Context.{ rw; tbl; clone; _ } = Context.with_full_index () in - Index.close rw; - let rw = clone ~readonly:false () in - test_find_absent rw tbl - - let replace () = - let* Context.{ rw; clone; _ } = Context.with_full_index ~size:5 () in - Index.close rw; - let rw = clone ~readonly:false () in - test_replace rw - - let open_readonly_close_rw () = - let* Context.{ rw; tbl; clone; _ } = Context.with_full_index () in - let ro = clone ~readonly:true () in - Index.close rw; - check_equivalence ro tbl - - let close_reopen_readonly () = - let* Context.{ rw; tbl; clone; _ } = Context.with_full_index () in - Index.close rw; - let ro = clone ~readonly:true () in - check_equivalence ro tbl - - let fail_api_after_close () = - let k = Key.v () in - let v = Value.v () in - let calls t = - [ - ("clear", fun () -> Index.clear t); - ("find", fun () -> ignore_value (Index.find t k : string)); - ("mem", fun () -> ignore_bool (Index.mem t k : bool)); - ("replace", fun () -> Index.replace t k v); - ("iter", fun () -> Index.iter (fun _ _ -> ()) t); - ( "try_merge ~force:true", - fun () -> - let thread = Index.try_merge_aux ~force:true t in - Index.await thread |> function - | Ok `Completed -> () - | Ok `Aborted | Error _ -> - Alcotest.fail - "Unexpected return status from [try_merge ~force:true] after \ - close" ); - ("flush", fun () -> Index.flush t); - ] - in - let check_calls ~readonly instance = - Index.close instance; - List.iter - (fun (name, call) -> - Alcotest.check_raises - (Printf.sprintf "%s after close with readonly=%b raises Closed" name - readonly) - I.Closed call) - (calls instance) - in - let* Context.{ rw; _ } = Context.with_full_index () in - check_calls ~readonly:true rw; - check_calls ~readonly:false rw - - let check_double_close () = - let* Context.{ rw; _ } = Context.with_full_index () in - Index.close rw; - Index.close rw; - Alcotest.check_raises "flush after double close with raises Closed" I.Closed - (fun () -> Index.flush rw) - - let restart_twice () = - let* Context.{ rw; clone; _ } = Context.with_empty_index () in - let k1, v1 = (Key.v (), Value.v ()) in - Index.replace rw k1 v1; - Index.close rw; - let rw = clone ~fresh:true ~readonly:false () in - Alcotest.check_raises "Index restarts fresh cannot read data." Not_found - (fun () -> ignore_value (Index.find rw k1)); - Index.close rw; - let rw = clone ~fresh:false ~readonly:false () in - Alcotest.check_raises "Index restarted fresh once cannot read data." - Not_found (fun () -> ignore_value (Index.find rw k1)) - - (** [close] terminates an ongoing merge operation *) - let aborted_merge () = - let* Context.{ rw; _ } = - Context.with_full_index ~throttle:`Block_writes ~size:100 () - in - let close_request, abort_signalled = - (* Both semaphores are initially held. - - [close_request] is dropped by the merge thread in the [`Before] hook - as a signal to the parent thread to issue the [close] operation. - - - [abort_signalled] is dropped by the parent thread to signal to the - child to continue the merge operation (which must then abort prematurely). 
-      *)
-      (Semaphore.make false, Semaphore.make false)
-    in
-    let hook = function
-      | `Before ->
-          Log.app (fun f ->
-              f "Child (pid = %d): issuing request to close the index"
-                Thread.(id (self ())));
-          Semaphore.release close_request
-      | `After_first_entry -> Semaphore.acquire abort_signalled
-      | `After_clear | `After ->
-          Alcotest.failf
-            "Child (pid = %d): merge completed despite concurrent close"
-            Thread.(id (self ()))
-    in
-    let merge_promise : _ Index.async =
-      Index.try_merge_aux ~force:true ~hook:(I.Private.Hook.v hook) rw
-    in
-    Log.app (fun f -> f "Parent: waiting for request to close the index");
-    Semaphore.acquire close_request;
-    Log.app (fun f -> f "Parent: closing the index");
-    Index.close'
-      ~hook:
-        (I.Private.Hook.v (fun `Abort_signalled ->
-             Semaphore.release abort_signalled))
-      ~immediately:() rw;
-    Log.app (fun f -> f "Parent: awaiting merge result");
-    Index.await merge_promise |> function
-    | Ok `Completed ->
-        Alcotest.fail
-          "try_merge ~force:true returned `Completed despite concurrent close"
-    | Error (`Async_exn exn) ->
-        Alcotest.failf
-          "Asynchronous exception occurred during try_merge ~force:true: %s"
-          (Printexc.to_string exn)
-    | Ok `Aborted -> (
-        match Common.get_open_fd root with
-        | `Ok ofd ->
-            let merge, _ = Common.partition "merge" ofd in
-            if List.length merge > 0 then
-              Alcotest.fail "Too many file descriptors opened for merge files"
-        | `Skip err -> Log.warn (fun m -> m "`aborted_fd` was skipped: %s" err))
-
-  let tests =
-    [
-      ("close and reopen", `Quick, close_reopen_rw);
-      ("close and reopen more", `Quick, close_reopen_rw_more);
-      ("crash and continue", `Quick, crash_and_continue);
-      ("find (absent)", `Quick, find_absent);
-      ("replace", `Quick, replace);
-      ("open two instances, close one", `Quick, open_readonly_close_rw);
-      ("close and reopen on readonly", `Quick, close_reopen_readonly);
-      ("non-close operations fail after close", `Quick, fail_api_after_close);
-      ("double close", `Quick, check_double_close);
-      ("double restart", `Quick, restart_twice);
-      ("aborted merge", `Quick, aborted_merge);
-    ]
-end
-
-(* Tests of {Index.filter} *)
-module Filter = struct
-  (* Filtering should also affect the in-memory LRU. *)
-  let lru_size = 10
-
-  (** Test that all bindings are kept when using [filter] with a true predicate. *)
-  let filter_none () =
-    let* Context.{ rw; tbl; _ } = Context.with_full_index ~lru_size () in
-    Index.filter rw (fun _ -> true);
-    check_equivalence rw tbl
-
-  (** Test that all bindings are removed when using [filter] with a false
-      predicate. *)
-  let filter_all () =
-    let* Context.{ rw; _ } = Context.with_full_index ~lru_size () in
-    Index.filter rw (fun _ -> false);
-    check_equivalence rw (Hashtbl.create 0)
-
-  (** Test that [filter] can be used to remove exactly a single binding. *)
-  let filter_one () =
-    let* Context.{ rw; tbl; _ } = Context.with_full_index ~lru_size () in
-    let k = random_existing_key tbl in
-    (* Ensure the key is cached in the LRU: [filter] must remove it from there too. *)
-    let (_ : Value.t) = Index.find rw k in
-    Hashtbl.remove tbl k;
-    Index.filter rw (fun (k', _) -> not (String.equal k k'));
-    Index.check_not_found rw k;
-    check_equivalence rw tbl
-
-  (** Test that the results of [filter] are propagated to a clone which was
-      created before. *)
-  let clone_then_filter () =
-    let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~lru_size () in
-    let k = random_existing_key tbl in
-    Hashtbl.remove tbl k;
-    let rw2 = clone ~readonly:false () in
-    Index.filter rw (fun (k', _) -> not (String.equal k k'));
-    check_equivalence rw tbl;
-    check_equivalence rw2 tbl
-
-  (** Test that the results of [filter] are propagated to a clone which was
-      created after. *)
-  let filter_then_clone () =
-    let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~lru_size () in
-    let k = random_existing_key tbl in
-    Hashtbl.remove tbl k;
-    Index.filter rw (fun (k', _) -> not (String.equal k k'));
-    let rw2 = clone ~readonly:false () in
-    check_equivalence rw tbl;
-    check_equivalence rw2 tbl
-
-  (** Test that using [filter] doesn't affect fresh clones created later at the
-      same path. *)
-  let empty_after_filter_and_fresh () =
-    let* Context.{ rw; tbl; clone; _ } = Context.with_full_index ~lru_size () in
-    let k = random_existing_key tbl in
-    Hashtbl.remove tbl k;
-    Index.filter rw (fun (k', _) -> not (String.equal k k'));
-    let rw2 = clone ~fresh:true ~readonly:false () in
-    (* rw2 should be empty since it is fresh. *)
-    check_equivalence rw2 (Hashtbl.create 0)
-
-  let tests =
-    [
-      ("filter none", `Quick, filter_none);
-      ("filter all", `Quick, filter_all);
-      ("filter one", `Quick, filter_one);
-      ("clone then filter", `Quick, clone_then_filter);
-      ("filter then clone", `Quick, filter_then_clone);
-      ("empty after filter+fresh", `Quick, empty_after_filter_and_fresh);
-    ]
-end
-
-(** Tests of [Index.v ~throttle]*)
-module Throttle = struct
-  let add_binding ?hook t =
-    match Index.replace_random ?hook t with
-    | binding, None -> binding
-    | binding, Some _ ->
-        Alcotest.failf "New binding %a triggered an unexpected merge operation"
-          pp_binding binding
-
-  let add_overflow_binding ?hook t =
-    match Index.replace_random ?hook t with
-    | binding, Some merge_result -> (binding, merge_result)
-    | binding, None ->
-        Alcotest.failf "Expected new binding %a to trigger a merge operation"
-          pp_binding binding
-
-  let merge () =
-    let* Context.{ rw; tbl; _ } =
-      Context.with_full_index ~throttle:`Overcommit_memory ()
-    in
-    let m = Semaphore.make false in
-    let hook = Hook.v @@ function `Before -> Semaphore.acquire m | _ -> () in
-    let (_ : binding) = add_binding rw in
-    let merge_result = Index.try_merge_aux ~force:true ~hook rw in
-    Hashtbl.iter (fun k v -> Index.replace rw k v) tbl;
-    Semaphore.release m;
-    Index.await merge_result |> check_completed
-
-  let implicit_merge () =
-    let log_size = 4 in
-    let* Context.{ rw; _ } =
-      Context.with_empty_index ~log_size ~throttle:`Overcommit_memory ()
-    in
-    let m = Semaphore.make false in
-    let hook =
-      Hook.v @@ function `Merge `Before -> Semaphore.acquire m | _ -> ()
-    in
-    for _ = 1 to log_size do
-      ignore (add_binding rw : binding)
-    done;
-    Log.app (fun m -> m "Triggering an implicit merge");
-    let _, merge_result = add_overflow_binding ~hook rw in
-    Log.app (fun m -> m "Overcommitting to the log while a merge is ongoing");
-    for _ = 1 to log_size + 1 do
-      ignore (add_binding rw : binding)
-    done;
-    Semaphore.release m;
-    Index.await merge_result |> check_completed;
-    Log.app (fun m ->
-        m "Triggering a second implicit merge (with an overcommitted log)");
-    let _, merge_result = add_overflow_binding rw in
-    Index.await merge_result |> check_completed;
-    ()
-
-  let tests =
-    [
-      ("force merge", `Quick, merge); ("implicit merge", `Quick, implicit_merge);
-    ]
-end
-
-let () =
-  Common.report ();
-  Alcotest.run "index.unix"
-    [
-      ("io_array", Io_array.tests);
-      ("merge", Force_merge.tests);
-      ("live", Live.tests);
-      ("lru", Test_lru.tests);
-      ("on restart", DuplicateInstance.tests);
-      ("readonly", Readonly.tests);
-      ("close", Close.tests);
-      ("filter", Filter.tests);
-      ("flush_callback", Flush_callback.tests);
-      ("throttle", Throttle.tests);
-    ]
diff --git a/test/unix/main.mli b/test/unix/main.mli
deleted file mode 100644
index 6e4acb5a..00000000
--- a/test/unix/main.mli
+++ /dev/null
@@ -1 +0,0 @@
-(* left empty on purpose *)
diff --git a/test/unix/test_lru.ml b/test/unix/test_lru.ml
deleted file mode 100644
index 3e902759..00000000
--- a/test/unix/test_lru.ml
+++ /dev/null
@@ -1,125 +0,0 @@
-(** Tests of the in-memory LRU used by the Index implementation.
-
-    NOTE: most other tests in the suite set an LRU size of 0 for simplicity. *)
-
-module Stats = Index.Stats
-open Common
-
-module Context = Common.Make_context (struct
-  let root = Filename.concat "_tests" "test_log_with_lru"
-end)
-
-let check_bool pos ~expected act = Alcotest.(check ~pos bool) "" expected act
-let check_int pos ~expected act = Alcotest.(check ~pos int) "" expected act
-
-let check_value pos ~expected act =
-  let key = Alcotest.testable Key.pp Key.equal in
-  Alcotest.(check ~pos key) "" expected act
-
-let check_lru_stats pos ~hits ~misses =
-  Alcotest.(check ~pos int) "LRU hits" hits Stats.((get ()).lru_hits);
-  Alcotest.(check ~pos int) "LRU misses" misses Stats.((get ()).lru_misses)
-
-let get_new_cached_binding idx =
-  let k, v = (Key.v (), Value.v ()) in
-  Index.replace idx k v;
-  check_value __POS__ ~expected:v (Index.find idx k);
-  (k, v)
-
-let test_replace_and_find () =
-  let lru_size = 1 in
-  let* { rw = idx; _ } = Context.with_empty_index ~lru_size () in
-
-  (* Check that [replace] populates the LRU: *)
-  let k1, v1 = (Key.v (), Value.v ()) in
-  let () =
-    Stats.reset_stats ();
-    Index.replace idx k1 v1;
-    (* [k1] is now in the LRU: *)
-    check_value __POS__ ~expected:v1 (Index.find idx k1);
-    check_lru_stats __POS__ ~hits:1 ~misses:0
-  in
-
-  (* Check that a second [replace] updates the LRU contents: *)
-  let k2, v2 = (Key.v (), Value.v ()) in
-  let () =
-    assert (k1 <> k2);
-    Stats.reset_stats ();
-    Index.replace idx k2 v2;
-    (* [k2] has replaced [k1] in the LRU, so we miss on [find k1]: *)
-    check_value __POS__ ~expected:v1 (Index.find idx k1);
-    check_lru_stats __POS__ ~hits:0 ~misses:1;
-    (* [k1] is now in the LRU: *)
-    check_value __POS__ ~expected:v1 (Index.find idx k1);
-    check_lru_stats __POS__ ~hits:1 ~misses:1
-  in
-  ()
-
-let test_mem () =
-  let lru_size = 1 in
-  let* { rw = idx; _ } = Context.with_empty_index ~lru_size () in
-
-  (* Initially, [k2] is in the LRU and [k1] is not: *)
-  let k1, k2, v1, v2 = (Key.v (), Key.v (), Value.v (), Value.v ()) in
-  Index.replace idx k1 v1;
-  Index.replace idx k2 v2;
-
-  (* [mem k2] hits in the LRU: *)
-  let () =
-    Stats.reset_stats ();
-    check_bool __POS__ ~expected:true (Index.mem idx k2);
-    check_lru_stats __POS__ ~hits:1 ~misses:0
-  in
-
-  (* [mem k1] initially misses in the LRU, but subsequent calls hit in the LRU
-     (because the [k2] binding is replaced by [k1] on the miss). *)
-  let () =
-    Stats.reset_stats ();
-    check_bool __POS__ ~expected:true (Index.mem idx k1);
-    check_lru_stats __POS__ ~hits:0 ~misses:1;
-    check_bool __POS__ ~expected:true (Index.mem idx k1);
-    check_lru_stats __POS__ ~hits:1 ~misses:1
-  in
-  ()
-
-(* Check that the LRU is cleared on [clear]. *)
-let test_clear () =
-  let lru_size = 1 in
-  let* { rw = idx; _ } = Context.with_full_index ~lru_size () in
-
-  (* Add a binding and ensure that it's in the LRU: *)
-  let k, v = (Key.v (), Value.v ()) in
-  Index.replace idx k v;
-  check_value __POS__ ~expected:v (Index.find idx k);
-
-  (* We should miss in the LRU when attempting to find [k] after [clear]: *)
-  Index.clear idx;
-  Stats.reset_stats ();
-  Alcotest.check_raises "find after clear" Not_found (fun () ->
-      ignore (Index.find idx k));
-  check_lru_stats __POS__ ~hits:0 ~misses:1
-
-(* Check that bindings in the LRU are properly removed by [filter]: *)
-let test_filter () =
-  let lru_size = 1 in
-  let* { rw = idx; _ } = Context.with_full_index ~lru_size () in
-
-  (* Add a binding and ensure that it's in the LRU: *)
-  let k, v = (Key.v (), Value.v ()) in
-  Index.replace idx k v;
-  check_value __POS__ ~expected:v (Index.find idx k);
-
-  (* Remove [k] from the index via [filter], then try to [find] it: *)
-  Index.filter idx (fun (k', _) -> not (Key.equal k k'));
-  Stats.reset_stats ();
-  Alcotest.check_raises ~pos:__POS__ "find after filter-false" Not_found
    (fun () -> ignore (Index.find idx k));
-  check_lru_stats __POS__ ~hits:0 ~misses:1
-
-let tests =
-  [
-    ("replace_and_find", `Quick, test_replace_and_find);
-    ("mem", `Quick, test_mem);
-    ("clear", `Quick, test_clear);
-    ("filter", `Quick, test_filter);
-  ]
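
Note, illustrative and not part of the patch: the removed [aborted_merge] test coordinates a merging thread and a closing thread with two semaphores. The merge hook releases [close_request] before starting work, then blocks on [abort_signalled] until the close path has acknowledged the abort. The sketch below shows that handshake in isolation, using the stdlib Semaphore.Binary and Thread modules (requires the threads library) instead of the test suite's own Semaphore wrapper and Index's private hooks; [merge_hook] and [merge_thread] are hypothetical names introduced here for illustration only.

(* Sketch only: stdlib Semaphore.Binary/Thread stand in for the test
   harness's Semaphore module and for Index's private merge hooks. *)
let () =
  let close_request = Semaphore.Binary.make false in
  let abort_signalled = Semaphore.Binary.make false in
  (* Hypothetical stand-in for the hook passed to [try_merge_aux]. *)
  let merge_hook = function
    | `Before ->
        print_endline "merge thread: requesting close";
        Semaphore.Binary.release close_request
    | `After_first_entry ->
        (* Block until the closing thread has acknowledged the abort. *)
        Semaphore.Binary.acquire abort_signalled
  in
  let merge_thread =
    Thread.create
      (fun () ->
        merge_hook `Before;
        merge_hook `After_first_entry;
        print_endline "merge thread: abort acknowledged")
      ()
  in
  (* The main thread plays the role of [Index.close' ~immediately:()]. *)
  Semaphore.Binary.acquire close_request;
  print_endline "main thread: closing, signalling abort";
  Semaphore.Binary.release abort_signalled;
  Thread.join merge_thread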