From 2bd2865450aa0ebf858920de342ddbfd0c326d2e Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Fri, 22 Jan 2021 21:40:16 +0100 Subject: [PATCH 01/20] start compat work on map & splay --- src/batMap.ml | 298 ++++++++++++++++++++++++++++++----- src/batMap.mli | 210 +++++++++++++++++++++++- src/batSplay.ml | 27 +++- src/batteries_compattest.mlv | 11 +- 4 files changed, 500 insertions(+), 46 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 7ef637c1c..974d0b2d5 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -53,6 +53,9 @@ module Concrete = struct let empty = Empty + let is_empty m = + m == Empty + (* The create and bal functions are from stdlib's map.ml (3.12) differences from the old (extlib) implementation : @@ -105,6 +108,10 @@ module Concrete = struct | Node (l, _, _, _, _) -> min_binding l | Empty -> raise Not_found + let min_binding_opt m = + try Some (min_binding m) + with Not_found -> None + let get_root = function | Empty -> raise Not_found | Node (_, k, v, _, _) -> k, v @@ -112,7 +119,7 @@ module Concrete = struct let pop_min_binding s = let mini = ref (get_root s) in let rec loop = function - | Empty -> assert(false) + | Empty -> raise Not_found | Node(Empty, k, v, r, _) -> mini := (k, v); r | Node(l, k, v, r, _) -> bal (loop l) k v r in @@ -122,12 +129,16 @@ module Concrete = struct let rec max_binding = function | Node (_, k, v, Empty, _) -> k, v | Node (_, _, _, r, _) -> max_binding r - | Empty -> invalid_arg "PMap.max_binding: empty tree" + | Empty -> raise Not_found + + let max_binding_opt m = + try Some (max_binding m) + with Not_found -> None let pop_max_binding s = let maxi = ref (get_root s) in let rec loop = function - | Empty -> assert(false) + | Empty -> raise Not_found | Node (l, k, v, Empty, _) -> maxi := (k, v); l | Node (l, k, v, r, _) -> bal l k v (loop r) in @@ -137,7 +148,7 @@ module Concrete = struct let rec remove_min_binding = function | Node (Empty, _, _, r, _) -> r | Node (l, k, v, r, _) -> bal (remove_min_binding l) k v r - | Empty -> invalid_arg "PMap.remove_min_binding" + | Empty -> raise Not_found let merge t1 t2 = match t1, t2 with @@ -149,15 +160,25 @@ module Concrete = struct let add x d cmp map = let rec loop = function - | Node (l, k, v, r, h) -> + | Node (l, k, v, r, h) as node -> let c = cmp x k in - if c = 0 then Node (l, x, d, r, h) + if c = 0 then + if x == k && d == v then + node + else + Node (l, x, d, r, h) else if c < 0 then let nl = loop l in - bal nl k v r + if nl == l then + node + else + bal nl k v r else let nr = loop r in - bal l k v nr + if nr == r then + node + else + bal l k v nr | Empty -> Node (Empty, x, d, Empty, 1) in loop map @@ -263,10 +284,22 @@ module Concrete = struct let remove x cmp map = let rec loop = function - | Node (l, k, v, r, _) -> + | Node (l, k, v, r, _) as node -> let c = cmp x k in - if c = 0 then merge l r else - if c < 0 then bal (loop l) k v r else bal l k v (loop r) + if c = 0 then + merge l r + else if c < 0 then + let nl = loop l in + if nl == l then + node + else + bal nl k v r + else + let nr = loop r in + if nr == r then + node + else + bal l k v nr | Empty -> Empty in loop map @@ -292,17 +325,34 @@ module Concrete = struct else let rec loop = function | Empty -> raise Not_found - | Node(l, k, v, r, h) -> - let c = cmp k1 k in - if c = 0 then - Node(l, k2, v2, r, h) - else if c < 0 then - Node(loop l, k, v, r, h) - else - Node(l, k, v, loop r, h) + | Node(l, k, v, r, h) as node -> + let c = cmp k1 k in + if c = 0 then + if v == v2 && k == k2 then + node + else + Node(l, k2, v2, r, h) + else if c < 0 then + let nl = loop l in + if nl == l then + node + else + Node(nl, k, v, r, h) + else + let nr = loop r in + if nr == r then + node + else + Node(l, k, v, nr, h) in loop map + let update_stdlib k f cmp m = + let findresult = (find_option k cmp m) in + match f findresult with + | Some x -> add k x cmp m + | None -> remove k cmp m + let mem x cmp map = let rec loop = function | Node (l, k, _v, r, _) -> @@ -495,7 +545,7 @@ module Concrete = struct let filterv f t cmp = foldi (fun k a acc -> if f a then add k a cmp acc else acc) t empty let filter f t cmp = - foldi (fun k a acc -> if f k a then add k a cmp acc else acc) t empty + foldi (fun k a acc -> if f k a then acc else remove k cmp acc) t t let filter_map f t cmp = foldi (fun k a acc -> match f k a with | None -> acc @@ -533,6 +583,10 @@ module Concrete = struct (empty |> add 0 1 |> add 1 1 |> choose) (empty |> add 1 1 |> add 0 1 |> choose) *) + let choose_opt m = + try Some (choose m) + with Not_found -> None + let any = function | Empty -> raise Not_found | Node (_, k, v, _, _) -> (k,v) @@ -806,6 +860,32 @@ module Concrete = struct | Some v2 -> add k (f v1 v2) cmp1 m) m1 empty + let add_seq cmp s m = + Seq.fold_left + (fun m (k, v) -> add k v cmp m) + m + s + + let of_seq cmp s = + add_seq cmp s empty + + let to_seq m = + BatSeq.of_list (bindings m) (* TODO: optimize *) + + let to_seq_from (cmp : 'a -> 'a -> int) k m = + to_seq (filter (fun k2 _ -> cmp k k2 >= 0) m cmp) + + let union_stdlib cmp f m1 m2 = + foldi + (fun k v m -> + if mem k cmp m + then match f k v (find k cmp m) with + | Some v2 -> add k v2 cmp m + | None -> m + else add k v cmp m) + m1 + m2 + let compare ckey cval m1 m2 = BatEnum.compare (fun (k1,v1) (k2,v2) -> BatOrd.bin_comp ckey k1 k2 cval v1 v2) (enum m1) (enum m2) let equal ckey eq_val m1 m2 = @@ -822,6 +902,7 @@ sig val is_empty: 'a t -> bool val cardinal: 'a t -> int val add: key -> 'a -> 'a t -> 'a t + val update_stdlib: key -> ('a option -> 'a option) -> 'a t -> 'a t val update: key -> key -> 'a -> 'a t -> 'a t val find: key -> 'a t -> 'a val find_opt: key -> 'a t -> 'a option @@ -850,10 +931,13 @@ sig val keys : _ t -> key BatEnum.t val values: 'a t -> 'a BatEnum.t val min_binding : 'a t -> (key * 'a) + val min_binding_opt : 'a t -> (key * 'a) option val pop_min_binding: 'a t -> (key * 'a) * 'a t - val max_binding : 'a t -> (key * 'a) + val max_binding : 'a t -> (key * 'a) + val max_binding_opt : 'a t -> (key * 'a) option val pop_max_binding: 'a t -> (key * 'a) * 'a t val choose : 'a t -> (key * 'a) + val choose_opt : 'a t -> (key * 'a) option val any : 'a t -> (key * 'a) val split : key -> 'a t -> ('a t * 'a option * 'a t) val partition : (key -> 'a -> bool) -> 'a t -> 'a t * 'a t @@ -866,6 +950,12 @@ sig val exists: (key -> 'a -> bool) -> 'a t -> bool val merge: (key -> 'a option -> 'b option -> 'c option) -> 'a t -> 'b t -> 'c t + val union: + (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t + val to_seq : 'a t -> (key * 'a) Seq.t + val to_seq_from : key -> 'a t -> (key * 'a) Seq.t + val add_seq : (key * 'a) Seq.t -> 'a t -> 'a t + val of_seq : (key * 'a) Seq.t -> 'a t (** {6 Boilerplate code}*) (** {7 Printing}*) val print : ?first:string -> ?last:string -> ?sep:string -> ?kvsep:string -> @@ -928,6 +1018,7 @@ struct let keys t = Concrete.keys (impl_of_t t) let values t = Concrete.values (impl_of_t t) let update k1 k2 v2 t = t_of_impl (Concrete.update k1 k2 v2 Ord.compare (impl_of_t t)) + let update_stdlib k f m = t_of_impl (Concrete.update_stdlib k f Ord.compare (impl_of_t m)) let find_default d k t = Concrete.find_default d k Ord.compare (impl_of_t t) let find_opt k t = Concrete.find_option k Ord.compare (impl_of_t t) let find_first f t = Concrete.find_first f (impl_of_t t) @@ -973,6 +1064,7 @@ struct (maxi, t_of_impl rest) let choose t = Concrete.choose (impl_of_t t) + let choose_opt t = Concrete.choose_opt (impl_of_t t) let any t = Concrete.any (impl_of_t t) let split k t = @@ -1006,6 +1098,8 @@ struct let bindings t = Concrete.bindings (impl_of_t t) + let union f m1 m2 = t_of_impl (Concrete.union_stdlib Ord.compare f (impl_of_t m1) (impl_of_t m2)) + let merge f t1 t2 = t_of_impl (Concrete.merge f Ord.compare (impl_of_t t1) (impl_of_t t2)) @@ -1052,12 +1146,32 @@ module String = Make (BatString) type ('k, 'v) t = ('k, 'v) Concrete.map let empty = Concrete.empty -let is_empty x = x = Concrete.Empty +let is_empty = Concrete.is_empty + +(*$T is_empty + is_empty empty + not(is_empty (empty |> add 1 1)) + *) let add x d m = Concrete.add x d Pervasives.compare m let update k1 k2 v2 m = Concrete.update k1 k2 v2 Pervasives.compare m +let update_stdlib k f m = Concrete.update_stdlib k f Pervasives.compare m + +(*$T update_stdlib + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); Some 3) (of_list [1,1; 2,2])) (of_list [1,3;2,2]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 3 (fun x -> assert(x = None); Some 3) (of_list [1,1; 2,2])) (of_list [1,1;2,2;3,3]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); None) (of_list [1,1; 2,2])) (of_list [2,2]) + let of_list l = of_enum (BatList.enum l) in \ + let s = (of_list [1,1; 2,2]) in (update_stdlib 3 (fun x -> assert(x = None ); None ) s) == s + let of_list l = of_enum (BatList.enum l) in \ + let s = (of_list [1,1; 2,2]) in (update_stdlib 1 (fun x -> assert(x = Some 1); Some 1) s) == s + *) + let find x m = Concrete.find x Pervasives.compare m (*$T add; find @@ -1089,20 +1203,6 @@ let find_first_opt f map = Concrete.find_first_opt f map let find_last f map = Concrete.find_last f map let find_last_opt f map = Concrete.find_last_opt f map -(*$T pop_min_binding - (empty |> add 1 true |> pop_min_binding) = ((1, true), empty) - (empty |> add 1 true |> add 2 false |> pop_min_binding) = \ - ((1, true), add 2 false empty) - try ignore (pop_min_binding empty); false with Not_found -> true -*) - -(*$T pop_max_binding - (empty |> add 1 true |> pop_max_binding) = ((1, true), empty) - (empty |> add 1 true |> add 2 false |> pop_max_binding) = \ - ((2, false), add 1 true empty) - try ignore (pop_max_binding empty); false with Not_found -> true -*) - (*$Q find ; add (Q.list Q.small_int) (fun xs -> \ let of_list xs y m0 = List.fold_left (fun acc x -> add x y acc) m0 xs in \ @@ -1168,11 +1268,111 @@ let filter f t = Concrete.filter f t Pervasives.compare let filter_map f t = Concrete.filter_map f t Pervasives.compare let choose = Concrete.choose +let choose_opt = Concrete.choose_opt let any = Concrete.any let max_binding = Concrete.max_binding let min_binding = Concrete.min_binding +let max_binding_opt = Concrete.max_binding_opt +let min_binding_opt = Concrete.min_binding_opt let pop_min_binding = Concrete.pop_min_binding let pop_max_binding = Concrete.pop_max_binding + +(*$T pop_min_binding + (empty |> add 1 true |> pop_min_binding) = ((1, true), empty) + (empty |> add 1 true |> add 2 false |> pop_min_binding) = \ + ((1, true), add 2 false empty) + try ignore (pop_min_binding empty); false with Not_found -> true +*) + +(*$T pop_max_binding + (empty |> add 1 true |> pop_max_binding) = ((1, true), empty) + (empty |> add 1 true |> add 2 false |> pop_max_binding) = \ + ((2, false), add 1 true empty) + try ignore (pop_max_binding empty); false with Not_found -> true +*) + +(*$T choose + let of_list l = of_enum (BatList.enum l) in \ + (1,1) = choose (of_list [1,1]) + try ignore(choose empty); false with Not_found -> true + *) + +(*$T choose_opt + let of_list l = of_enum (BatList.enum l) in \ + Some (1,1) = choose_opt (of_list [1,1]) + None = choose_opt (empty) + *) + +(*$T max_binding + let of_list l = of_enum (BatList.enum l) in \ + (3,3) = max_binding (of_list [1,1;2,2;3,3]) + try ignore(max_binding empty); false with Not_found -> true + *) + +(*$T max_binding_opt + let of_list l = of_enum (BatList.enum l) in \ + Some (3,3) = max_binding_opt (of_list [1,1;2,2;3,3]) + None = max_binding_opt empty + *) + +(*$T min_binding + let of_list l = of_enum (BatList.enum l) in \ + (1,1) = min_binding (of_list [1,1;2,2;3,3]) + try ignore(min_binding empty); false with Not_found -> true + *) + +(*$T min_binding_opt + let of_list l = of_enum (BatList.enum l) in \ + Some (1,1) = min_binding_opt (of_list [1,1;2,2;3,3]) + None = min_binding_opt empty + *) + +(*$T add + let s = empty |> add 1 1 |> add 2 2 in s == (s |> add 2 2) + *) + +(*$T remove + let s = empty |> add 1 1 |> add 2 2 in s == (s |> remove 4) + *) + +(*$T update + let s = empty |> add 1 1 |> add 2 2 in \ + s == (s |> update 2 2 2) + *) + +(*$T update_stdlib + let s = empty |> add 1 1 |> add 2 2 in \ + s == (s |> update_stdlib 2 (fun _ -> Some 2)) + *) + +(*$T filter + let s = empty |> add 1 1 |> add 2 2 in \ + s == (filter (fun _ _ -> true) s) + *) + + +let of_seq s = + Concrete.of_seq Pervasives.compare s + +let add_seq s m = + Concrete.add_seq Pervasives.compare s m + +let to_seq = Concrete.to_seq + +let to_seq_from x m = + Concrete.to_seq_from Pervasives.compare x m + +let union_stdlib f m1 m2 = Concrete.union_stdlib Pervasives.compare f m1 m2 +(*$T union_stdlib + let cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") empty empty) empty + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") (of_list [1,1;2,2]) empty) (of_list [1,1;2,2]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") empty (of_list [1,1;2,2])) (of_list [1,1;2,2]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") (of_list [3,3;4,4]) (of_list [1,1;2,2])) (of_list [1,1;2,2;3,3;4,4]) + *) let singleton k v = Concrete.singleton k v @@ -1205,7 +1405,6 @@ let pop = Concrete.pop let split k m = Concrete.split k Pervasives.compare m - (* We can't compare external primitives directly using the physical equality operator, since two different occurrences of an external primitive are two different closures. So we first make a local binding of [Pervasives.compare] @@ -1220,7 +1419,10 @@ let union m1 m2 = let m1 = empty |> add 1 1 |> add 2 2 in \ let m2 = empty |> add 2 20 |> add 3 30 in \ (union m1 m2 |> find 2 = 20) && (union m2 m1 |> find 2 = 2) -*) + *) + +let union_stdlib f m1 m2 = + Concrete.union_stdlib Pervasives.compare f m1 m2 let diff m1 m2 = let comp = Pervasives.compare in @@ -1283,6 +1485,9 @@ module PMap = struct (*$< PMap *) let update k1 k2 v2 m = { m with map = Concrete.update k1 k2 v2 m.cmp m.map } + let update_stdlib k f m = + { m with map = Concrete.update_stdlib k f m.cmp m.map } + let find x m = Concrete.find x m.cmp m.map @@ -1403,6 +1608,8 @@ module PMap = struct (*$< PMap *) let max_binding t = Concrete.max_binding t.map let min_binding t = Concrete.min_binding t.map + let max_binding_opt t = Concrete.max_binding_opt t.map + let min_binding_opt t = Concrete.min_binding_opt t.map let pop_min_binding m = let mini, rest = Concrete.pop_min_binding m.map in (mini, { m with map = rest }) @@ -1424,6 +1631,7 @@ module PMap = struct (*$< PMap *) let cardinal m = Concrete.cardinal m.map let choose m = Concrete.choose m.map + let choose_opt m = Concrete.choose_opt m.map let any m = Concrete.any m.map let split k m = @@ -1470,6 +1678,20 @@ module PMap = struct (*$< PMap *) let merge_unsafe f m1 m2 = { m1 with map = Concrete.merge f m1.cmp m1.map m2.map } + let of_seq ?(cmp = Pervasives.compare) s = + { map = Concrete.of_seq cmp s; cmp = cmp } + + let to_seq m = Concrete.to_seq m.map + + let to_seq_from k m = + Concrete.to_seq_from m.cmp k m.map + + let add_seq s m = + { m with map = Concrete.add_seq m.cmp s m.map } + + let union_stdlib f m1 m2 = + { m1 with map = Concrete.union_stdlib m1.cmp f m1.map m2.map } + let bindings m = Concrete.bindings m.map diff --git a/src/batMap.mli b/src/batMap.mli index 59869d822..909fd88ca 100644 --- a/src/batMap.mli +++ b/src/batMap.mli @@ -90,6 +90,19 @@ sig [m], plus a binding of [x] to [y]. If [x] was already bound in [m], its previous binding disappears. *) + val update_stdlib : key -> ('a option -> 'a option) -> 'a t -> 'a t + (** [update_stdlib k f m] returns a map containing the same bindings as [m], + except [k] has a new binding as determined by [f]: + First, calculate [y] as [f (find_opt k m)]. + If [y = Some v] then [k] will be bound to [v] in the resulting map. + Else [k] will not be bound in the resulting map. + + This function does the same thing as [update] in the stdlib, but has a + different name for backwards compatibility reasons. + + @since NEXT_RELEASE *) + + (* TODO: maybe deprecate this function to re-gain compatibility with stdlib? *) val update: key -> key -> 'a -> 'a t -> 'a t (** [update k1 k2 v2 m] replace the previous binding of [k1] in [m] by [k2] associated to [v2]. @@ -97,7 +110,7 @@ sig in the case where [k1] and [k2] have the same key ordering. @raise Not_found if [k1] is not bound in [m]. @since 2.4.0 *) - + val find: key -> 'a t -> 'a (** [find x m] returns the current binding of [x] in [m], or raises [Not_found] if no such binding exists. *) @@ -259,14 +272,28 @@ sig The returned enumeration is sorted in increasing key order. *) val min_binding : 'a t -> (key * 'a) - (** Return the [(key, value)] pair with the smallest key. *) + (** Return the [(key, value)] pair with the smallest key. + Raises Not_found if the map is empty. *) + + val min_binding_opt : 'a t -> (key * 'a) option + (** Return [Some (key, value)] for the [key, value] pair with + the smallest key, or [None] if the map is empty. + + @since NEXT_RELEASE *) val pop_min_binding : 'a t -> (key * 'a) * 'a t (** Return the [(key, value)] pair with the smallest key along with the rest of the map. *) val max_binding : 'a t -> (key * 'a) - (** Return the [(key, value)] pair with the largest key. *) + (** Return the [(key, value)] pair with the largest key. + Raises Not_found if the map is empty. *) + + val max_binding_opt : 'a t -> (key * 'a) option + (** Return [Some (key, value)] for the [key, value] pair with + the largest key, or [None] if the map is empty. + + @since NEXT_RELEASE *) val pop_max_binding : 'a t -> (key * 'a) * 'a t (** Return the ([key, value]) pair with the largest key @@ -284,6 +311,14 @@ sig @raise Not_found if the map is empty *) + val choose_opt : 'a t -> (key * 'a) option + (** Return [Some (k, v)] for one binding [(k, v)] of the given map, + if the map is not empty. Else, return None. Which binding is + chosen is unspecified, but equal bindings will be chosen for + equal maps. + + @since NEXT_RELEASE *) + val any : 'a t -> (key * 'a) (** Return one binding of the given map. The difference with choose is that there is no guarantee that equals @@ -353,6 +388,39 @@ sig value, is determined with the function [f]. *) + val union: + (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t + (** [union f m1 m2] computes a map whose keys are a subset of the keys of + [m1] and of [m2]. When the same binding is defined in both arguments, + the function f is used to combine them. + This function is similar to [merge], except [f] is only called if a key + is present in both [m1] and [m2]. If a key is present in either [m1] + or [m2] but not in both, it (and the corresponding value) will be + present in the resulting map. + + @since NEXT_RELEASE *) + + val to_seq : 'a t -> (key * 'a) Seq.t + (** Iterate on the whole map, in ascending order of keys. + + @since NEXT_RELEASE *) + + val to_seq_from : key -> 'a t -> (key * 'a) Seq.t + (** [to_seq_from k m] iterates on a subset of the bindings in [m], + namely those bindings greater or equal to [k], in ascending order. + + @since NEXT_RELEASE *) + + val add_seq : (key * 'a) Seq.t -> 'a t -> 'a t + (** add the given bindings to the map, in order. + + @since NEXT_RELEASE *) + + val of_seq : (key * 'a) Seq.t -> 'a t + (** build a map from the given bindings + + @since NEXT_RELEASE *) + (** {6 Boilerplate code}*) (** {7 Printing}*) @@ -470,6 +538,15 @@ val update: 'a -> 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t @raise Not_found if [k1] is not bound in [m]. @since 2.4.0 *) +val update_stdlib : 'a -> ('b option -> 'b option) -> ('a, 'b) t -> ('a, 'b) t +(** [update_stdlib k f m] returns a map containing the same bindings as [m], + except [k] has a new binding as determined by [f]: + First, calculate [y] as [f (find_opt k m)]. + If [y = Some v] then [k] will be bound to [v] in the resulting map. + Else [k] will not be bound in the resulting map. + + @since NEXT_RELEASE *) + val find : 'a -> ('a, 'b) t -> 'b (** [find x m] returns the current binding of [x] in [m], or raises [Not_found] if no such binding exists. *) @@ -596,6 +673,15 @@ val choose : ('key, 'a) t -> ('key * 'a) @raise Not_found if the map is empty *) +val choose_opt : ('key, 'a) t -> ('key * 'a) option +(** Return [Some (k, v)] for one binding [(k, v)] of the given map, + if the map is not empty. Else, return None. + Which binding is chosen is unspecified, but equal bindings will be + chosen for equal maps. + + @since NEXT_RELEASE *) + + val any : ('key, 'a) t -> ('key * 'a) (** Return one binding of the given map. The difference with choose is that there is no guarantee that equals @@ -615,13 +701,27 @@ val split : 'key -> ('key, 'a) t -> (('key, 'a) t * 'a option * ('key, 'a) t) *) val min_binding : ('key, 'a) t -> ('key * 'a) -(** Returns the binding with the smallest key. *) +(** Returns the binding with the smallest key. + Raises Not_found if the map is empty. *) + +val min_binding_opt : ('key, 'a) t -> ('key * 'a) option +(** Return [Some (key, value)] for the [key, value] pair with + the smallest key, or [None] if the map is empty. + + @since NEXT_RELEASE *) val pop_min_binding : ('key, 'a) t -> ('key * 'a) * ('key, 'a) t (** Returns the binding with the smallest key along with the rest of the map. *) val max_binding : ('key, 'a) t -> ('key * 'a) -(** Returns the binding with the largest key. *) +(** Return the [(key, value)] pair with the largest key. + Raises Not_found if the map is empty. *) + +val max_binding_opt : ('key, 'a) t -> ('key * 'a) option +(** Return [Some (key, value)] for the [key, value] pair with + the largest key, or [None] if the map is empty. + + @since NEXT_RELEASE *) val pop_max_binding : ('key, 'a) t -> ('key * 'a) * ('key, 'a) t (** Returns the binding with the largest key along with the rest of the map. *) @@ -697,6 +797,41 @@ val union : ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t [m1]'s. Equivalent to [foldi add m2 m1]. The resulting map uses the comparison function of [m1]. *) +val union_stdlib: + ('key -> 'a -> 'a -> 'a option) -> ('key, 'a) t -> ('key, 'a) t -> ('key, 'a) t +(** [union_stdlib f m1 m2] computes a map whose keys are a subset of the keys of + [m1] and of [m2]. When the same binding is defined in both arguments, + the function f is used to combine them. + This function is similar to [merge], except [f] is only called if a key + is present in both [m1] and [m2]. If a key is present in either [m1] + or [m2] but not in both, it (and the corresponding value) will be + present in the resulting map. + + This is the union method from the stdlib map, renamed for backwards compatibility. + + @since NEXT_RELEASE *) + +val to_seq : ('key, 'a) t -> ('key * 'a) Seq.t +(** Iterate on the whole map, in ascending order of keys. + + @since NEXT_RELEASE *) + +val to_seq_from : 'key -> ('key, 'a) t -> ('key * 'a) Seq.t +(** [to_seq_from k m] iterates on a subset of the bindings in [m], + namely those bindings greater or equal to [k], in ascending order. + + @since NEXT_RELEASE *) + +val add_seq : ('key * 'a) Seq.t -> ('key, 'a) t -> ('key, 'a) t +(** add the given bindings to the map, in order. + + @since NEXT_RELEASE *) + +val of_seq : ('key * 'a) Seq.t -> ('key, 'a) t +(** build a map from the given bindings + + @since NEXT_RELEASE *) + val diff : ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t (** [diff m1 m2] removes all bindings of keys found in [m2] from [m1], using the comparison function of [m1]. Equivalent to @@ -824,6 +959,15 @@ module PMap : sig @raise Not_found if [k1] is not bound in [m]. @since 2.4.0 *) + val update_stdlib : 'a -> ('b option -> 'b option) -> ('a, 'b) t -> ('a, 'b) t + (** [update_stdlib k f m] returns a map containing the same bindings as [m], + except [k] has a new binding as determined by [f]: + First, calculate [y] as [f (find_opt k m)]. + If [y = Some v] then [k] will be bound to [v] in the resulting map. + Else [k] will not be bound in the resulting map. + + @since NEXT_RELEASE *) + val find : 'a -> ('a, 'b) t -> 'b (** [find x m] returns the current binding of [x] in [m], or raises [Not_found] if no such binding exists. *) @@ -942,6 +1086,14 @@ module PMap : sig Which binding is chosen is unspecified, but equal bindings will be chosen for equal maps. @raise Not_found if the map is empty. *) + + val choose_opt : ('key, 'a) t -> ('key * 'a) option + (** Return [Some (k, v)] for one binding [(k, v)] of the given map, + if the map is not empty. Else, return None. Which binding is + chosen is unspecified, but equal bindings will be chosen for + equal maps. + + @since NEXT_RELEASE *) val any : ('key, 'a) t -> ('key * 'a) (** Return one binding of the given map. @@ -963,6 +1115,12 @@ module PMap : sig val min_binding : ('key, 'a) t -> ('key * 'a) (** Returns the binding with the smallest key. *) + + val min_binding_opt : ('key, 'a) t -> ('key * 'a) option + (** Return [Some (key, value)] for the [key, value] pair with + the smallest key, or [None] if the map is empty. + + @since NEXT_RELEASE *) val pop_min_binding : ('key, 'a) t -> ('key * 'a) * ('key, 'a) t (** Return the binding with the smallest key along with the rest of the map. *) @@ -970,6 +1128,12 @@ module PMap : sig val max_binding : ('key, 'a) t -> ('key * 'a) (** Returns the binding with the largest key. *) + val max_binding_opt : ('key, 'a) t -> ('key * 'a) option + (** Return [Some (key, value)] for the [key, value] pair with + the largest key, or [None] if the map is empty. + + @since NEXT_RELEASE *) + val pop_max_binding : ('key, 'a) t -> ('key * 'a) * ('key, 'a) t (** Return the binding with the largest key along with the rest of the map. *) @@ -1072,6 +1236,42 @@ module PMap : sig comparison function of its first parameter, but which ['b option] elements are passed to the function is unspecified. *) + val union_stdlib: + ('key -> 'a -> 'a -> 'a option) -> ('key, 'a) t -> ('key, 'a) t -> ('key, 'a) t + (** [union f m1 m2] computes a map whose keys are a subset of the keys of + [m1] and of [m2]. When the same binding is defined in both arguments, + the function f is used to combine them. + This function is similar to [merge], except [f] is only called if a key + is present in both [m1] and [m2]. If a key is present in either [m1] + or [m2] but not in both, it (and the corresponding value) will be + present in the resulting map. + + This is the union method from the stdlib map, renamed for backwards compatibility. + + @since NEXT_RELEASE *) + + val to_seq : ('key, 'a) t -> ('key * 'a) Seq.t + (** Iterate on the whole map, in ascending order of keys. + + @since NEXT_RELEASE *) + + val to_seq_from : 'key -> ('key, 'a) t -> ('key * 'a) Seq.t + (** [to_seq_from k m] iterates on a subset of the bindings in [m], + namely those bindings greater or equal to [k], in ascending order. + + @since NEXT_RELEASE *) + + val add_seq : ('key * 'a) Seq.t -> ('key, 'a) t -> ('key, 'a) t + (** add the given bindings to the map, in order. + + @since NEXT_RELEASE *) + + val of_seq : ?cmp:('key -> 'key -> int) -> ('key * 'a) Seq.t -> ('key, 'a) t + (** build a map from the given bindings + + @since NEXT_RELEASE *) + + val compare: ('b -> 'b -> int) -> ('a,'b) t -> ('a, 'b) t -> int val equal : ('b -> 'b -> bool) -> ('a,'b) t -> ('a, 'b) t -> bool (** Construct a comparison or equality function for maps based on a diff --git a/src/batSplay.ml b/src/batSplay.ml index e6c9d826a..c94c0b202 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -441,7 +441,12 @@ struct | C (cx, Node (l, _kv, r)) -> C (cx, Node (l, (k2, v2), r)) | C (cx, Empty) -> raise Not_found end - end + end + + let update_stdlib k f m = + match f (find_opt k m) with + | Some x -> add k x m + | None -> remove k m let mem k m = try ignore (find k m) ; true with Not_found -> false @@ -477,6 +482,11 @@ struct in bfind tr + let min_binding_opt tr = + try Some(min_binding tr) + with Not_found -> None + + let choose = min_binding (*$= choose (empty |> add 0 1 |> add 1 1 |> choose) \ @@ -485,7 +495,8 @@ struct (*$T choose try ignore (choose empty) ; false with Not_found -> true *) - + let choose_opt = min_binding_opt + let any tr = match sget tr with | Empty -> raise Not_found | Node (_, kv, _) -> kv @@ -511,6 +522,10 @@ struct in bfind tr + let max_binding_opt tr = + try Some(max_binding tr) + with Not_found -> None + let pop_max_binding tr = let maxi = ref (choose tr) in let rec bfind = function @@ -769,6 +784,12 @@ struct | Empty -> raise Not_found | Node (l, kv, r) -> kv, sref (bst_append l r) + let to_seq _ = failwith "unimplemented" + let of_seq _ = failwith "unimplemented" + let add_seq _ = failwith "unimplemented" + let to_seq_from _ _ = failwith "unimplemented" + let union _f _m1 _m2 = failwith "unimplemented" + let extract k tr = let tr = sget tr in (* the reference here is a tad ugly but allows to reuse `cfind` @@ -787,3 +808,5 @@ struct | Some v -> v, sref tr (*$>*) end + + diff --git a/src/batteries_compattest.mlv b/src/batteries_compattest.mlv index a9578eb62..b35fb6779 100644 --- a/src/batteries_compattest.mlv +++ b/src/batteries_compattest.mlv @@ -40,7 +40,6 @@ module Stdlib_verifications = struct include module type of Legacy.List val find_map : ('a -> 'b option) -> 'a list -> 'b end) - (* module Map = (Map : module type of Legacy.Map)*) module Seq = (Seq : module type of Legacy.Seq) module Marshal = (Marshal: sig @@ -110,4 +109,14 @@ module Stdlib_verifications = struct module Big_int = (Big_int : module type of Legacy.Big_int) (* FIXME: This does not pass for some reason: module Bigarray = (Bigarray : module type of Legacy.Bigarray)*) + + (* test compatibility of BatMap.S with Legacy.Map.S *) + let sort_map (type s) (module Map : Legacy.Map.S with type key = s) l = + Map.bindings (List.fold_right (fun x m -> Map.add x x m) l Map.empty) + module IntMap = struct + include BatMap.Int + let update = update_stdlib + end + let _ = assert ([1,1;2,2;3,3;] = (sort_map (module IntMap) [3; 1; 2;])) + end From 20aacb5bba4734aa686cada503444dfd2d78fdc5 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Fri, 22 Jan 2021 22:25:58 +0100 Subject: [PATCH 02/20] faster implementations --- src/batMap.ml | 60 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 974d0b2d5..f32cd91f8 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -163,7 +163,7 @@ module Concrete = struct | Node (l, k, v, r, h) as node -> let c = cmp x k in if c = 0 then - if x == k && d == v then + if d == v then node else Node (l, x, d, r, h) @@ -347,11 +347,33 @@ module Concrete = struct in loop map - let update_stdlib k f cmp m = - let findresult = (find_option k cmp m) in - match f findresult with - | Some x -> add k x cmp m - | None -> remove k cmp m + let rec update_stdlib x f cmp = function + | Empty -> + begin match f None with + | None -> Empty + | Some data -> Node(Empty, x, data, Empty, 1) + end + | Node (l, v, d, r, h) as m -> + let c = cmp x v in + if c = 0 then + begin + match f (Some d) with + | None -> merge l r + | Some data -> + if d == data + then m + else Node(l, x, data, r, h) + end + else if c < 0 then + let ll = update_stdlib x f cmp l in + if l == ll + then m + else bal ll v d r + else + let rr = update_stdlib x f cmp r in + if r == rr + then m + else bal l v d rr let mem x cmp map = let rec loop = function @@ -861,19 +883,31 @@ module Concrete = struct m1 empty let add_seq cmp s m = - Seq.fold_left + BatSeq.fold_left (fun m (k, v) -> add k v cmp m) m s let of_seq cmp s = add_seq cmp s empty - - let to_seq m = - BatSeq.of_list (bindings m) (* TODO: optimize *) - - let to_seq_from (cmp : 'a -> 'a -> int) k m = - to_seq (filter (fun k2 _ -> cmp k k2 >= 0) m cmp) + + let rec to_seq m = + fun () -> + match m with + | Empty -> BatSeq.Nil + | Node(l, k, v, r, _) -> + BatSeq.append (to_seq l) (fun () -> BatSeq.Cons ((k, v), to_seq r)) () + + let rec to_seq_from cmp k m = + fun () -> + match m with + | Empty -> BatSeq.Nil + | Node(l, k, v, r, _) -> + if cmp k v <= 0 then + BatSeq.append (to_seq_from cmp k l) (fun () -> BatSeq.Cons ((k,v), to_seq r)) () + else + to_seq_from cmp k r () + let union_stdlib cmp f m1 m2 = foldi From 02968ad4271fd667fcb90f2ba4ccfb6c6ccc531d Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Fri, 22 Jan 2021 22:26:14 +0100 Subject: [PATCH 03/20] fix documentation --- src/batMap.mli | 99 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/src/batMap.mli b/src/batMap.mli index 909fd88ca..79680b3c6 100644 --- a/src/batMap.mli +++ b/src/batMap.mli @@ -88,7 +88,11 @@ sig val add: key -> 'a -> 'a t -> 'a t (** [add x y m] returns a map containing the same bindings as [m], plus a binding of [x] to [y]. If [x] was already bound - in [m], its previous binding disappears. *) + in [m], its previous binding disappears. + If [x] was already bound to some [z] that is physically equal + to [y], then the returned map is physically equal to [m]. + + @before NEXT_RELEASE physical equality was not ensured. *) val update_stdlib : key -> ('a option -> 'a option) -> 'a t -> 'a t (** [update_stdlib k f m] returns a map containing the same bindings as [m], @@ -96,6 +100,8 @@ sig First, calculate [y] as [f (find_opt k m)]. If [y = Some v] then [k] will be bound to [v] in the resulting map. Else [k] will not be bound in the resulting map. + If [v] is physically equal to the value of the previous binding of [k] in [m], + then the returned map will be physically equal to [m]. This function does the same thing as [update] in the stdlib, but has a different name for backwards compatibility reasons. @@ -108,8 +114,12 @@ sig [k2] associated to [v2]. This is equivalent to [add k2 v2 (remove k1) m], but more efficient in the case where [k1] and [k2] have the same key ordering. + If [k1] and [k2] have the same key ordering and [v2] is physically + equal to the value [k1] is bound to in [m] then the returned map will + be physically equal to [m] @raise Not_found if [k1] is not bound in [m]. - @since 2.4.0 *) + @since 2.4.0 + @before NEXT_RELEASE physical equality was nor ensured. *) val find: key -> 'a t -> 'a (** [find x m] returns the current binding of [x] in [m], @@ -158,8 +168,10 @@ sig val remove: key -> 'a t -> 'a t (** [remove x m] returns a map containing the same bindings as [m], except for [x] which is unbound in the returned map. - The returned map compares equal to the passed one if [x] was - already unbound. *) + The returned map is physically equal to the passed one if [x] was + already unbound. + + @before NEXT_RELEASE physical equality was not ensured *) val remove_exn: key -> 'a t -> 'a t (** [remove_exn x m] behaves like [remove x m] except that it raises @@ -243,7 +255,11 @@ sig (** [filter f m] returns a map where only the [(key, value)] pairs of [m] such that [f key value = true] remain. The bindings are passed to [f] in increasing order with respect to the ordering over the type - of the keys. *) + of the keys. + If [f] returns [true] for all bindings of [m] the returned map is physically + equal to [m]. + + @before NEXT_RELEASE physical equality was not ensured. *) val filter_map: (key -> 'a -> 'b option) -> 'a t -> 'b t (** [filter_map f m] combines the features of [filter] and @@ -528,15 +544,23 @@ val cardinal: ('a, 'b) t -> int val add : 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t (** [add x y m] returns a map containing the same bindings as [m], plus a binding of [x] to [y]. If [x] was already bound - in [m], its previous binding disappears. *) + in [m], its previous binding disappears. + If [x] was already bound to some [z] that is physically equal + to [y], then the returned map is physically equal to [m]. + + @before NEXT_RELEASE physical equality was not ensured. *) val update: 'a -> 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t (** [update k1 k2 v2 m] replace the previous binding of [k1] in [m] by [k2] associated to [v2]. This is equivalent to [add k2 v2 (remove k1) m], but more efficient in the case where [k1] and [k2] have the same key ordering. + If [k1] and [k2] have the same key ordering and [v2] is physically + equal to the value [k1] is bound to in [m] then the returned map will + be physically equal to [m] @raise Not_found if [k1] is not bound in [m]. - @since 2.4.0 *) + @since 2.4.0 + @before NEXT_RELEASE physical equality was nor ensured. *) val update_stdlib : 'a -> ('b option -> 'b option) -> ('a, 'b) t -> ('a, 'b) t (** [update_stdlib k f m] returns a map containing the same bindings as [m], @@ -545,6 +569,11 @@ val update_stdlib : 'a -> ('b option -> 'b option) -> ('a, 'b) t -> ('a, 'b) t If [y = Some v] then [k] will be bound to [v] in the resulting map. Else [k] will not be bound in the resulting map. + If [v] is physically equal to the value of the previous binding of [k] in [m], + then the returned map will be physically equal to [m]. + + This function does the same thing as [update] in the stdlib, but has a + different name for backwards compatibility reasons. @since NEXT_RELEASE *) val find : 'a -> ('a, 'b) t -> 'b @@ -594,8 +623,10 @@ val find_last_opt: ('a -> bool) -> ('a, 'b) t -> ('a * 'b) option val remove : 'a -> ('a, 'b) t -> ('a, 'b) t (** [remove x m] returns a map containing the same bindings as [m], except for [x] which is unbound in the returned map. - The returned map compares equal to the passed one if [x] was - already unbound. *) + The returned map is physically equal to the passed one if [x] was + already unbound. + + @before NEXT_RELEASE physical equality was not ensured *) val remove_exn: 'a -> ('a, 'b) t -> ('a, 'b) t (** [remove_exn x m] behaves like [remove x m] except that it raises @@ -653,10 +684,14 @@ val filterv: ('a -> bool) -> ('key, 'a) t -> ('key, 'a) t type of the keys. *) val filter: ('key -> 'a -> bool) -> ('key, 'a) t -> ('key, 'a) t -(**[filter f m] returns a map where only the [(key, value)] pairs - [key], [a] of [m] such that [f key a = true] remain. The - bindings are passed to [f] in increasing order with respect - to the ordering over the type of the keys. *) +(** [filter f m] returns a map where only the [(key, value)] pairs of [m] + such that [f key value = true] remain. The bindings are passed to + [f] in increasing order with respect to the ordering over the type + of the keys. + If [f] returns [true] for all bindings of [m] the returned map is physically + equal to [m]. + + @before NEXT_RELEASE physical equality was not ensured. *) val filter_map: ('key -> 'a -> 'b option) -> ('key, 'a) t -> ('key, 'b) t (** [filter_map f m] combines the features of [filter] and @@ -949,15 +984,24 @@ module PMap : sig val add : 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t (** [add x y m] returns a map containing the same bindings as [m], plus a binding of [x] to [y]. If [x] was already bound - in [m], its previous binding disappears. *) + in [m], its previous binding disappears. + + If [x] was already bound to some [z] that is physically equal + to [y], then the returned map is physically equal to [m]. + + @before NEXT_RELEASE physical equality was not ensured. *) val update : 'a -> 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t (** [update k1 k2 v2 m] replace the previous binding of [k1] in [m] by [k2] associated to [v2]. This is equivalent to [add k2 v2 (remove k1) m], but more efficient in the case where [k1] and [k2] have the same key ordering. + If [k1] and [k2] have the same key ordering and [v2] is physically + equal to the value [k1] is bound to in [m] then the returned map will + be physically equal to [m] @raise Not_found if [k1] is not bound in [m]. - @since 2.4.0 *) + @since 2.4.0 + @before NEXT_RELEASE physical equality was nor ensured. *) val update_stdlib : 'a -> ('b option -> 'b option) -> ('a, 'b) t -> ('a, 'b) t (** [update_stdlib k f m] returns a map containing the same bindings as [m], @@ -965,7 +1009,12 @@ module PMap : sig First, calculate [y] as [f (find_opt k m)]. If [y = Some v] then [k] will be bound to [v] in the resulting map. Else [k] will not be bound in the resulting map. - + If [v] is physically equal to the value of the previous binding of [k] in [m], + then the returned map will be physically equal to [m]. + + This function does the same thing as [update] in the stdlib, but has a + different name for backwards compatibility reasons. + @since NEXT_RELEASE *) val find : 'a -> ('a, 'b) t -> 'b @@ -1010,7 +1059,11 @@ module PMap : sig val remove : 'a -> ('a, 'b) t -> ('a, 'b) t (** [remove x m] returns a map containing the same bindings as - [m], except for [x] which is unbound in the returned map. *) + [m], except for [x] which is unbound in the returned map. + The returned map is physically equal to the passed one if [x] was + already unbound. + + @before NEXT_RELEASE physical equality was not ensured *) val remove_exn : 'a -> ('a, 'b) t -> ('a, 'b) t (** [remove_exn x m] behaves like [remove x m] except that it raises @@ -1068,10 +1121,14 @@ module PMap : sig type of the keys. *) val filter: ('key -> 'a -> bool) -> ('key, 'a) t -> ('key, 'a) t - (**[filter f m] returns a map where only the [(key, value)] pairs - [key], [a] of [m] such that [f key a = true] remain. The - bindings are passed to [f] in increasing order with respect - to the ordering over the type of the keys. *) + (** [filter f m] returns a map where only the [(key, value)] pairs of [m] + such that [f key value = true] remain. The bindings are passed to + [f] in increasing order with respect to the ordering over the type + of the keys. + If [f] returns [true] for all bindings of [m] the returned map is physically + equal to [m]. + + @before NEXT_RELEASE physical equality was not ensured. *) val filter_map: ('key -> 'a -> 'b option) -> ('key, 'a) t -> ('key, 'b) t (** [filter_map f m] combines the features of [filter] and From d34ba912ae40c63a7d1f79c85451819b7e088726 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 23 Jan 2021 01:18:45 +0100 Subject: [PATCH 04/20] some implementation work --- src/batMap.ml | 37 +++++++++++++++++++++++++------------ src/batSplay.ml | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index f32cd91f8..05d33b27b 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -902,21 +902,21 @@ module Concrete = struct fun () -> match m with | Empty -> BatSeq.Nil - | Node(l, k, v, r, _) -> - if cmp k v <= 0 then + | Node(l, k2, v, r, _) -> + if cmp k k2 <= 0 then BatSeq.append (to_seq_from cmp k l) (fun () -> BatSeq.Cons ((k,v), to_seq r)) () else to_seq_from cmp k r () - let union_stdlib cmp f m1 m2 = foldi (fun k v m -> - if mem k cmp m - then match f k v (find k cmp m) with - | Some v2 -> add k v2 cmp m - | None -> m - else add k v cmp m) + match find_option k cmp m with + | Some v1 -> + (match f k v v1 with + | Some vmerged -> add k vmerged cmp m + | None -> m) + | None -> add k v cmp m) m1 m2 @@ -1514,13 +1514,22 @@ module PMap = struct (*$< PMap *) let is_empty x = x.map = Concrete.Empty let add x d m = - { m with map = Concrete.add x d m.cmp m.map } + let newmap = Concrete.add x d m.cmp m.map in + if newmap == m.map + then m + else { m with map = newmap } let update k1 k2 v2 m = - { m with map = Concrete.update k1 k2 v2 m.cmp m.map } + let newmap = Concrete.update k1 k2 v2 m.cmp m.map in + if newmap == m.map + then m + else { m with map = newmap } let update_stdlib k f m = - { m with map = Concrete.update_stdlib k f m.cmp m.map } + let newmap = Concrete.update_stdlib k f m.cmp m.map in + if newmap == m.map + then m + else { m with map = newmap } let find x m = Concrete.find x m.cmp m.map @@ -1637,8 +1646,12 @@ module PMap = struct (*$< PMap *) Concrete.print ?first ?last ?sep ?kvsep print_k print_v out t.map let filterv f t = { t with map = Concrete.filterv f t.map t.cmp } - let filter f t = { t with map = Concrete.filter f t.map t.cmp } let filter_map f t = { t with map = Concrete.filter_map f t.map t.cmp } + let filter f t = + let newmap = Concrete.filter f t.map t.cmp in + if newmap == t.map + then t + else { t with map = newmap } let max_binding t = Concrete.max_binding t.map let min_binding t = Concrete.min_binding t.map diff --git a/src/batSplay.ml b/src/batSplay.ml index c94c0b202..f080e716a 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -784,12 +784,37 @@ struct | Empty -> raise Not_found | Node (l, kv, r) -> kv, sref (bst_append l r) - let to_seq _ = failwith "unimplemented" - let of_seq _ = failwith "unimplemented" - let add_seq _ = failwith "unimplemented" - let to_seq_from _ _ = failwith "unimplemented" - let union _f _m1 _m2 = failwith "unimplemented" - + + let add_seq s m = + BatSeq.fold_left + (fun m (k, v) -> add k v m) + m + s + + let of_seq s = + add_seq s empty + + let rec to_seq m = + fun () -> + (* since the tree can change shape arbitrarily, fetch all bindings and turn them into a seq. *) + BatSeq.of_list (bindings m) () + + let rec to_seq_from k m = + fun () -> + BatSeq.filter (fun (k2, _) -> Ord.compare k k2 <= 0) (to_seq m) () + + let union_stdlib f m1 m2 = + fold + (fun k v m -> + match find_opt k m with + | Some v1 -> + (match f k v v1 with + | Some vmerged -> add k vmerged m + | None -> m) + | None -> add k v m) + m1 + m2 + let extract k tr = let tr = sget tr in (* the reference here is a tad ugly but allows to reuse `cfind` From ca88bcd31a39cfe2024bbdffb6c346ea9e8040ad Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 23 Jan 2021 01:19:06 +0100 Subject: [PATCH 05/20] more testing --- testsuite/test_map.ml | 159 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/testsuite/test_map.ml b/testsuite/test_map.ml index a40b11a92..58696ecab 100644 --- a/testsuite/test_map.ml +++ b/testsuite/test_map.ml @@ -79,6 +79,9 @@ module TestMap val equal : ('a -> 'a -> bool) -> 'a m -> 'a m -> bool + (* true if add, remove, update_stdlib and filter support physical equality *) + val supports_phys_equality : bool + (* tested functions *) val empty : 'a m val is_empty : _ m -> bool @@ -90,6 +93,10 @@ module TestMap val cardinal : _ m -> int val min_binding : 'a m -> (key * 'a) val max_binding : 'a m -> (key * 'a) + val pop_min_binding : 'a m -> (key * 'a) * 'a m + val pop_max_binding : 'a m -> (key * 'a) * 'a m + val min_binding_opt : 'a m -> (key * 'a) option + val max_binding_opt : 'a m -> (key * 'a) option val modify : key -> ('a -> 'a) -> 'a m -> 'a m val modify_def : 'a -> key -> ('a -> 'a) -> 'a m -> 'a m val modify_opt : key -> ('a option -> 'a option) -> 'a m -> 'a m @@ -171,7 +178,11 @@ module TestMap "add k v (add k v' t) = add k v t" @= (M.add k v (M.add k v' t), M.add k v t); "add 4 8 [3,4; 5,6] = [3,4; 4,8; 5,6]" @= - (M.add 4 8 t, il [(3,4); (4,8); (5,6)]); + (M.add 4 8 t, il [(3,4); (4,8); (5,6)]); + if M.supports_phys_equality then begin + "add 3,4 [3,4; 5,6] == [3,4; 5,6]" @? + (t == M.add 3 4 t); + end; () let test_cardinal () = @@ -212,8 +223,99 @@ module TestMap M.cardinal t - if M.mem k t then 1 else 0) in test_cardinal 3 t; test_cardinal 57 t; + if M.supports_phys_equality then begin + "remove 12 [3,4; 5,6] == [3,4; 5,6]" @? + (t == M.remove 12 t); + "remove 12 [] == []" @? + (M.empty == M.remove 12 M.empty); + end; + () + + let test_update () = + (* TODO write some tests *) + (* + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); Some 3) (of_list [1,1; 2,2])) (of_list [1,3;2,2]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 3 (fun x -> assert(x = None); Some 3) (of_list [1,1; 2,2])) (of_list [1,1;2,2;3,3]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); None) (of_list [1,1; 2,2])) (of_list [2,2]) + let of_list l = of_enum (BatList.enum l) in \ + let s = (of_list [1,1; 2,2]) in (update_stdlib 3 (fun x -> assert(x = None ); None ) s) == s + let of_list l = of_enum (BatList.enum l) in \ + let s = (of_list [1,1; 2,2]) in (update_stdlib 1 (fun x -> assert(x = Some 1); Some 1) s) == s + + let t = il [(3,4); (5, 6)] in*) + (); + if M.supports_phys_equality then begin + () + end; + () + + let test_update_stdlib () = + (* TODO write some tests *) + (* let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); Some 3) (of_list [1,1; 2,2])) (of_list [1,3;2,2]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 3 (fun x -> assert(x = None); Some 3) (of_list [1,1; 2,2])) (of_list [1,1;2,2;3,3]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); None) (of_list [1,1; 2,2])) (of_list [2,2]) + let of_list l = of_enum (BatList.enum l) in \ + let s = (of_list [1,1; 2,2]) in (update_stdlib 3 (fun x -> assert(x = None ); None ) s) == s + let of_list l = of_enum (BatList.enum l) in \ + let s = (of_list [1,1; 2,2]) in (update_stdlib 1 (fun x -> assert(x = Some 1); Some 1) s) == s + *) + let t = il [(3,4); (5, 6)] in + (); + if M.supports_phys_equality then begin + () + end; + () + + let test_filter () = + (* TODO write some tests *) + let t = il [(3,4); (5, 6)] in + (); + if M.supports_phys_equality then begin + (* + let s = empty |> add 1 1 |> add 2 2 in \ + s == (filter (fun _ _ -> true) s) + *) + () + end; + () + + let test_union_stdlib () = + (* TODO write some tests *) + (*$T union_stdlib + let cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") empty empty) empty + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") (of_list [1,1;2,2]) empty) (of_list [1,1;2,2]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") empty (of_list [1,1;2,2])) (of_list [1,1;2,2]) + let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ + equal cmp (union_stdlib (fun _ -> failwith "must not be called") (of_list [3,3;4,4]) (of_list [1,1;2,2])) (of_list [1,1;2,2;3,3;4,4]) + *) + () + + let test_add_seq () = + (* TODO write some tests *) + () + + let test_of_seq () = + (* TODO write some tests *) + () + + let test_to_seq () = + (* TODO write some tests *) + () + + let test_to_seq_from () = + (* TODO write some tests *) () + let test_mem () = let k, k', v = 1, 2, () in "mem k (singleton k v)" @? M.mem k (M.singleton k v); @@ -224,14 +326,52 @@ module TestMap let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in "min_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2)" @? (M.min_binding t = (1, 2)); + "min_binding [] -> Not_found" @? + (try ignore(M.min_binding M.empty); false with Not_found -> true); () let test_max_binding () = let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in "max_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (3, 4)" @? (M.max_binding t = (3, 4)); + "max_binding [] -> Not_found" @? + (try ignore(M.max_binding M.empty); false with Not_found -> true); () + let test_min_binding_opt () = + let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in + "min_binding_opt [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2)" @? + (M.min_binding_opt t = Some (1, 2)); + "min_binding_opt [] = None" @? + (M.min_binding_opt t = None); + () + + let test_max_binding_opt () = + let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in + "max_binding_opt [(2, 0); (1,2); (3, 4); (2, 0)] = (3, 4)" @? + (M.max_binding_opt t = Some (3, 4)); + "max_binding_opt [] = None" @? + (M.max_binding_opt t = None); + () + + let test_pop_min_binding () = + let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in + let t2 = il [(2, 0); (3, 4); (2, 0)] in + "pop_min_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2), ..." @? + (M.pop_min_binding t = ((1, 2), t2)); + "pop_min_binding [] -> Not_found" @? + (try ignore(M.pop_min_binding M.empty); false with Not_found -> true); + () + + let test_pop_max_binding () = + let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in + let t2 = il [1,2; (2, 0)] in + "pop_max_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2), ..." @? + (M.pop_max_binding t = ((3, 4), t2)); + "pop_max_binding [] -> Not_found" @? + (try ignore(M.pop_max_binding M.empty); false with Not_found -> true); + () + let test_modify () = let k, k', f, t = 1, 2, ((+) 1), il [(1,2); (3, 4)] in "mem k t => find k (modify k f t) = f (find k t)" @? @@ -579,7 +719,18 @@ module TestMap "test_iterators" >:: test_iterators; "test_pop" >:: test_pop; "test_extract" >:: test_extract; - ] + "test_update" >:: test_update; + "test_update_stdlib" >:: test_update_stdlib; + "test_filter" >:: test_filter; + "test_add_seq" >:: test_add_seq; + "test_of_seq" >:: test_of_seq; + "test_to_seq" >:: test_to_seq; + "test_to_seq_from" >:: test_to_seq_from; + "test_min_binding_opt" >:: test_min_binding_opt; + "test_max_binding_opt" >:: test_max_binding_opt; + "test_pop_min_binding" >:: test_pop_min_binding; + "test_pop_max_binding" >:: test_pop_max_binding; + ] end module M = struct @@ -594,6 +745,8 @@ module M = struct let iteri = M.iter let filterv_map f = M.filter_map (fun _ -> f) + + let supports_phys_equality = true end module P = struct @@ -607,6 +760,7 @@ module P = struct let iteri = M.iter let filterv_map f = M.filter_map (fun _ -> f) + let supports_phys_equality = true end module S = struct @@ -621,6 +775,7 @@ module S = struct let fold f = M.fold (fun _ -> f) let foldi = M.fold + let supports_phys_equality = false end module TM = TestMap(M) From 145f14a563e32ef81e744cef3e87101f6d4080ca Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sun, 24 Jan 2021 16:30:34 +0100 Subject: [PATCH 06/20] fix compile errors & tests --- src/batMap.ml | 14 ++++++++------ src/batSplay.ml | 24 +++++++++++++++++------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 05d33b27b..d4f5da833 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -108,9 +108,10 @@ module Concrete = struct | Node (l, _, _, _, _) -> min_binding l | Empty -> raise Not_found - let min_binding_opt m = - try Some (min_binding m) - with Not_found -> None + let rec min_binding_opt = function + | Node (Empty, k, v, _, _) -> Some (k, v) + | Node (l, _, _, _, _) -> min_binding_opt l + | Empty -> None let get_root = function | Empty -> raise Not_found @@ -131,9 +132,10 @@ module Concrete = struct | Node (_, _, _, r, _) -> max_binding r | Empty -> raise Not_found - let max_binding_opt m = - try Some (max_binding m) - with Not_found -> None + let rec max_binding_opt = function + | Node (_, k, v, Empty, _) -> Some (k, v) + | Node (_, _, _, r, _) -> max_binding_opt r + | Empty -> None let pop_max_binding s = let maxi = ref (get_root s) in diff --git a/src/batSplay.ml b/src/batSplay.ml index f080e716a..dca489131 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -483,8 +483,13 @@ struct bfind tr let min_binding_opt tr = - try Some(min_binding tr) - with Not_found -> None + let tr = sget tr in + let rec bfind = function + | Node (Empty, kv, _) -> Some kv + | Node (l, _, _) -> bfind l + | Empty -> None + in + bfind tr let choose = min_binding @@ -509,7 +514,7 @@ struct let rec bfind = function | Node (Empty, kv, r) -> mini := kv; r | Node (l, kv, r) -> Node (bfind l, kv, r) - | Empty -> assert(false) + | Empty -> raise Not_found in (!mini, sref (bfind (sget tr))) @@ -523,15 +528,20 @@ struct bfind tr let max_binding_opt tr = - try Some(max_binding tr) - with Not_found -> None + let tr = sget tr in + let rec bfind = function + | Node (_, kv, Empty) -> Some kv + | Node (_, _, r) -> bfind r + | Empty -> None + in + bfind tr let pop_max_binding tr = let maxi = ref (choose tr) in let rec bfind = function | Node (l, kv, Empty) -> maxi := kv; l | Node (l, kv, r) -> Node (l, kv, bfind r) - | Empty -> assert(false) + | Empty -> raise Not_found in (!maxi, sref (bfind (sget tr))) @@ -803,7 +813,7 @@ struct fun () -> BatSeq.filter (fun (k2, _) -> Ord.compare k k2 <= 0) (to_seq m) () - let union_stdlib f m1 m2 = + let union f m1 m2 = fold (fun k v m -> match find_opt k m with From 02bdbeff78774ea0a389907a25c302ca3ce2f73f Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sun, 24 Jan 2021 17:42:05 +0100 Subject: [PATCH 07/20] more tests --- testsuite/test_map.ml | 139 ++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 58 deletions(-) diff --git a/testsuite/test_map.ml b/testsuite/test_map.ml index 58696ecab..2734e7fe9 100644 --- a/testsuite/test_map.ml +++ b/testsuite/test_map.ml @@ -91,6 +91,8 @@ module TestMap val remove : key -> 'a m -> 'a m val mem : key -> _ m -> bool val cardinal : _ m -> int + val update: key -> key -> 'a -> 'a m -> 'a m + val update_stdlib: key -> ('a option -> 'a option) -> 'a m -> 'a m val min_binding : 'a m -> (key * 'a) val max_binding : 'a m -> (key * 'a) val pop_min_binding : 'a m -> (key * 'a) * 'a m @@ -131,6 +133,9 @@ module TestMap val merge : (key -> 'a option -> 'b option -> 'c option) -> 'a m -> 'b m -> 'c m + + val union_stdlib : + (key -> 'a -> 'a -> 'a option) -> 'a m -> 'a m -> 'a m val print : ?first:string -> ?last:string -> ?sep:string -> ?kvsep:string -> @@ -231,74 +236,84 @@ module TestMap end; () - let test_update () = - (* TODO write some tests *) - (* - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); Some 3) (of_list [1,1; 2,2])) (of_list [1,3;2,2]) - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (update_stdlib 3 (fun x -> assert(x = None); Some 3) (of_list [1,1; 2,2])) (of_list [1,1;2,2;3,3]) - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); None) (of_list [1,1; 2,2])) (of_list [2,2]) - let of_list l = of_enum (BatList.enum l) in \ - let s = (of_list [1,1; 2,2]) in (update_stdlib 3 (fun x -> assert(x = None ); None ) s) == s - let of_list l = of_enum (BatList.enum l) in \ - let s = (of_list [1,1; 2,2]) in (update_stdlib 1 (fun x -> assert(x = Some 1); Some 1) s) == s - - let t = il [(3,4); (5, 6)] in*) - (); + let test_update_stdlib () = + let s = (il [1,1; 2,2]) in + "update_stdlib change [1,1;2,2] to [1,3;2,2]" @= + (M.update_stdlib 1 (fun x -> assert(x = Some 1); Some 3) s, il [1,3;2,2]); + "update_stdlib change [1,1;2,2] to [1,1;2,2;3,3]" @= + (M.update_stdlib 3 (fun x -> assert(x = None); Some 3) s, il [1,1;2,2;3,3]); + "update_stdlib change [1,1;2,2] to [2,2]" @= + (M.update_stdlib 1 (fun x -> assert(x = Some 1); None) s, il [2,2]); + "update_stdlib change [1,1;2,2] to [1,1;2,2] by not changing binding of 3 (phys eq)" @= + (M.update_stdlib 3 (fun x -> assert(x = None); None ) s, s); + "update_stdlib change [1,1;2,2] to [1,1;2,2] by not changing binding of 1 (phys eq)" @= + (M.update_stdlib 1 (fun x -> assert(x = Some 1); Some 1) s, s); + "update_stdlib change [] to [] by not changing binding of 1 (phys eq)" @= + (M.update_stdlib 1 (fun x -> assert(x = None); None) M.empty, M.empty); if M.supports_phys_equality then begin - () + "update_stdlib change [1,1;2,2] to [1,1;2,2] by not changing binding of 3 (phys eq)" @? + (M.update_stdlib 3 (fun x -> assert(x = None); None ) s == s); + "update_stdlib change [1,1;2,2] to [1,1;2,2] by not changing binding of 1 (phys eq)" @? + (M.update_stdlib 1 (fun x -> assert(x = Some 1); Some 1) s == s); + "update_stdlib change [] to [] by not changing binding of 1 (phys eq)" @? + (M.update_stdlib 1 (fun x -> assert(x = None); None) M.empty == M.empty); end; () - let test_update_stdlib () = - (* TODO write some tests *) - (* let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); Some 3) (of_list [1,1; 2,2])) (of_list [1,3;2,2]) - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (update_stdlib 3 (fun x -> assert(x = None); Some 3) (of_list [1,1; 2,2])) (of_list [1,1;2,2;3,3]) - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (update_stdlib 1 (fun x -> assert(x = Some 1); None) (of_list [1,1; 2,2])) (of_list [2,2]) - let of_list l = of_enum (BatList.enum l) in \ - let s = (of_list [1,1; 2,2]) in (update_stdlib 3 (fun x -> assert(x = None ); None ) s) == s - let of_list l = of_enum (BatList.enum l) in \ - let s = (of_list [1,1; 2,2]) in (update_stdlib 1 (fun x -> assert(x = Some 1); Some 1) s) == s - *) - let t = il [(3,4); (5, 6)] in - (); + let test_update () = + let s = (il [1,1; 2,2]) in + "update 1 1 3 [1,1;2,2]" @= + (M.update 1 1 3 s, il [1,3;2,2]); + "update 1 3 3 [1,1;2,2]" @= + (M.update 1 3 3 s, il [2,2;3,3]); + "update 1 2 3 [1,1;2,2]" @= + (M.update 1 2 3 s, il [2,3]); + "update 1 1 1 [1,1;2,2]" @= + (M.update 1 1 1 s, s); if M.supports_phys_equality then begin - () + "update 1 1 1 [1,1;2,2] (phys eq)" @? + (M.update 1 1 1 s == s); end; () let test_filter () = - (* TODO write some tests *) - let t = il [(3,4); (5, 6)] in - (); + let t = il [(3,4); (6, 5); (7,8); (10, 9)] in + "filter (_ -> false) t" @= + (M.filter (fun _ _ -> false) t, M.empty); + "filter (fun a b -> a > b) t" @= + (M.filter (fun a b -> a > b) t, il [6,5;10,9]); + "filter (_ -> true) t" @= + (M.filter (fun _ _ -> true) t, t); + "filter (_ -> true) empty" @= + (M.filter (fun _ _ -> true) M.empty, M.empty); if M.supports_phys_equality then begin - (* - let s = empty |> add 1 1 |> add 2 2 in \ - s == (filter (fun _ _ -> true) s) - *) - () + "filter (_ -> true) t (phys eq)" @? + (M.filter (fun _ _ -> true) t == t); + "filter (_ -> true) empty (phys eq)" @? + (M.filter (fun _ _ -> true) M.empty == M.empty); end; () let test_union_stdlib () = - (* TODO write some tests *) - (*$T union_stdlib - let cmp = Pervasives.( = ) in \ - equal cmp (union_stdlib (fun _ -> failwith "must not be called") empty empty) empty - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (union_stdlib (fun _ -> failwith "must not be called") (of_list [1,1;2,2]) empty) (of_list [1,1;2,2]) - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (union_stdlib (fun _ -> failwith "must not be called") empty (of_list [1,1;2,2])) (of_list [1,1;2,2]) - let of_list l = of_enum (BatList.enum l) and cmp = Pervasives.( = ) in \ - equal cmp (union_stdlib (fun _ -> failwith "must not be called") (of_list [3,3;4,4]) (of_list [1,1;2,2])) (of_list [1,1;2,2;3,3;4,4]) - *) + "union_stdlib empty empty" @= + (M.union_stdlib (fun _ -> failwith "must not be called") M.empty M.empty, M.empty); + "union_stdlib [1,1;2,2] empty" @= + (M.union_stdlib (fun _ -> failwith "must not be called") (il [1,1;2,2]) M.empty, il [1,1;2,2]); + "union_stdlib empty [1,1;2,2]" @= + (M.union_stdlib (fun _ -> failwith "must not be called") M.empty (il [1,1;2,2]), il [1,1;2,2]); + "union_stdlib [1,1;2,2] [3,3;4,4]" @= + (M.union_stdlib (fun _ -> failwith "must not be called") (il [3,3;4,4]) (il [1,1;2,2]), il [1,1;2,2;3,3;4,4]); + "union_stdlib [1,1;2,2;3,10] [3,6;4,4]" @= + (M.union_stdlib (fun _k a b -> Some (a+b)) (il [3,6;4,4]) (il [1,1;2,2;3,10]), il [1,1;2,2;3,16;4,4]); + "union_stdlib [1,1;2,2;3,10] [3,6;4,4]" @= + (M.union_stdlib (fun _k a b -> None) (il [3,6;4,4]) (il [1,1;2,2;3,10]), il [1,1;2,2;4,4]); + "union_stdlib [1,1;4,2;3,10] [3,6;4,4]" @= + (M.union_stdlib (fun k a b -> if k = 3 then Some (a+b) else None) (il [3,6;4,4]) (il [1,1;4,2;3,10]), il [1,1;2,2;3,16]); () + let list_of_seq s = + BatSeq.fold_right (fun x l -> x :: l) s [] + let test_add_seq () = (* TODO write some tests *) () @@ -343,7 +358,7 @@ module TestMap "min_binding_opt [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2)" @? (M.min_binding_opt t = Some (1, 2)); "min_binding_opt [] = None" @? - (M.min_binding_opt t = None); + (M.min_binding_opt M.empty = None); () let test_max_binding_opt () = @@ -351,23 +366,29 @@ module TestMap "max_binding_opt [(2, 0); (1,2); (3, 4); (2, 0)] = (3, 4)" @? (M.max_binding_opt t = Some (3, 4)); "max_binding_opt [] = None" @? - (M.max_binding_opt t = None); + (M.max_binding_opt M.empty = None); () let test_pop_min_binding () = let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in let t2 = il [(2, 0); (3, 4); (2, 0)] in + let mb, rest = M.pop_min_binding t in "pop_min_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2), ..." @? - (M.pop_min_binding t = ((1, 2), t2)); + (mb = (1, 2)); + "pop_min_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2), ..." @= + (rest, t2); "pop_min_binding [] -> Not_found" @? (try ignore(M.pop_min_binding M.empty); false with Not_found -> true); () let test_pop_max_binding () = - let t = il [(2, 0); (1,2); (3, 4); (2, 0)] in - let t2 = il [1,2; (2, 0)] in + let t = il [(2, 6); (1,2); (3, 4); (2, 6)] in + let t2 = il [(1, 2); (2, 6)] in + let mb, rest = M.pop_max_binding t in "pop_max_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2), ..." @? - (M.pop_max_binding t = ((3, 4), t2)); + (mb = (3, 4)); + "pop_max_binding [(2, 0); (1,2); (3, 4); (2, 0)] = (1, 2), ..." @= + (rest, t2); "pop_max_binding [] -> Not_found" @? (try ignore(M.pop_max_binding M.empty); false with Not_found -> true); () @@ -745,6 +766,7 @@ module M = struct let iteri = M.iter let filterv_map f = M.filter_map (fun _ -> f) + let union_stdlib = M.union let supports_phys_equality = true end @@ -775,6 +797,7 @@ module S = struct let fold f = M.fold (fun _ -> f) let foldi = M.fold + let union_stdlib = M.union let supports_phys_equality = false end From a6ca8630862a76a5b9de2d10c828ec0c41d36197 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sun, 24 Jan 2021 18:01:34 +0100 Subject: [PATCH 08/20] bugfix --- src/batMap.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/batMap.ml b/src/batMap.ml index d4f5da833..686adf954 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -906,7 +906,7 @@ module Concrete = struct | Empty -> BatSeq.Nil | Node(l, k2, v, r, _) -> if cmp k k2 <= 0 then - BatSeq.append (to_seq_from cmp k l) (fun () -> BatSeq.Cons ((k,v), to_seq r)) () + BatSeq.append (to_seq_from cmp k l) (fun () -> BatSeq.Cons ((k2,v), to_seq r)) () else to_seq_from cmp k r () From 560f2e069dc50bbefef67bab743a245028391ed5 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sun, 24 Jan 2021 18:01:47 +0100 Subject: [PATCH 09/20] some more tests --- testsuite/test_map.ml | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/testsuite/test_map.ml b/testsuite/test_map.ml index 2734e7fe9..4a2dc335e 100644 --- a/testsuite/test_map.ml +++ b/testsuite/test_map.ml @@ -130,6 +130,11 @@ module TestMap val choose : 'a m -> (key * 'a) val split : key -> 'a m -> ('a m * 'a option * 'a m) + val add_seq : (key * 'a) BatSeq.t -> 'a m -> 'a m + val of_seq : (key * 'a) BatSeq.t -> 'a m + val to_seq : 'a m -> (key * 'a) BatSeq.t + val to_seq_from : key -> 'a m -> (key * 'a) BatSeq.t + val merge : (key -> 'a option -> 'b option -> 'c option) -> 'a m -> 'b m -> 'c m @@ -315,19 +320,28 @@ module TestMap BatSeq.fold_right (fun x l -> x :: l) s [] let test_add_seq () = - (* TODO write some tests *) + "add_seq [1,1;2,2;3,3] [3,3;4,4]" @= (il [1,1;2,2;3,3;4,4], M.add_seq (BatSeq.of_list [1,1;2,2;3,3]) (il [3,3;4,4])); + "add_seq [1,1;2,2] [3,3;4,4]" @= (il [1,1;2,2;3,3;4,4], M.add_seq (BatSeq.of_list [1,1;2,2]) (il [3,3;4,4])); + "add_seq [] [3,3;4,4]" @= (il [3,3;4,4], M.add_seq (BatSeq.of_list []) (il [3,3;4,4])); + "add_seq [1,1;2,2] [] " @= (il [1,1;2,2], M.add_seq (BatSeq.of_list [1,1;2,2]) (il [])); + "add_seq [] [] " @= (il [], M.add_seq (BatSeq.of_list []) (il [])); () let test_of_seq () = - (* TODO write some tests *) + "of_seq [1,1;2,2;3,3;4,4]" @= (il [1,1;2,2;3,3;4,4], M.of_seq (BatSeq.of_list [1,1;2,2;4,4;3,3])); + "of_seq []" @= (il [ ], M.of_seq (BatSeq.of_list [ ])); () - let test_to_seq () = - (* TODO write some tests *) + let test_to_seq () = + "to_seq [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [1,1;2,2;3,3;4,4]) (M.to_seq (il [4,4;1,1;3,3;2,2]))); + "to_seq []" @? (BatSeq.equal (BatSeq.of_list [ ]) (M.to_seq (il [ ]))); () - + let test_to_seq_from () = - (* TODO write some tests *) + "to_seq_from 5 []" @? (BatSeq.equal (BatSeq.of_list [ ]) (M.to_seq_from 5 (il [ ]))); + "to_seq_from 5 [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [ ]) (M.to_seq_from 5 (il [4,4;1,1;3,3;2,2]))); + "to_seq_from 3 [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [3,3;4,4 ]) (M.to_seq_from 3 (il [4,4;1,1;3,3;2,2]))); + "to_seq_from 0 [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [1,1;2,2;3,3;4,4]) (M.to_seq_from 0 (il [4,4;1,1;3,3;2,2]))); () From d8895a776a56a0bcdb3c5a71e2805163b87236df Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Mon, 25 Jan 2021 21:38:02 +0100 Subject: [PATCH 10/20] fix bug & tests --- src/batMap.ml | 2 +- src/batSplay.ml | 2 +- testsuite/test_map.ml | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 686adf954..e50697494 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -917,7 +917,7 @@ module Concrete = struct | Some v1 -> (match f k v v1 with | Some vmerged -> add k vmerged cmp m - | None -> m) + | None -> remove k cmp m) | None -> add k v cmp m) m1 m2 diff --git a/src/batSplay.ml b/src/batSplay.ml index dca489131..768d3b5be 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -820,7 +820,7 @@ struct | Some v1 -> (match f k v v1 with | Some vmerged -> add k vmerged m - | None -> m) + | None -> remove k m) | None -> add k v m) m1 m2 diff --git a/testsuite/test_map.ml b/testsuite/test_map.ml index 4a2dc335e..ed6893126 100644 --- a/testsuite/test_map.ml +++ b/testsuite/test_map.ml @@ -308,12 +308,12 @@ module TestMap (M.union_stdlib (fun _ -> failwith "must not be called") M.empty (il [1,1;2,2]), il [1,1;2,2]); "union_stdlib [1,1;2,2] [3,3;4,4]" @= (M.union_stdlib (fun _ -> failwith "must not be called") (il [3,3;4,4]) (il [1,1;2,2]), il [1,1;2,2;3,3;4,4]); - "union_stdlib [1,1;2,2;3,10] [3,6;4,4]" @= + "union_stdlib [1,1;2,2;3,10] [3,6;4,4] keep sum on conflict" @= (M.union_stdlib (fun _k a b -> Some (a+b)) (il [3,6;4,4]) (il [1,1;2,2;3,10]), il [1,1;2,2;3,16;4,4]); - "union_stdlib [1,1;2,2;3,10] [3,6;4,4]" @= - (M.union_stdlib (fun _k a b -> None) (il [3,6;4,4]) (il [1,1;2,2;3,10]), il [1,1;2,2;4,4]); - "union_stdlib [1,1;4,2;3,10] [3,6;4,4]" @= - (M.union_stdlib (fun k a b -> if k = 3 then Some (a+b) else None) (il [3,6;4,4]) (il [1,1;4,2;3,10]), il [1,1;2,2;3,16]); + "union_stdlib [1,1;2,2;3,10] [3,6;4,4] drop on conflict" @= + (M.union_stdlib (fun _k a b -> None) (il [3,6;4,4]) (il [1,1;2,2;3,10]), il [1,1;2,2;4,4]); + "union_stdlib [1,1;4,2;3,10] [3,6;4,4] keep 3 w sum, drop 4" @= + (M.union_stdlib (fun k a b -> if k = 3 then Some (a+b) else None) (il [2,2;3,6;4,4]) (il [1,1;4,2;3,10]), il [1,1;2,2;3,16]); () let list_of_seq s = @@ -765,6 +765,7 @@ module TestMap "test_max_binding_opt" >:: test_max_binding_opt; "test_pop_min_binding" >:: test_pop_min_binding; "test_pop_max_binding" >:: test_pop_max_binding; + "test_union_stdlib" >:: test_union_stdlib; ] end From 7f853c8d7125d020433e8f48a31b1293185a7825 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Fri, 29 Jan 2021 22:14:47 +0100 Subject: [PATCH 11/20] some explanation on splay trees --- src/batSplay.mli | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/batSplay.mli b/src/batSplay.mli index 500e1c6b1..353990858 100644 --- a/src/batSplay.mli +++ b/src/batSplay.mli @@ -18,7 +18,16 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) -(** Maps and sets based on splay trees *) +(** Maps over ordered types based on splay trees. + + Splay trees are ordered binary trees that have the + most recently used element as the root of the tree. + If another element is accessed (even read-only), + the tree will be rearranged internally. + + Not threadsafe; even read-only functions will rearrange + the tree, even though its contents will remain unchanged. + *) module Map (Ord : BatInterfaces.OrderedType) : sig From 435fe08e0c0c9bbfb2c8dd58c3d4b918b26bc98c Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 30 Jan 2021 10:32:18 +0100 Subject: [PATCH 12/20] optimize update_stdlib --- src/batMap.ml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index e50697494..99af0f653 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -913,12 +913,11 @@ module Concrete = struct let union_stdlib cmp f m1 m2 = foldi (fun k v m -> - match find_option k cmp m with - | Some v1 -> - (match f k v v1 with - | Some vmerged -> add k vmerged cmp m - | None -> remove k cmp m) - | None -> add k v cmp m) + update_stdlib k + (fun v2opt -> + match v2opt with + | Some v2 -> f k v v2 + | None -> Some v) cmp m) m1 m2 From 3521a6608527e79c91930d06533e93209ee8e683 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 30 Jan 2021 11:02:57 +0100 Subject: [PATCH 13/20] merge & add compat test BatSplay vs stdlib.Map --- src/batteries_compattest.mlv | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/batteries_compattest.mlv b/src/batteries_compattest.mlv index 219c1f31d..89a7c21eb 100644 --- a/src/batteries_compattest.mlv +++ b/src/batteries_compattest.mlv @@ -40,7 +40,6 @@ module Stdlib_verifications = struct include module type of Legacy.List val find_map : ('a -> 'b option) -> 'a list -> 'b end) - (* module Map = (Map : module type of Legacy.Map)*) ##V>=4.7## module Seq = (Seq : module type of Legacy.Seq) module Marshal = (Marshal: sig @@ -116,8 +115,14 @@ module Stdlib_verifications = struct module IntMap = struct include BatMap.Int let update = update_stdlib - end + end let _ = assert ([1,1;2,2;3,3;] = (sort_map (module IntMap) [3; 1; 2;])) + (* test compat of BatSplay.S with Legacy.Map.S *) + module IntSplayMap = struct + include BatSplay.Map (BatInt) + let update = update_stdlib + end + let _ = assert ([1,1;2,2;3,3;] = (sort_map (module IntSplayMap) [3; 1; 2;])) (* test compatibility of BatSet.S with Legacy.Set.S *) let sort (type s) (module Set : Legacy.Set.S with type elt = s) l = From 7564cdd3a03026969fead236131e4f275f19b3e1 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 30 Jan 2021 19:02:16 +0100 Subject: [PATCH 14/20] small fixes --- src/batMap.mli | 8 ++++---- src/batSplay.ml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/batMap.mli b/src/batMap.mli index 79680b3c6..7af44ffc0 100644 --- a/src/batMap.mli +++ b/src/batMap.mli @@ -119,7 +119,7 @@ sig be physically equal to [m] @raise Not_found if [k1] is not bound in [m]. @since 2.4.0 - @before NEXT_RELEASE physical equality was nor ensured. *) + @before NEXT_RELEASE physical equality was not ensured. *) val find: key -> 'a t -> 'a (** [find x m] returns the current binding of [x] in [m], @@ -289,7 +289,7 @@ sig val min_binding : 'a t -> (key * 'a) (** Return the [(key, value)] pair with the smallest key. - Raises Not_found if the map is empty. *) + @raise Not_found if the map is empty. *) val min_binding_opt : 'a t -> (key * 'a) option (** Return [Some (key, value)] for the [key, value] pair with @@ -560,7 +560,7 @@ val update: 'a -> 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t be physically equal to [m] @raise Not_found if [k1] is not bound in [m]. @since 2.4.0 - @before NEXT_RELEASE physical equality was nor ensured. *) + @before NEXT_RELEASE physical equality was not ensured. *) val update_stdlib : 'a -> ('b option -> 'b option) -> ('a, 'b) t -> ('a, 'b) t (** [update_stdlib k f m] returns a map containing the same bindings as [m], @@ -1001,7 +1001,7 @@ module PMap : sig be physically equal to [m] @raise Not_found if [k1] is not bound in [m]. @since 2.4.0 - @before NEXT_RELEASE physical equality was nor ensured. *) + @before NEXT_RELEASE physical equality was not ensured. *) val update_stdlib : 'a -> ('b option -> 'b option) -> ('a, 'b) t -> ('a, 'b) t (** [update_stdlib k f m] returns a map containing the same bindings as [m], diff --git a/src/batSplay.ml b/src/batSplay.ml index 768d3b5be..600d048d0 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -441,7 +441,7 @@ struct | C (cx, Node (l, _kv, r)) -> C (cx, Node (l, (k2, v2), r)) | C (cx, Empty) -> raise Not_found end - end + end let update_stdlib k f m = match f (find_opt k m) with From 9b157fee90515615cc244f6a10173d1b54e744fc Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 30 Jan 2021 20:26:44 +0100 Subject: [PATCH 15/20] improve BatSplay.to_seq and to_seq_from --- src/batSplay.ml | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/batSplay.ml b/src/batSplay.ml index 600d048d0..199711aca 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -804,14 +804,28 @@ struct let of_seq s = add_seq s empty - let rec to_seq m = + let rec to_seq_hlp m = fun () -> - (* since the tree can change shape arbitrarily, fetch all bindings and turn them into a seq. *) - BatSeq.of_list (bindings m) () - - let rec to_seq_from k m = - fun () -> - BatSeq.filter (fun (k2, _) -> Ord.compare k k2 <= 0) (to_seq m) () + match m with + | Empty -> BatSeq.Nil + | Node(l, kv, r) -> + BatSeq.append (to_seq_hlp l) (fun () -> BatSeq.Cons (kv, to_seq_hlp r)) () + + let to_seq m = + to_seq_hlp (sget m) + + let to_seq_from k m = + let rec to_seq_from_hlp k m = + fun () -> + match m with + | Empty -> BatSeq.Nil + | Node(l, ((k2, _) as kv), r) -> + if Ord.compare k k2 <= 0 then + BatSeq.append (to_seq_from_hlp k l) (fun () -> BatSeq.Cons (kv, to_seq_hlp r)) () + else + to_seq_from_hlp k r () in + to_seq_from_hlp k (sget m) + let union f m1 m2 = fold From 61c0fc0ab6ee48c024b855ef4c3ce759ec815d04 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 30 Jan 2021 22:50:50 +0100 Subject: [PATCH 16/20] rewrote to_seq and to_seq_from to reuse existing iterator --- src/batMap.ml | 32 +++++++++++++++++--------------- src/batSet.ml | 36 +++++++++++++++++++----------------- src/batSplay.ml | 36 ++++++++++++++++-------------------- testsuite/test_map.ml | 12 +++++++++++- 4 files changed, 63 insertions(+), 53 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 99af0f653..385e9067d 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -893,23 +893,25 @@ module Concrete = struct let of_seq cmp s = add_seq cmp s empty - let rec to_seq m = - fun () -> + let rec to_seq_hlp m = match m with - | Empty -> BatSeq.Nil - | Node(l, k, v, r, _) -> - BatSeq.append (to_seq l) (fun () -> BatSeq.Cons ((k, v), to_seq r)) () + | E -> BatSeq.Nil + | C(k, v, r, e) -> + BatSeq.Cons ((k, v), fun () -> to_seq_hlp (cons_iter r e)) + + let to_seq m () = + to_seq_hlp (cons_iter m E) + + let to_seq_from cmp k m () = + let rec cons_enum_from k2 m e = + match m with + | Empty -> e + | Node (l, k, v, r, _) -> + if cmp k2 k <= 0 + then cons_enum_from k2 l (C (k, v, r, e)) + else cons_enum_from k2 r e in + to_seq_hlp (cons_enum_from k m E) - let rec to_seq_from cmp k m = - fun () -> - match m with - | Empty -> BatSeq.Nil - | Node(l, k2, v, r, _) -> - if cmp k k2 <= 0 then - BatSeq.append (to_seq_from cmp k l) (fun () -> BatSeq.Cons ((k2,v), to_seq r)) () - else - to_seq_from cmp k r () - let union_stdlib cmp f m1 m2 = foldi (fun k v m -> diff --git a/src/batSet.ml b/src/batSet.ml index 7e2b295b7..7e87b089f 100644 --- a/src/batSet.ml +++ b/src/batSet.ml @@ -704,24 +704,26 @@ module Concrete = struct let of_seq cmp s = add_seq cmp s empty - - let rec to_seq m = - fun () -> - match m with - | Empty -> BatSeq.Nil - | Node(l, v, r, _) -> - BatSeq.append (to_seq l) (fun () -> BatSeq.Cons (v, to_seq r)) () - - let rec to_seq_from cmp k m = - fun () -> + + let rec to_seq_hlp m = match m with - | Empty -> BatSeq.Nil - | Node(l, v, r, _) -> - if cmp k v <= 0 then - BatSeq.append (to_seq_from cmp k l) (fun () -> BatSeq.Cons (v, to_seq r)) () - else - to_seq_from cmp k r () - + | E -> BatSeq.Nil + | C(k, r, e) -> + BatSeq.Cons (k, fun () -> to_seq_hlp (cons_iter r e)) + + let to_seq m () = + to_seq_hlp (cons_iter m E) + + let to_seq_from cmp k m () = + let rec cons_enum_from k2 m e = + match m with + | Empty -> e + | Node (l, k, r, _) -> + if cmp k2 k <= 0 + then cons_enum_from k2 l (C (k, r, e)) + else cons_enum_from k2 r e in + to_seq_hlp (cons_enum_from k m E) + end module type S = diff --git a/src/batSplay.ml b/src/batSplay.ml index 199711aca..a5bbe087f 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -805,28 +805,24 @@ struct add_seq s empty let rec to_seq_hlp m = - fun () -> match m with - | Empty -> BatSeq.Nil - | Node(l, kv, r) -> - BatSeq.append (to_seq_hlp l) (fun () -> BatSeq.Cons (kv, to_seq_hlp r)) () - - let to_seq m = - to_seq_hlp (sget m) - - let to_seq_from k m = - let rec to_seq_from_hlp k m = - fun () -> + | End -> BatSeq.Nil + | More(k, v, r, e) -> + BatSeq.Cons ((k, v), fun () -> to_seq_hlp (cons_enum r e)) + + let to_seq m () = + to_seq_hlp (cons_enum (sget m) End) + + let to_seq_from k m () = + let rec cons_enum_from k2 m e = match m with - | Empty -> BatSeq.Nil - | Node(l, ((k2, _) as kv), r) -> - if Ord.compare k k2 <= 0 then - BatSeq.append (to_seq_from_hlp k l) (fun () -> BatSeq.Cons (kv, to_seq_hlp r)) () - else - to_seq_from_hlp k r () in - to_seq_from_hlp k (sget m) - - + | Empty -> e + | Node (l, (k, v), r) -> + if Ord.compare k2 k <= 0 + then cons_enum_from k2 l (More (k, v, r, e)) + else cons_enum_from k2 r e in + to_seq_hlp (cons_enum_from k (sget m) End) + let union f m1 m2 = fold (fun k v m -> diff --git a/testsuite/test_map.ml b/testsuite/test_map.ml index ed6893126..6d09e3691 100644 --- a/testsuite/test_map.ml +++ b/testsuite/test_map.ml @@ -341,7 +341,17 @@ module TestMap "to_seq_from 5 []" @? (BatSeq.equal (BatSeq.of_list [ ]) (M.to_seq_from 5 (il [ ]))); "to_seq_from 5 [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [ ]) (M.to_seq_from 5 (il [4,4;1,1;3,3;2,2]))); "to_seq_from 3 [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [3,3;4,4 ]) (M.to_seq_from 3 (il [4,4;1,1;3,3;2,2]))); - "to_seq_from 0 [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [1,1;2,2;3,3;4,4]) (M.to_seq_from 0 (il [4,4;1,1;3,3;2,2]))); + "to_seq_from 5 [1,1;2,2;3,3;4,4]" @? (BatSeq.equal (BatSeq.of_list [1,1;2,2;3,3;4,4]) (M.to_seq_from 0 (il [4,4;1,1;3,3;2,2]))); + let l = [0,0;1,1;2,2;3,3;4,4;5,5;6,6;7,7;8,8;9,9] + and l2 = [5,5;6,6;7,7;8,8;9,9] in + "to_seq_from 5 [1,1 -- 9,9]" @? (BatSeq.equal (BatSeq.of_list l2) (M.to_seq_from 5 (il l))); + "to_seq_from 0 [1,1 -- 9,9]" @? (BatSeq.equal (BatSeq.of_list l) (M.to_seq_from 0 (il l))); + let max = 40 in + let l = BatList.init max (fun i -> (i, i)) in + for i = 0 to max do + let subl = BatList.filter (fun (x, _) -> x >= i) l in + "to_seq_from N [1,1 -- M,M]" @? (BatSeq.equal (BatSeq.of_list subl) (M.to_seq_from i (il l))); + done; () From 959b9cbf2a2554488bb93052f50884e1c1411c6c Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sat, 30 Jan 2021 23:08:47 +0100 Subject: [PATCH 17/20] re-insert 'assert(false)' on impossible branches --- src/batMap.ml | 4 ++-- src/batSplay.ml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 385e9067d..dc42d4ca6 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -120,7 +120,7 @@ module Concrete = struct let pop_min_binding s = let mini = ref (get_root s) in let rec loop = function - | Empty -> raise Not_found + | Empty -> assert(false) (* get_root already raises Not_found on empty map *) | Node(Empty, k, v, r, _) -> mini := (k, v); r | Node(l, k, v, r, _) -> bal (loop l) k v r in @@ -140,7 +140,7 @@ module Concrete = struct let pop_max_binding s = let maxi = ref (get_root s) in let rec loop = function - | Empty -> raise Not_found + | Empty -> assert(false) (* get_root already raises Not_found on empty map *) | Node (l, k, v, Empty, _) -> maxi := (k, v); l | Node (l, k, v, r, _) -> bal l k v (loop r) in diff --git a/src/batSplay.ml b/src/batSplay.ml index a5bbe087f..80bbf302a 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -514,7 +514,7 @@ struct let rec bfind = function | Node (Empty, kv, r) -> mini := kv; r | Node (l, kv, r) -> Node (bfind l, kv, r) - | Empty -> raise Not_found + | Empty -> assert(false) (* choose already raises Not_found on empty map *) in (!mini, sref (bfind (sget tr))) @@ -541,7 +541,7 @@ struct let rec bfind = function | Node (l, kv, Empty) -> maxi := kv; l | Node (l, kv, r) -> Node (l, kv, bfind r) - | Empty -> raise Not_found + | Empty -> assert(false) (* choose already raises Not_found on empty map *) in (!maxi, sref (bfind (sget tr))) From bab9dcbd6e74f1ff432900395de08266e267b4a9 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sun, 31 Jan 2021 14:40:08 +0100 Subject: [PATCH 18/20] refactoring of to_seq --- src/batMap.ml | 29 +++++++++++++++-------------- src/batSet.ml | 29 +++++++++++++++-------------- src/batSplay.ml | 27 ++++++++++++++------------- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index dc42d4ca6..04f3c4299 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -523,6 +523,14 @@ module Concrete = struct | Empty -> t | Node (l, k, v, r, _) -> rev_cons_iter r (C (k, v, l, t)) + let rec cons_iter_from cmp k2 m e = + match m with + | Empty -> e + | Node (l, k, v, r, _) -> + if cmp k2 k <= 0 + then cons_iter_from cmp k2 l (C (k, v, r, e)) + else cons_iter_from cmp k2 r e + let rec enum_next l () = match !l with E -> raise BatEnum.No_more_elements | C (k, v, m, t) -> l := cons_iter m t; (k, v) @@ -893,24 +901,17 @@ module Concrete = struct let of_seq cmp s = add_seq cmp s empty - let rec to_seq_hlp m = + let rec seq_of_iter m () = match m with | E -> BatSeq.Nil | C(k, v, r, e) -> - BatSeq.Cons ((k, v), fun () -> to_seq_hlp (cons_iter r e)) + BatSeq.Cons ((k, v), seq_of_iter (cons_iter r e)) - let to_seq m () = - to_seq_hlp (cons_iter m E) - - let to_seq_from cmp k m () = - let rec cons_enum_from k2 m e = - match m with - | Empty -> e - | Node (l, k, v, r, _) -> - if cmp k2 k <= 0 - then cons_enum_from k2 l (C (k, v, r, e)) - else cons_enum_from k2 r e in - to_seq_hlp (cons_enum_from k m E) + let to_seq m = + seq_of_iter (cons_iter m E) + + let to_seq_from cmp k m = + seq_of_iter (cons_iter_from cmp k m E) let union_stdlib cmp f m1 m2 = foldi diff --git a/src/batSet.ml b/src/batSet.ml index 7e87b089f..806a92895 100644 --- a/src/batSet.ml +++ b/src/batSet.ml @@ -451,6 +451,14 @@ module Concrete = struct Empty -> t | Node (l, e, r, _) -> rev_cons_iter r (C (e, l, t)) + let rec cons_iter_from cmp k2 m e = + match m with + | Empty -> e + | Node (l, k, r, _) -> + if cmp k2 k <= 0 + then cons_iter_from cmp k2 l (C (k, r, e)) + else cons_iter_from cmp k2 r e + let enum_next l () = match !l with E -> raise BatEnum.No_more_elements | C (e, s, t) -> l := cons_iter s t; e @@ -705,24 +713,17 @@ module Concrete = struct let of_seq cmp s = add_seq cmp s empty - let rec to_seq_hlp m = + let rec seq_of_iter m () = match m with | E -> BatSeq.Nil | C(k, r, e) -> - BatSeq.Cons (k, fun () -> to_seq_hlp (cons_iter r e)) + BatSeq.Cons (k, seq_of_iter (cons_iter r e)) - let to_seq m () = - to_seq_hlp (cons_iter m E) - - let to_seq_from cmp k m () = - let rec cons_enum_from k2 m e = - match m with - | Empty -> e - | Node (l, k, r, _) -> - if cmp k2 k <= 0 - then cons_enum_from k2 l (C (k, r, e)) - else cons_enum_from k2 r e in - to_seq_hlp (cons_enum_from k m E) + let to_seq m = + seq_of_iter (cons_iter m E) + + let to_seq_from cmp k m = + seq_of_iter (cons_iter_from cmp k m E) end diff --git a/src/batSplay.ml b/src/batSplay.ml index 80bbf302a..9fa515ed6 100644 --- a/src/batSplay.ml +++ b/src/batSplay.ml @@ -609,6 +609,14 @@ struct | Node (l, (k, v), r) -> rev_cons_enum r (More (k, v, l, e)) + let rec cons_enum_from k2 m e = + match m with + | Empty -> e + | Node (l, (k, v), r) -> + if Ord.compare k2 k <= 0 + then cons_enum_from k2 l (More (k, v, r, e)) + else cons_enum_from k2 r e + let compare cmp tr1 tr2 = let tr1, tr2 = sget tr1, sget tr2 in let rec aux e1 e2 = match (e1, e2) with @@ -804,24 +812,17 @@ struct let of_seq s = add_seq s empty - let rec to_seq_hlp m = + let rec seq_of_iter m () = match m with | End -> BatSeq.Nil | More(k, v, r, e) -> - BatSeq.Cons ((k, v), fun () -> to_seq_hlp (cons_enum r e)) + BatSeq.Cons ((k, v), seq_of_iter (cons_enum r e)) - let to_seq m () = - to_seq_hlp (cons_enum (sget m) End) + let to_seq m = + seq_of_iter (cons_enum (sget m) End) - let to_seq_from k m () = - let rec cons_enum_from k2 m e = - match m with - | Empty -> e - | Node (l, (k, v), r) -> - if Ord.compare k2 k <= 0 - then cons_enum_from k2 l (More (k, v, r, e)) - else cons_enum_from k2 r e in - to_seq_hlp (cons_enum_from k (sget m) End) + let to_seq_from k m = + seq_of_iter (cons_enum_from k (sget m) End) let union f m1 m2 = fold From b0d56888fa8c30a31765b5541fb2c5815ad93d24 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sun, 31 Jan 2021 16:13:19 +0100 Subject: [PATCH 19/20] fix compatibility with older ocaml versions where stdlib.Seq is missing --- src/batMap.ml | 16 ++++++++++++---- src/batMap.mli | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 04f3c4299..0ea361cb7 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -990,10 +990,10 @@ sig (key -> 'a option -> 'b option -> 'c option) -> 'a t -> 'b t -> 'c t val union: (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t - val to_seq : 'a t -> (key * 'a) Seq.t - val to_seq_from : key -> 'a t -> (key * 'a) Seq.t - val add_seq : (key * 'a) Seq.t -> 'a t -> 'a t - val of_seq : (key * 'a) Seq.t -> 'a t + val to_seq : 'a t -> (key * 'a) BatSeq.t + val to_seq_from : key -> 'a t -> (key * 'a) BatSeq.t + val add_seq : (key * 'a) BatSeq.t -> 'a t -> 'a t + val of_seq : (key * 'a) BatSeq.t -> 'a t (** {6 Boilerplate code}*) (** {7 Printing}*) val print : ?first:string -> ?last:string -> ?sep:string -> ?kvsep:string -> @@ -1101,6 +1101,9 @@ struct let maxi, rest = Concrete.pop_max_binding (impl_of_t t) in (maxi, t_of_impl rest) + let max_binding_opt t = Concrete.max_binding_opt (impl_of_t t) + let min_binding_opt t = Concrete.min_binding_opt (impl_of_t t) + let choose t = Concrete.choose (impl_of_t t) let choose_opt t = Concrete.choose_opt (impl_of_t t) let any t = Concrete.any (impl_of_t t) @@ -1141,6 +1144,11 @@ struct let merge f t1 t2 = t_of_impl (Concrete.merge f Ord.compare (impl_of_t t1) (impl_of_t t2)) + let of_seq s = t_of_impl (Concrete.of_seq Ord.compare s) + let add_seq s m = t_of_impl (Concrete.add_seq Ord.compare s (impl_of_t m)) + let to_seq m = Concrete.to_seq (impl_of_t m) + let to_seq_from k m = Concrete.to_seq_from Ord.compare k (impl_of_t m) + module Exceptionless = struct let find k t = try Some (find k t) with Not_found -> None diff --git a/src/batMap.mli b/src/batMap.mli index 7af44ffc0..7ce343690 100644 --- a/src/batMap.mli +++ b/src/batMap.mli @@ -416,23 +416,23 @@ sig @since NEXT_RELEASE *) - val to_seq : 'a t -> (key * 'a) Seq.t + val to_seq : 'a t -> (key * 'a) BatSeq.t (** Iterate on the whole map, in ascending order of keys. @since NEXT_RELEASE *) - val to_seq_from : key -> 'a t -> (key * 'a) Seq.t + val to_seq_from : key -> 'a t -> (key * 'a) BatSeq.t (** [to_seq_from k m] iterates on a subset of the bindings in [m], namely those bindings greater or equal to [k], in ascending order. @since NEXT_RELEASE *) - val add_seq : (key * 'a) Seq.t -> 'a t -> 'a t + val add_seq : (key * 'a) BatSeq.t -> 'a t -> 'a t (** add the given bindings to the map, in order. @since NEXT_RELEASE *) - val of_seq : (key * 'a) Seq.t -> 'a t + val of_seq : (key * 'a) BatSeq.t -> 'a t (** build a map from the given bindings @since NEXT_RELEASE *) @@ -846,23 +846,23 @@ val union_stdlib: @since NEXT_RELEASE *) -val to_seq : ('key, 'a) t -> ('key * 'a) Seq.t +val to_seq : ('key, 'a) t -> ('key * 'a) BatSeq.t (** Iterate on the whole map, in ascending order of keys. @since NEXT_RELEASE *) -val to_seq_from : 'key -> ('key, 'a) t -> ('key * 'a) Seq.t +val to_seq_from : 'key -> ('key, 'a) t -> ('key * 'a) BatSeq.t (** [to_seq_from k m] iterates on a subset of the bindings in [m], namely those bindings greater or equal to [k], in ascending order. @since NEXT_RELEASE *) -val add_seq : ('key * 'a) Seq.t -> ('key, 'a) t -> ('key, 'a) t +val add_seq : ('key * 'a) BatSeq.t -> ('key, 'a) t -> ('key, 'a) t (** add the given bindings to the map, in order. @since NEXT_RELEASE *) -val of_seq : ('key * 'a) Seq.t -> ('key, 'a) t +val of_seq : ('key * 'a) BatSeq.t -> ('key, 'a) t (** build a map from the given bindings @since NEXT_RELEASE *) @@ -1307,23 +1307,23 @@ module PMap : sig @since NEXT_RELEASE *) - val to_seq : ('key, 'a) t -> ('key * 'a) Seq.t + val to_seq : ('key, 'a) t -> ('key * 'a) BatSeq.t (** Iterate on the whole map, in ascending order of keys. @since NEXT_RELEASE *) - val to_seq_from : 'key -> ('key, 'a) t -> ('key * 'a) Seq.t + val to_seq_from : 'key -> ('key, 'a) t -> ('key * 'a) BatSeq.t (** [to_seq_from k m] iterates on a subset of the bindings in [m], namely those bindings greater or equal to [k], in ascending order. @since NEXT_RELEASE *) - val add_seq : ('key * 'a) Seq.t -> ('key, 'a) t -> ('key, 'a) t + val add_seq : ('key * 'a) BatSeq.t -> ('key, 'a) t -> ('key, 'a) t (** add the given bindings to the map, in order. @since NEXT_RELEASE *) - val of_seq : ?cmp:('key -> 'key -> int) -> ('key * 'a) Seq.t -> ('key, 'a) t + val of_seq : ?cmp:('key -> 'key -> int) -> ('key * 'a) BatSeq.t -> ('key, 'a) t (** build a map from the given bindings @since NEXT_RELEASE *) From a39bbce75f5b8f962b3aaa95db52bca35ad558d7 Mon Sep 17 00:00:00 2001 From: jakob krainz Date: Sun, 31 Jan 2021 17:02:11 +0100 Subject: [PATCH 20/20] fix union_stdlib and is_empty --- src/batMap.ml | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/batMap.ml b/src/batMap.ml index 0ea361cb7..ebe9f52fe 100644 --- a/src/batMap.ml +++ b/src/batMap.ml @@ -54,7 +54,7 @@ module Concrete = struct let empty = Empty let is_empty m = - m == Empty + m = Empty (* The create and bal functions are from stdlib's map.ml (3.12) differences from the old (extlib) implementation : @@ -575,7 +575,7 @@ module Concrete = struct build the result map. The unusual parameter order was chosen to reflect this. *) let filterv f t cmp = - foldi (fun k a acc -> if f a then add k a cmp acc else acc) t empty + foldi (fun k a acc -> if f a then acc else remove k cmp acc) t t let filter f t cmp = foldi (fun k a acc -> if f k a then acc else remove k cmp acc) t t let filter_map f t cmp = @@ -913,16 +913,13 @@ module Concrete = struct let to_seq_from cmp k m = seq_of_iter (cons_iter_from cmp k m E) - let union_stdlib cmp f m1 m2 = - foldi - (fun k v m -> - update_stdlib k - (fun v2opt -> - match v2opt with - | Some v2 -> f k v v2 - | None -> Some v) cmp m) - m1 - m2 + let union_stdlib f cmp1 m1 cmp2 m2 = + let fwrap a b1 b2 = + match b1, b2 with + | Some b1, Some b2 -> f a b1 b2 + | x, None + | None, x -> x in + heuristic_merge fwrap cmp1 m1 cmp2 m2 let compare ckey cval m1 m2 = BatEnum.compare (fun (k1,v1) (k2,v2) -> BatOrd.bin_comp ckey k1 k2 cval v1 v2) (enum m1) (enum m2) @@ -1139,7 +1136,7 @@ struct let bindings t = Concrete.bindings (impl_of_t t) - let union f m1 m2 = t_of_impl (Concrete.union_stdlib Ord.compare f (impl_of_t m1) (impl_of_t m2)) + let union f m1 m2 = t_of_impl (Concrete.union_stdlib f Ord.compare (impl_of_t m1) Ord.compare (impl_of_t m2)) let merge f t1 t2 = t_of_impl (Concrete.merge f Ord.compare (impl_of_t t1) (impl_of_t t2)) @@ -1408,7 +1405,7 @@ let to_seq = Concrete.to_seq let to_seq_from x m = Concrete.to_seq_from Pervasives.compare x m -let union_stdlib f m1 m2 = Concrete.union_stdlib Pervasives.compare f m1 m2 +let union_stdlib f m1 m2 = Concrete.union_stdlib f Pervasives.compare m1 Pervasives.compare m2 (*$T union_stdlib let cmp = Pervasives.( = ) in \ equal cmp (union_stdlib (fun _ -> failwith "must not be called") empty empty) empty @@ -1468,7 +1465,7 @@ let union m1 m2 = *) let union_stdlib f m1 m2 = - Concrete.union_stdlib Pervasives.compare f m1 m2 + Concrete.union_stdlib f Pervasives.compare m1 Pervasives.compare m2 let diff m1 m2 = let comp = Pervasives.compare in @@ -1749,7 +1746,7 @@ module PMap = struct (*$< PMap *) { m with map = Concrete.add_seq m.cmp s m.map } let union_stdlib f m1 m2 = - { m1 with map = Concrete.union_stdlib m1.cmp f m1.map m2.map } + { m1 with map = Concrete.union_stdlib f m1.cmp m1.map m2.cmp m2.map } let bindings m = Concrete.bindings m.map