From f7e740fb2ba36d0fcf3fd196d60333552911e3a4 Mon Sep 17 00:00:00 2001 From: Joe Hellerstein Date: Mon, 28 Oct 2024 17:34:28 -0700 Subject: [PATCH] feat: generalized hash trie indexes for relational tuples (#1503) Generalized Hash Tries are part of the SIGMOD '23 FreeJoin [paper](https://dl.acm.org/doi/abs/10.1145/3589295) by Wang/Willsey/Suciu. They provide a compressed ("factorized") representation of relations. By operating in the factorized domain, join algorithms can defer cross-products and achieve asymptotically optimal performance. --------- Co-authored-by: Mingwei Samuel Co-authored-by: Andre Giron --- Cargo.lock | 16 + Cargo.toml | 1 + .../surface_examples__example_1_simplest.snap | 1 - ...phism__cartesian_product@graphvis_dot.snap | 1 - ...m__cartesian_product@graphvis_mermaid.snap | 1 - ...eton__fold_singleton@graphvis_mermaid.snap | 1 - ...ce_singleton__multi_tick@graphvis_dot.snap | 1 - ...ingleton__multi_tick@graphvis_mermaid.snap | 1 - hydroflow/tests/surface_lattice_bimorphism.rs | 52 + .../surface_lattice_generalized_hash_trie.rs | 73 ++ .../src/graph/ops/anti_join_multiset.rs | 5 +- lattices/Cargo.toml | 3 + lattices/src/ght/colt.rs | 306 ++++++ lattices/src/ght/lattice.rs | 439 ++++++++ lattices/src/ght/macros.rs | 117 +++ lattices/src/ght/mod.rs | 545 ++++++++++ lattices/src/ght/test.rs | 943 ++++++++++++++++++ lattices/src/lib.rs | 3 +- .../compile-fail/non_lattice_field.stderr | 12 +- variadics/src/variadic_collections.rs | 34 +- variadics_macro/CHANGELOG.md | 0 variadics_macro/Cargo.toml | 22 + variadics_macro/README.md | 17 + variadics_macro/src/lib.rs | 47 + 24 files changed, 2612 insertions(+), 29 deletions(-) create mode 100644 hydroflow/tests/surface_lattice_generalized_hash_trie.rs create mode 100644 lattices/src/ght/colt.rs create mode 100644 lattices/src/ght/lattice.rs create mode 100644 lattices/src/ght/macros.rs create mode 100644 lattices/src/ght/mod.rs create mode 100644 lattices/src/ght/test.rs create mode 100644 
variadics_macro/CHANGELOG.md create mode 100644 variadics_macro/Cargo.toml create mode 100644 variadics_macro/README.md create mode 100644 variadics_macro/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index bc00ec71c75..a53092a9c76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1599,9 +1599,12 @@ version = "0.5.7" dependencies = [ "cc-traits", "lattices_macro", + "ref-cast", "sealed", "serde", "trybuild", + "variadics", + "variadics_macro", ] [[package]] @@ -3452,6 +3455,19 @@ dependencies = [ "trybuild", ] +[[package]] +name = "variadics_macro" +version = "0.5.5" +dependencies = [ + "insta", + "prettyplease", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.75", + "variadics", +] + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 668519191c0..8b9deaa71ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ members = [ "stageleft_tool", "topolotree", "variadics", + "variadics_macro", "website_playground", ] diff --git a/hydroflow/tests/snapshots/surface_examples__example_1_simplest.snap b/hydroflow/tests/snapshots/surface_examples__example_1_simplest.snap index fa30505c69c..065e174f4cb 100644 --- a/hydroflow/tests/snapshots/surface_examples__example_1_simplest.snap +++ b/hydroflow/tests/snapshots/surface_examples__example_1_simplest.snap @@ -12,4 +12,3 @@ Hello 6 Hello 7 Hello 8 Hello 9 - diff --git a/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_dot.snap b/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_dot.snap index e8a4194cc70..dae12d839db 100644 --- a/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_dot.snap +++ b/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_dot.snap @@ -81,4 +81,3 @@ digraph { } } } - diff --git a/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_mermaid.snap 
b/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_mermaid.snap index 9a8e0c4a1c4..7b6a3151258 100644 --- a/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_mermaid.snap +++ b/hydroflow/tests/snapshots/surface_lattice_bimorphism__cartesian_product@graphvis_mermaid.snap @@ -67,4 +67,3 @@ subgraph sg_4v1 ["sg_4v1 stratum 1"] 9v1 end end - diff --git a/hydroflow/tests/snapshots/surface_singleton__fold_singleton@graphvis_mermaid.snap b/hydroflow/tests/snapshots/surface_singleton__fold_singleton@graphvis_mermaid.snap index cfc33287844..3775b3febce 100644 --- a/hydroflow/tests/snapshots/surface_singleton__fold_singleton@graphvis_mermaid.snap +++ b/hydroflow/tests/snapshots/surface_singleton__fold_singleton@graphvis_mermaid.snap @@ -53,4 +53,3 @@ subgraph sg_3v1 ["sg_3v1 stratum 1"] 3v1 end end - diff --git a/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_dot.snap b/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_dot.snap index 74d09d7f66a..3a4356ee0bc 100644 --- a/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_dot.snap +++ b/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_dot.snap @@ -61,4 +61,3 @@ digraph { } } } - diff --git a/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_mermaid.snap b/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_mermaid.snap index 54cd4484ba3..3a8ee979cc0 100644 --- a/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_mermaid.snap +++ b/hydroflow/tests/snapshots/surface_singleton__multi_tick@graphvis_mermaid.snap @@ -53,4 +53,3 @@ subgraph sg_2v1 ["sg_2v1 stratum 0"] 3v1 end end - diff --git a/hydroflow/tests/surface_lattice_bimorphism.rs b/hydroflow/tests/surface_lattice_bimorphism.rs index 401b0953018..30ff1716557 100644 --- a/hydroflow/tests/surface_lattice_bimorphism.rs +++ b/hydroflow/tests/surface_lattice_bimorphism.rs @@ -2,9 +2,14 @@ use 
std::collections::{HashMap, HashSet}; use hydroflow::util::collect_ready; use hydroflow::{assert_graphvis_snapshots, hydroflow_syntax}; +use lattices::ght::lattice::{DeepJoinLatticeBimorphism, GhtBimorphism}; +use lattices::ght::GeneralizedHashTrieNode; use lattices::map_union::{KeyedBimorphism, MapUnionHashMap, MapUnionSingletonMap}; use lattices::set_union::{CartesianProductBimorphism, SetUnionHashSet, SetUnionSingletonSet}; +use lattices::GhtType; use multiplatform_test::multiplatform_test; +use variadics::variadic_collections::VariadicHashSet; +use variadics::CloneVariadic; #[multiplatform_test] pub fn test_cartesian_product() { @@ -134,3 +139,50 @@ pub fn test_cartesian_product_tick_state() { &*collect_ready::, _>(&mut out_recv) ); } + +#[test] +fn test_ght_join_bimorphism() { + type MyGhtATrie = GhtType!(u32, u64, u16 => &'static str: VariadicHashSet); + type MyGhtBTrie = GhtType!(u32, u64, u16 => &'static str: VariadicHashSet); + + type JoinSchema = variadics::var_type!(u32, u64, u16, &'static str, &'static str); + + type MyNodeBim = <(MyGhtATrie, MyGhtBTrie) as DeepJoinLatticeBimorphism< + VariadicHashSet, + >>::DeepJoinLatticeBimorphism; + type MyBim = GhtBimorphism; + + let mut hf = hydroflow_syntax! 
{ + lhs = source_iter([ + var_expr!(123, 2, 5, "hello"), + var_expr!(50, 1, 1, "hi"), + var_expr!(5, 1, 7, "hi"), + var_expr!(5, 1, 7, "bye"), + ]) + -> map(|row| MyGhtATrie::new_from([row])) + -> state::<'tick, MyGhtATrie>(); + rhs = source_iter([ + var_expr!(5, 1, 8, "hi"), + var_expr!(5, 1, 7, "world"), + var_expr!(5, 1, 7, "folks"), + var_expr!(10, 1, 2, "hi"), + var_expr!(12, 10, 98, "bye"), + ]) + -> map(|row| MyGhtBTrie::new_from([row])) + -> state::<'tick, MyGhtBTrie>(); + + lhs[items] -> [0]my_join; + rhs[items] -> [1]my_join; + + + my_join = lattice_bimorphism(MyBim::default(), #lhs, #rhs) + -> lattice_reduce() + -> enumerate() + -> inspect(|x| println!("{:?} {:#?}", context.current_tick(), x)) + -> flat_map(|(_num, ght)| ght.recursive_iter().map(::clone_ref_var).collect::>()) + -> null(); + // -> for_each(|x| println!("{:#?}\n", x)); + }; + // hf.meta_graph().unwrap().open_mermaid(&Default::default()); + hf.run_available(); +} diff --git a/hydroflow/tests/surface_lattice_generalized_hash_trie.rs b/hydroflow/tests/surface_lattice_generalized_hash_trie.rs new file mode 100644 index 00000000000..ece19b084ab --- /dev/null +++ b/hydroflow/tests/surface_lattice_generalized_hash_trie.rs @@ -0,0 +1,73 @@ +use hydroflow::hydroflow_syntax; +use hydroflow::lattices::ght::lattice::{DeepJoinLatticeBimorphism, GhtBimorphism}; +use hydroflow::lattices::ght::GeneralizedHashTrieNode; +use hydroflow::lattices::GhtType; +use hydroflow::util::collect_ready; +use hydroflow::variadics::{var_expr, var_type}; +use variadics::variadic_collections::VariadicHashSet; // Import the Insert trait + +#[test] +fn test_basic() { + type MyGht = GhtType!(u16, u32 => u64: VariadicHashSet); + type FlatTup = var_type!(u16, u32, u64); + let input: Vec = vec![ + var_expr!(42, 314, 43770), + var_expr!(42, 315, 43770), + var_expr!(42, 314, 30619), + var_expr!(43, 10, 600), + ]; + let mut merged = MyGht::default(); + for i in input.clone() { + merged.insert(i); + } + println!("merged: {:?}", 
merged); + let mut df = hydroflow_syntax! { + source_iter(input) + -> map(|t| MyGht::new_from(vec![t])) + -> lattice_fold::<'static>(MyGht::default) + -> inspect(|t| println!("{:?}", t)) + -> assert(|x: &MyGht| x.eq(&merged)) + -> null(); + }; + df.run_available(); +} + +#[test] +fn test_join() { + type MyGht = GhtType!(u8 => u16: VariadicHashSet); + type ResultGht = GhtType!(u8 => u16, u16: VariadicHashSet); + let (out_send, out_recv) = hydroflow::util::unbounded_channel::<_>(); + + let r = vec![ + var_expr!(1, 10), + var_expr!(2, 20), + var_expr!(3, 30), + var_expr!(4, 40), + ]; + let s = vec![var_expr!(1, 10), var_expr!(5, 50)]; + + type MyNodeBim = <(MyGht, MyGht) as DeepJoinLatticeBimorphism< + VariadicHashSet, + >>::DeepJoinLatticeBimorphism; + type MyBim = GhtBimorphism; + + let mut df = hydroflow_syntax! { + R = source_iter(r) + -> map(|t| MyGht::new_from([t])) + -> state::(); + S = source_iter(s) + -> map(|t| MyGht::new_from([t])) + -> state::(); + R[items] -> [0]my_join; + S[items] -> [1]my_join; + my_join = lattice_bimorphism(MyBim::default(), #R, #S) + -> lattice_reduce() + -> for_each(|x| out_send.send(x).unwrap()); + }; + df.run_available(); + + assert_eq!( + &[ResultGht::new_from(vec![var_expr!(1, 10, 10),])], + &*collect_ready::, _>(out_recv) + ); +} diff --git a/hydroflow_lang/src/graph/ops/anti_join_multiset.rs b/hydroflow_lang/src/graph/ops/anti_join_multiset.rs index dbb18358b86..cf9a1e9441d 100644 --- a/hydroflow_lang/src/graph/ops/anti_join_multiset.rs +++ b/hydroflow_lang/src/graph/ops/anti_join_multiset.rs @@ -2,9 +2,8 @@ use quote::{quote_spanned, ToTokens}; use syn::parse_quote; use super::{ - DelayType, OpInstGenerics, OperatorCategory, OperatorConstraints, - OperatorInstance, OperatorWriteOutput, Persistence, PortIndexValue, WriteContextArgs, RANGE_0, - RANGE_1, + DelayType, OpInstGenerics, OperatorCategory, OperatorConstraints, OperatorInstance, + OperatorWriteOutput, Persistence, PortIndexValue, WriteContextArgs, RANGE_0, RANGE_1, }; 
use crate::diagnostic::{Diagnostic, Level}; diff --git a/lattices/Cargo.toml b/lattices/Cargo.toml index b4daacad7ee..7684d2ca3cf 100644 --- a/lattices/Cargo.toml +++ b/lattices/Cargo.toml @@ -19,6 +19,9 @@ cc-traits = "2.0.0" sealed = "0.5.0" serde = { version = "1.0.197", features = ["derive"], optional = true } lattices_macro = { path = "../lattices_macro", version = "^0.5.6" } +ref-cast = "1.0.23" +variadics = { path = "../variadics", version = "^0.0.6" } +variadics_macro = { path = "../variadics_macro", version = "^0.5.5" } [dev-dependencies] trybuild = "1.0.0" diff --git a/lattices/src/ght/colt.rs b/lattices/src/ght/colt.rs new file mode 100644 index 00000000000..b4b22239cd8 --- /dev/null +++ b/lattices/src/ght/colt.rs @@ -0,0 +1,306 @@ +//! COLT from Wang/Willsey/Suciu + +use std::hash::Hash; + +use variadics::variadic_collections::VariadicCollection; +use variadics::{var_expr, var_type, PartialEqVariadic, SplitBySuffix, VariadicExt}; + +use crate::ght::{GeneralizedHashTrieNode, GhtGet, GhtInner, GhtLeaf}; + +/// Data structure design for our COLT is unique. +/// +/// In the paper, the COLT is an unbalanced trie that "grows upward" from leaves lazily +/// on access via the `force` method. +/// Unfortunately, unbalanced tries break our types: a node's type is defined via the +/// type of its children, recursively -- meaning all paths need to be the same type (and length)! +/// +/// To work around this, our COLT is a variadic *list* of GHTs (a forest) of increasing height, +/// starting with a trie of height 0 and continuing until a trie of height |key| - 1. +/// Our `force` method does not add a node above a leaf L as in the paper. Instead +/// it `take`s L from the current trie and merges it into the next trie to the right which is 1 taller. +// +/// The following trait provides the behavior we need from the nodes in a COLT forest. Every +/// `ColtForestNode` is a `GeneralizedHashTrieNode` with some extra methods.
+pub trait ColtForestNode: GeneralizedHashTrieNode { + /// result of `force`ing a node + type Force: GeneralizedHashTrieNode; + + /// Force the generation of a parent node, as in the Wang/Willsey/Suciu COLT structure, + /// to be merged into the next trie to the right. + fn force(self) -> Option; + + /// Force the generation of a parent node but retain ref to this node + fn force_drain(&mut self) -> Option; +} + +// Force only acts on leaves +impl ColtForestNode for GhtInner +where + Head: 'static + Hash + Eq + Clone, + Node: 'static + ColtForestNode, + ::Schema: + SplitBySuffix::SuffixSchema)>, +{ + type Force = Node; // where Node:GeneralizedHashTrieNode; + fn force(self) -> Option { + None + } + + fn force_drain(&mut self) -> Option { + None + } +} + +// Leaf case +impl ColtForestNode + for GhtLeaf +where + Head: 'static + Clone + Hash + Eq, + Rest: 'static + Clone + Hash + Eq + VariadicExt, + Schema: 'static + Hash + Eq + Clone + VariadicExt + PartialEqVariadic, + Rest: PartialEqVariadic, + Schema: SplitBySuffix, + Schema: SplitBySuffix, + >::Prefix: Eq + Hash + Clone, + >::Prefix: Eq + Hash + Clone, + Storage: VariadicCollection + Default + IntoIterator, + GhtLeaf: GeneralizedHashTrieNode, + GhtInner>: + GeneralizedHashTrieNode, +{ + type Force = GhtInner>; + fn force(mut self) -> Option { + let mut retval = Self::Force::default(); + self.forced = true; + for row in self.into_iter().unwrap() { + retval.insert(row); + } + Some(retval) + } + + fn force_drain(&mut self) -> Option>> { + let mut retval = Self::Force::default(); + self.forced = true; + for row in self.elements.drain() { + retval.insert(row); + } + Some(retval) + } +} + +/// Emulate the `get` and `iter` functions for a single Ght node +/// [`GhtGet`] across a forest of ColtForestNodes.
+/// +/// The "current" ColtGet node (corresponding to the "current" GhtGet node) at depth +/// d from the root is a variadic list of nodes, each at depth d in its +/// respective trie in the forest. Tries of height d or smaller are omitted, +/// hence the first element in any ColtGet is a GhtLeaf. +pub trait ColtGet { + /// Schema variadic: the schema of the relation stored in this COLT. + /// This type is the same in all Tries and nodes of the COLT. + type Schema: VariadicExt + Eq + Hash + Clone; + /// The type of Storage + /// This type is the same in all Tries and nodes of the COLT + type Storage: VariadicCollection; + /// SuffixSchema variadic: the suffix of the schema *from this node of the trie + /// downward*. The first entry in this variadic is of type Head. + /// This type is the same in all Tries of the COLT (but changes as we traverse downward) + type SuffixSchema: VariadicExt + Eq + Hash + Clone; + /// The type of the first column in the SuffixSchema + /// This type is the same in all Tries of the COLT (but changes as we traverse downward) + type Head: Eq + Hash; + + /// Type returned by [`Self::get`]. + type Get; + + /// Following the spec in Wang/Willsey/Suciu, on an Inner node this retrieves the value + /// (child) associated with the given "head" key. It returns an `Option` containing a + /// reference to the value if found, or `None` if not found. + /// On a Leaf node, returns None. + fn get(self, head: &Self::Head) -> Self::Get; + + /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes). + fn iter(&self) -> impl Iterator; +} + +/// `ColtGet` without the first (head) trie.
+pub trait ColtGetTail: ColtGet { + /// merge an inner node into the head of this tail of the forest + fn merge(&mut self, inner_to_merge: InnerToMerge); +} + +impl<'a, Rest, Schema, SuffixSchema, Storage> ColtGet for var_type!(&'a mut GhtLeaf, ...Rest) +where + Rest: ColtGetTail< + as ColtForestNode>::Force, + Storage = Storage, + >, + ::SuffixSchema: 'a, + GhtLeaf: ColtForestNode, + Schema: Clone + Hash + Eq + VariadicExt, + SuffixSchema: Clone + Hash + Eq + VariadicExt, + Storage: VariadicCollection, +{ + type Schema = Schema; + type Head = Rest::Head; + type SuffixSchema = SuffixSchema; + type Get = Rest::Get; + type Storage = Rest::Storage; + + fn get(self, head: &Self::Head) -> Self::Get { + let (first, mut rest) = self; + let forced = first.force_drain().unwrap(); + ColtGetTail::merge(&mut rest, forced); + Rest::get(rest, head) + } + + fn iter(&self) -> impl Iterator { + std::iter::empty() + } +} + +// we only merge in GhtInner> nodes, so this +// should never be called. +impl<'a, Rest, Schema, SuffixSchema, T, Storage> ColtGetTail for var_type!(&'a mut GhtLeaf, ...Rest) +where + Rest: ColtGetTail< + as ColtForestNode>::Force, + Storage = Storage, + >, + ::SuffixSchema: 'a, + GhtLeaf: ColtForestNode, + Schema: Clone + Hash + Eq + VariadicExt, + SuffixSchema: Clone + Hash + Eq + VariadicExt, + Storage: VariadicCollection, +{ + fn merge(&mut self, _inner_to_merge: T) { + panic!(); + } +} + +impl<'a, Head, Head2, Rest, Node> ColtGet for var_type!(&'a mut GhtInner>, ...Rest) +where + Rest: ColtGet, + Head: Eq + Hash + Clone, + Head2: Eq + Hash + Clone, + Node: GeneralizedHashTrieNode, + GhtInner>: GeneralizedHashTrieNode< + Head = Rest::Head, + SuffixSchema = Rest::SuffixSchema, + Schema = Rest::Schema, + Storage = Rest::Storage, + >, + GhtInner: GeneralizedHashTrieNode, +{ + type Schema = Rest::Schema; + type Head = Rest::Head; + type SuffixSchema = Rest::SuffixSchema; + type Get = var_type!(&'a mut GhtInner, ...Rest::Get); + type Storage = Rest::Storage; + + 
fn get(self, head: &Self::Head) -> Self::Get { + let (first, rest) = self; + // create a child entry here for this get, to absorb future forces + // TODO(mingwei): extra clone here if entry already exists. + let child = first.children.entry(head.clone()).or_default(); + var_expr!(child, ...Rest::get(rest, head)) + } + + fn iter(&self) -> impl Iterator { + self.0.children.keys().cloned().chain(Rest::iter(&self.1)) + } +} + +impl<'a, Head, Rest, Schema, ValType, Storage> ColtGet for var_type!(&'a mut GhtInner>, ...Rest) +where + Rest: ColtGet, + Head: Eq + Hash + Clone, + Schema: Eq + Hash + Clone + PartialEqVariadic, + ValType: Eq + Hash + Clone + PartialEqVariadic, + Storage: VariadicCollection, + GhtLeaf: GeneralizedHashTrieNode, + Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix + PartialEqVariadic, + >::Prefix: Eq + Hash + Clone, + GhtInner>: + GeneralizedHashTrieNode + GhtGet, + GhtInner>: + GeneralizedHashTrieNode, + GhtLeaf: + GeneralizedHashTrieNode + GhtGet, +{ + type Schema = Rest::Schema; + type Head = Rest::Head; + type SuffixSchema = Rest::SuffixSchema; + type Get = var_type!(&'a mut GhtLeaf, ...Rest::Get); + type Storage = Rest::Storage; + + fn get(self, head: &Self::Head) -> Self::Get { + let (first, rest) = self; + let child = first.children.entry(head.clone()).or_default(); + var_expr!(child, ...Rest::get(rest, head)) + } + + fn iter(&self) -> impl Iterator { + self.0.children.keys().cloned().chain(Rest::iter(&self.1)) + } +} + +impl<'a, Head, Rest, Schema, ValType, Storage> + ColtGetTail>> for var_type!(&'a mut GhtInner>, ...Rest) +where + Rest: ColtGet, + Head: Eq + Hash + Clone, + Schema: Eq + Hash + Clone + PartialEqVariadic, + ValType: Eq + Hash + Clone + PartialEqVariadic, + Storage: VariadicCollection, + var_type!(&'a mut GhtInner>, ...Rest): + ColtGet, + GhtLeaf: GeneralizedHashTrieNode, + Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix + PartialEqVariadic, + >::Prefix: Eq + Hash + Clone, + GhtInner>: + 
GeneralizedHashTrieNode + GhtGet, +{ + fn merge(&mut self, inner_to_merge: GhtInner>) { + let (head, _rest) = self; + // can't use Merge with COLT bc columnstore is not a lattice!! + head.merge_node(inner_to_merge); + } +} + +impl<'a, Head, Node> ColtGet for var_type!(&'a mut GhtInner) +where + GhtInner: GeneralizedHashTrieNode, + Head: Clone + Eq + Hash, + Node: GeneralizedHashTrieNode, +{ + type Schema = as GeneralizedHashTrieNode>::Schema; + type SuffixSchema = as GeneralizedHashTrieNode>::SuffixSchema; + type Head = Head; + type Get = var_type!(&'a mut Node); + type Storage = Node::Storage; + + fn get(self, head: &Self::Head) -> Self::Get { + let child = self.0.children.entry(head.clone()).or_default(); + var_expr!(child) + } + + fn iter(&self) -> impl Iterator { + self.0.children.keys().cloned() + } +} +impl ColtGetTail>> for var_type!(&mut GhtInner>) +where + GhtInner>: + GeneralizedHashTrieNode + GhtGet, + GhtLeaf: GeneralizedHashTrieNode, + Head: Clone + Eq + Hash, + Schema: Clone + Eq + Hash + VariadicExt, + Storage: VariadicCollection, +{ + fn merge(&mut self, inner_to_merge: GhtInner>) { + let (head, _rest) = self; + // can't use Merge with COLT bc columnstore is not a lattice!! + head.merge_node(inner_to_merge); + } +} diff --git a/lattices/src/ght/lattice.rs b/lattices/src/ght/lattice.rs new file mode 100644 index 00000000000..11b8a6aabde --- /dev/null +++ b/lattices/src/ght/lattice.rs @@ -0,0 +1,439 @@ +//! 
Lattice traits for GHT + +use core::cmp::Ordering::{Equal, Greater, Less}; +use std::cmp::Ordering; +use std::collections::HashMap; +use std::hash::Hash; + +use variadics::variadic_collections::VariadicSet; +use variadics::{var_expr, var_type, CloneVariadic, PartialEqVariadic, SplitBySuffix, VariadicExt}; + +use crate::ght::{GeneralizedHashTrieNode, GhtGet, GhtInner, GhtLeaf}; +use crate::{IsBot, IsTop, LatticeBimorphism, LatticeOrd, Merge}; + +impl Merge> for GhtInner +where + Node: GeneralizedHashTrieNode + Merge, + Node::Storage: VariadicSet, // multiset is not a lattice! + Self: GeneralizedHashTrieNode, + Head: Hash + Eq + Clone, +{ + fn merge(&mut self, other: GhtInner) -> bool { + let mut changed = false; + + for (k, v) in other.children { + match self.children.entry(k) { + std::collections::hash_map::Entry::Occupied(mut occupied) => { + changed |= occupied.get_mut().merge_node(v); + } + std::collections::hash_map::Entry::Vacant(vacant) => { + vacant.insert(v); + changed = true; + } + } + } + changed + } +} + +impl Merge> + for GhtLeaf +where + Schema: Eq + Hash, + Storage: VariadicSet + Extend + IntoIterator, +{ + fn merge(&mut self, other: GhtLeaf) -> bool { + let old_len = self.elements.len(); + self.elements.extend(other.elements); + self.elements.len() > old_len + } +} + +impl PartialEq> for GhtInner +where + Head: Hash + Eq + 'static + Clone, + Node: GeneralizedHashTrieNode + 'static + PartialEq, + Node::Storage: VariadicSet, // multiset is not a lattice! 
+ Node::Schema: SplitBySuffix, + GhtInner: GhtGet, + as GhtGet>::Get: PartialEq, +{ + fn eq(&self, other: &GhtInner) -> bool { + if self.children.len() != other.children.len() { + return false; + } + + for head in self.iter() { + let other_node = other.get(&head); + if other_node.is_none() { + return false; + } + let this_node = self.get(&head); + if this_node.is_none() { + return false; + } + if this_node.unwrap() != other_node.unwrap() { + return false; + } + } + true + } +} + +impl PartialOrd> for GhtInner +where + Head: Hash + Eq + 'static + Clone, + Node: 'static + GeneralizedHashTrieNode + PartialEq + PartialOrd, + Node::Storage: VariadicSet, // multiset is not a lattice! + Node::Schema: SplitBySuffix, +{ + fn partial_cmp(&self, other: &GhtInner) -> Option { + let mut self_any_greater = false; + let mut other_any_greater = false; + if self.children.is_empty() && other.children.is_empty() { + Some(Equal) + } else { + for k in self.children.keys().chain(other.children.keys()) { + match (self.children.get(k), other.children.get(k)) { + (Some(self_value), Some(other_value)) => { + match self_value.partial_cmp(other_value)? 
{ + Greater => { + self_any_greater = true; + } + Less => { + other_any_greater = true; + } + Equal => {} + } + } + (Some(_), None) => { + self_any_greater = true; + } + (None, Some(_)) => { + other_any_greater = true; + } + (None, None) => unreachable!(), + } + } + match (self_any_greater, other_any_greater) { + (true, false) => Some(Greater), + (false, true) => Some(Less), + (false, false) => Some(Equal), + (true, true) => unreachable!(), + } + } + } +} + +impl PartialOrd> + for GhtLeaf +where + Schema: Eq + Hash + PartialEqVariadic, + SuffixSchema: Eq + Hash, + Storage: VariadicSet + PartialEq, +{ + fn partial_cmp(&self, other: &GhtLeaf) -> Option { + match self.elements.len().cmp(&other.elements.len()) { + Greater => { + if other.elements.iter().all(|tup| self.elements.contains(tup)) { + Some(Greater) + } else { + None + } + } + Equal => { + if self + .elements + .iter() + .all(|head| other.elements.contains(head)) + { + Some(Equal) + } else { + None + } + } + Less => { + if self + .elements + .iter() + .all(|head| other.elements.contains(head)) + { + Some(Less) + } else { + None + } + } + } + } +} + +impl LatticeOrd> for GhtInner +where + Self: PartialOrd>, + Head: Clone, + Node: GeneralizedHashTrieNode, + Node::Storage: VariadicSet, // multiset is not a lattice! +{ +} +impl LatticeOrd> + for GhtLeaf +where + Schema: Eq + Hash + PartialEqVariadic, + SuffixSchema: Eq + Hash, + Storage: VariadicSet + PartialEq, +{ +} + +impl IsBot for GhtInner +where + Head: Clone, + Node: GeneralizedHashTrieNode + IsBot, +{ + fn is_bot(&self) -> bool { + self.children.iter().all(|(_, v)| v.is_bot()) + } +} + +impl IsBot for GhtLeaf +where + Schema: Eq + Hash, + SuffixSchema: Eq + Hash, + Storage: VariadicSet, +{ + fn is_bot(&self) -> bool { + self.elements.is_empty() + } +} + +impl IsTop for GhtInner +where + Head: Clone, + Node: GeneralizedHashTrieNode, + Node::Storage: VariadicSet, // multiset is not a lattice! 
+{ + fn is_top(&self) -> bool { + false + } +} + +impl IsTop for GhtLeaf +where + Schema: Eq + Hash, + SuffixSchema: Eq + Hash, + Storage: VariadicSet, +{ + fn is_top(&self) -> bool { + false + } +} + +////////////////////////// +// BiMorphisms for GHT +// + +/// Bimorphism for the cartesian product of two GHT *subtries*. +/// +/// Output is a set of all possible pairs of +/// *suffixes* from the two subtries. If you use this at the root of a GHT, it's a full cross-product. +/// If you use this at an internal node, it provides a 'factorized' representation with only the suffix +/// cross-products expanded. +pub struct GhtCartesianProductBimorphism { + _phantom: std::marker::PhantomData GhtOut>, +} +impl Default for GhtCartesianProductBimorphism { + fn default() -> Self { + Self { + _phantom: Default::default(), + } + } +} +impl<'a, 'b, GhtA, GhtB, GhtOut> LatticeBimorphism<&'a GhtA, &'b GhtB> + for GhtCartesianProductBimorphism +where + GhtA: GeneralizedHashTrieNode, + GhtA::Storage: VariadicSet, // multiset is not a lattice! + GhtB: GeneralizedHashTrieNode, + GhtB::Storage: VariadicSet, // multiset is not a lattice! 
+ GhtOut: FromIterator, + GhtA::SuffixSchema: CloneVariadic, + GhtB::SuffixSchema: CloneVariadic, +{ + type Output = GhtOut; + + fn call(&mut self, ght_a: &'a GhtA, ght_b: &'b GhtB) -> Self::Output { + ght_a.recursive_iter().flat_map(|a| { + let (_a_prefix, a_suffix) = >::split_by_suffix_ref(a); + ght_b + .recursive_iter() + .map(move |b| { + let (_b_prefix, b_suffix) = >::split_by_suffix_ref(b); + var_expr!(...::clone_ref_var(a_suffix), ...::clone_ref_var(b_suffix)) + }) + }).collect() + } +} + +/// Forms the cartesian product of the ValTypes only +/// Used on GhtLeaf nodes to implement DeepJoinLatticeBimorphism +pub struct GhtValTypeProductBimorphism { + _phantom: std::marker::PhantomData GhtOut>, +} +impl Default for GhtValTypeProductBimorphism { + fn default() -> Self { + Self { + _phantom: Default::default(), + } + } +} +impl<'a, 'b, GhtA, GhtB, GhtOut> LatticeBimorphism<&'a GhtA, &'b GhtB> + for GhtValTypeProductBimorphism +where + GhtA: GeneralizedHashTrieNode, + GhtA::Storage: VariadicSet, // multiset is not a lattice! + GhtB: GeneralizedHashTrieNode, + GhtB::Storage: VariadicSet, // multiset is not a lattice! + GhtOut: FromIterator, + GhtA::Schema: Eq + Hash + CloneVariadic, + GhtB::Schema: Eq + Hash + SplitBySuffix, + GhtB::ValType: CloneVariadic, +{ + type Output = GhtOut; + + fn call(&mut self, ght_a: &'a GhtA, ght_b: &'b GhtB) -> Self::Output { + ght_a.recursive_iter().flat_map(|a| { + ght_b + .recursive_iter() + .map(move |b| { + let (_prefix_b, suffix_b) + = >::split_by_suffix_ref(b); + var_expr!(...::clone_ref_var(a), ...::clone_ref_var(suffix_b)) + } + ) + }).collect() + } +} + +/// Composable bimorphism, wraps an existing morphism by partitioning it per key. +/// +/// For example, `GhtKeyedBimorphism<..., GhtCartesianProduct<...>>` is a join. 
+#[derive(Default)] +pub struct GhtBimorphism { + bimorphism: Bimorphism, + // _phantom: std::marker::PhantomData MapOut>, +} +impl GhtBimorphism { + /// Create a `KeyedBimorphism` using `bimorphism` for handling values. + pub fn new(bimorphism: Bimorphism) -> Self { + Self { + bimorphism, + // _phantom: std::marker::PhantomData, + } + } +} + +impl LatticeBimorphism for GhtBimorphism +where + GhtA: GeneralizedHashTrieNode, + GhtA::Storage: VariadicSet, // multiset is not a lattice! + GhtB: GeneralizedHashTrieNode, + GhtB::Storage: VariadicSet, // multiset is not a lattice! + GhtOut: GeneralizedHashTrieNode, // FromIterator, + for<'a, 'b> ValFunc: LatticeBimorphism<&'a GhtA, &'b GhtB, Output = GhtOut>, +{ + type Output = GhtOut; + + fn call(&mut self, ght_a: GhtA, ght_b: GhtB) -> Self::Output { + let node_bim = &mut self.bimorphism; // GhtNodeKeyedBimorphism::::new(self.bimorphism); + node_bim.call(&ght_a, &ght_b) + } +} + +#[derive(Default)] +/// bimorphism trait for equijoining Ght Nodes +pub struct GhtNodeKeyedBimorphism { + bimorphism: Bimorphism, +} +/// bimorphism implementation for equijoining Ght Nodes +impl GhtNodeKeyedBimorphism { + /// initialize bimorphism + pub fn new(bimorphism: Bimorphism) -> Self { + Self { bimorphism } + } +} +/// bimorphism implementation for equijoining Ght Nodes +impl<'a, 'b, Head, GhtA, GhtB, ValFunc> LatticeBimorphism<&'a GhtA, &'b GhtB> + for GhtNodeKeyedBimorphism +where + Head: Clone + Hash + Eq, + ValFunc: LatticeBimorphism<&'a GhtA::Get, &'b GhtB::Get>, + ValFunc::Output: GeneralizedHashTrieNode, + GhtA: GeneralizedHashTrieNode + GhtGet, + GhtB: GeneralizedHashTrieNode + GhtGet, + GhtA::Storage: VariadicSet, // multiset is not a lattice! + GhtB::Storage: VariadicSet, // multiset is not a lattice! 
+ ::AsRefVar<'a>: CloneVariadic, + ::AsRefVar<'b>: CloneVariadic, +{ + type Output = GhtInner; // HashMap; // GhtOut; + + fn call(&mut self, ght_a: &'a GhtA, ght_b: &'b GhtB) -> Self::Output { + let mut children = HashMap::::new(); + // for head in ght_b.iter_keys() { + for head in ght_b.iter() { + if let Some(get_a) = ght_a.get(&head) { + let get_b = ght_b.get(&head).unwrap(); + let val = self.bimorphism.call(get_a, get_b); + children.insert(head.clone(), val); + } + } + GhtInner { children } + } +} + +/// bimorphism trait for equijoin on full tuple (keys in all GhtInner nodes) +pub trait DeepJoinLatticeBimorphism { + /// bimorphism type for equijoin on full tuple (keys in all GhtInner nodes) + type DeepJoinLatticeBimorphism; +} +/// bimorphism implementation for equijoin on full tuple (keys in all GhtInner nodes) +impl DeepJoinLatticeBimorphism + for (GhtInner, GhtInner) +where + Head: 'static + Hash + Eq + Clone, + NodeA: 'static + GeneralizedHashTrieNode, + NodeB: 'static + GeneralizedHashTrieNode, + NodeA::Storage: VariadicSet, // multiset is not a lattice! + NodeB::Storage: VariadicSet, // multiset is not a lattice! 
+ (NodeA, NodeB): DeepJoinLatticeBimorphism, + Storage: VariadicSet, +{ + type DeepJoinLatticeBimorphism = GhtNodeKeyedBimorphism< + <(NodeA, NodeB) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism, + >; +} +impl + DeepJoinLatticeBimorphism + for ( + GhtLeaf, + GhtLeaf, + ) +where + SchemaA: 'static + VariadicExt + Eq + Hash + SplitBySuffix, /* + AsRefVariadicPartialEq */ + ValTypeA: 'static + VariadicExt + Eq + Hash, // + AsRefVariadicPartialEq + SchemaB: 'static + VariadicExt + Eq + Hash + SplitBySuffix, /* + AsRefVariadicPartialEq */ + ValTypeB: 'static + VariadicExt + Eq + Hash, // + AsRefVariadicPartialEq + StorageA: VariadicSet, + StorageB: VariadicSet, + StorageOut: VariadicSet, + for<'x> SchemaA::AsRefVar<'x>: CloneVariadic, + for<'x> SchemaB::AsRefVar<'x>: CloneVariadic, + var_type!(...SchemaA, ...ValTypeB): Eq + Hash, +{ + type DeepJoinLatticeBimorphism = GhtValTypeProductBimorphism< + GhtLeaf< + var_type!(...SchemaA, ...ValTypeB), + var_type!(...ValTypeA, ...ValTypeB), + StorageOut, + >, + >; +} diff --git a/lattices/src/ght/macros.rs b/lattices/src/ght/macros.rs new file mode 100644 index 00000000000..174f124518e --- /dev/null +++ b/lattices/src/ght/macros.rs @@ -0,0 +1,117 @@ +//! Macros for GHT +#[macro_export] +/// Internal macro for constructing a Ght struct with the given schema and storage type +/// +/// Should not be used directly, use `GhtType!` instead +macro_rules! 
GhtTypeWithSchema { + // Empty key & Val (Leaf) + (() => () => $( $schema:ty ),+ : $storage:ident) => ( + $crate::ght::GhtLeaf::<$( $schema ),*, () > + ); + + // Empty key (Leaf) + (() => $( $z:ty ),* => $schema:ty : $storage:ident) => ( + $crate::ght::GhtLeaf::<$schema, $crate::variadics::var_type!($( $z ),*), $crate::variadics::variadic_collections::$storage<$schema> > + ); + + // Singleton key & Empty val (Inner over Leaf) + ($a:ty => () => $schema:ty : $storage:ident) => ( + $crate::ght::GhtInner::<$a, $crate::ght::GhtLeaf::<$schema, (), $crate::variadics::variadic_collections::$storage<$schema> >> + ); + + // Singleton key (Inner over Leaf) + ($a:ty => $( $z:ty ),* => $schema:ty : $storage:ident) => ( + $crate::ght::GhtInner::<$a, $crate::ght::GhtLeaf::<$schema, $crate::variadics::var_type!($( $z ),*), $crate::variadics::variadic_collections::$storage<$schema> >> + ); + + // Recursive case with empty val + ($a:ty, $( $b:ty ),* => () => $schema:ty : $storage:ident) => ( + $crate::ght::GhtInner::<$a, $crate::GhtTypeWithSchema!($( $b ),* => () => $schema : $storage)> + ); + + // Recursive case + ($a:ty, $( $b:ty ),* => $( $z:ty ),* => $schema:ty : $storage:ident) => ( + $crate::ght::GhtInner::<$a, $crate::GhtTypeWithSchema!($( $b ),* => $( $z ),* => $schema : $storage)> + ); +} + +#[macro_export] +/// Public macro for constructing a Ght struct with the given schema and storage type +/// +/// # Example +/// ``` +/// use lattices::GhtType; +/// use variadics::variadic_collections::VariadicHashSet; +/// +/// // This generates a Ght struct with (u16, u32) as key, (u64) as val, and VariadicHashSet as storage +/// type MyHashGht = GhtType!(u16, u32 => u64: VariadicHashSet); +/// let my_ght = MyHashGht::default(); +/// +/// // This generates a Ght struct with (u16, u32) as key, () as val, and VariadicCountedHashSet as storage +/// type MyMultisetGht = GhtType!(u16, u32 => (): VariadicCountedHashSet); +/// let my_ght = MyMultisetGht::default(); +/// +/// // This 
generates a Ght struct with (u16, u32) as key, () as val, and VariadicColumnMultiset as storage +/// type MyColumnarMultisetGht = GhtType!(u16, u32 => (): VariadicColumnMultiset); +/// let my_ght = MyColumnarMultisetGht::default(); +/// ``` +macro_rules! GhtType { + // Empty key + (() => $( $z:ty ),*: $storage:ident) => ( + $crate::GhtTypeWithSchema!(() => $( $z ),* => $crate::variadics::var_type!($( $z ),*): $storage) + ); + + // Recursive case empty val + ($( $b:ty ),* => (): $storage:ident) => ( + $crate::GhtTypeWithSchema!($( $b ),* => () => $crate::variadics::var_type!($( $b ),*): $storage) + ); + + // Recursive case + ($( $b:ty ),* => $( $z:ty ),*: $storage:ident) => ( + $crate::GhtTypeWithSchema!($( $b ),* => $( $z ),* => $crate::variadics::var_type!($( $b ),*, $( $z ),*): $storage) + ); +} + +#[macro_export] +/// Construct a forest of Ghts (i.e. a ColtForest) with the given schema, using `VariadicColumnMultiset` storage. +/// +/// # Example +/// ``` +/// use lattices::ColtType; +/// +/// type MyColt = ColtType!(u16, u32, u64); +/// ``` +macro_rules! 
ColtType { + // Base case: single type to empty + ($a:ty => ()) => { + $crate::variadics::var_type!($crate::GhtType!($a => (): VariadicColumnMultiset)) + }; + // Base case: single type to single type + ($a:ty => $c:ty) => { + ($crate::GhtType!($a => $c: VariadicColumnMultiset), $crate::ColtType!($a, $c => ())) + }; + // Recursive case: single type to multiple types + ($a:ty => $c:ty, $( $d:ty ),*) => { + ($crate::GhtType!($a => $c, $( $d ),*: VariadicColumnMultiset), $crate::ColtType!($a, $c => $( $d ),*)) + }; + // Base case: multiple types to empty + ($a:ty, $( $b:ty ),* => ()) => { + $crate::variadics::var_type!($crate::GhtType!($a, $( $b ),* => (): VariadicColumnMultiset)) + }; + // Base case: multiple types to single type + ($a:ty, $( $b:ty ),* => $c:ty) => { + ($crate::GhtType!($a, $( $b ),* => $c: VariadicColumnMultiset), $crate::ColtType!($a, $( $b ),*, $c => ())) + }; + // Recursive case: multiple types to multiple types + ($a:ty, $( $b:ty ),* => $c:ty, $( $d:ty ),*) => { + ($crate::GhtType!($a, $( $b ),* => $c, $( $d ),*: VariadicColumnMultiset), $crate::ColtType!($a, $( $b ),*, $c => $( $d ),*)) + }; + // General case: single type + ($a:ty) => { + ($crate::GhtType!(() => $a: VariadicColumnMultiset), $crate::ColtType!($a => ())) + }; + // General case: multiple types + ($a:ty, $( $b:ty ),*) => { + ($crate::GhtType!(() => $a, $( $b ),*: VariadicColumnMultiset), $crate::ColtType!($a => $( $b ),*)) + }; +} diff --git a/lattices/src/ght/mod.rs b/lattices/src/ght/mod.rs new file mode 100644 index 00000000000..9496a43cd1e --- /dev/null +++ b/lattices/src/ght/mod.rs @@ -0,0 +1,545 @@ +//! 
GHT from the Wang/Willsey/Suciu Freejoin work +use std::collections::HashMap; +use std::fmt::Debug; +use std::hash::Hash; +use std::marker::PhantomData; + +use variadics::variadic_collections::VariadicCollection; +use variadics::{ + var_args, var_type, PartialEqVariadic, RefVariadic, Split, SplitBySuffix, VariadicExt, +}; + +pub mod colt; +pub mod lattice; +pub mod macros; +pub mod test; + +/// The GeneralizedHashTrieNode trait captures the properties of nodes in a Ght. +/// +/// The Ght, defined by Wang/Willsey/Suciu, is a hash-based trie for storing tuples. +/// It is parameterized by an ordered schema [`VariadicExt`] of the relation stored in the trie. +/// It is a tree of [`GhtInner`] nodes, with [`GhtLeaf`] nodes at the leaves. +/// The trie is keyed on a prefix of the schema [`Self::KeyType`], +/// and the remaining columns [`Self::ValType`] are stored in the leaf. +/// All leaf nodes use the same [`Self::Storage`] type to store the data. +pub trait GeneralizedHashTrieNode: Default { + // types that are the same in all nodes of the trie + /// Schema variadic: the schema of the relation stored in this trie. + type Schema: VariadicExt + Eq + Hash + Clone + SplitBySuffix; + /// The prefix of columns in [`Self::Schema`] that the trie is keyed on + type KeyType: VariadicExt + Eq + Hash + Clone; + /// The suffix of columns in [`Self::Schema`] that are not part of the trie keys + type ValType: VariadicExt + Eq + Hash + Clone; + /// The type that holds the data in the leaves + type Storage: VariadicCollection + + Default + + IntoIterator; + + // types that vary per node + /// SuffixSchema variadic: the suffix of [`Self::Schema`] from this node of the trie + /// downward. The first entry in this variadic is of type [`Self::Head`]. + type SuffixSchema: VariadicExt + Eq + Hash + Clone; + /// The first field in [`Self::SuffixSchema`], and the key for the next node in the trie. + type Head: Eq + Hash + Clone; + + /// Create a new Ght from the iterator. 
+ fn new_from(input: impl IntoIterator) -> Self; + + /// Merge a matching Ght node into this node; returns `true` if this node changed + fn merge_node(&mut self, other: Self) -> bool; + + /// Report the height of this node: the number of edges on the path from this node down to a leaf. + /// E.g. if we have `GhtInner<_, GhtInner<_, GhtLeaf<..>>>` the height is 2 + /// This is a static property of the type of this node, so simply invokes the static method. + fn height(&self) -> usize { + Self::HEIGHT + } + + /// The height of this node in the Ght. Leaf = 0. + const HEIGHT: usize; + + /// Inserts an item into the hash trie. + fn insert(&mut self, row: Self::Schema) -> bool; + + /// Returns `true` if the (entire) row is found below in the trie, `false` otherwise. + /// See [`GhtGet::get`] to look just for "head" keys in this node + fn contains<'a>(&'a self, row: ::AsRefVar<'a>) -> bool; + + /// Iterate through (entire) rows stored in this HashTrie. + fn recursive_iter(&self) -> impl Iterator::AsRefVar<'_>>; + + /// return the leaf below that contains this row, or `None` if not found. 
+ fn find_containing_leaf( + &self, + row: ::AsRefVar<'_>, + ) -> Option<&'_ GhtLeaf>; + + /// into_iter for leaf elements, or None for inner nodes + fn into_iter(self) -> Option>; + + /// pull all the data out of this trie node but retain the reference + fn drain(&mut self) -> Option>; +} + +/// internal node of a HashTrie +#[derive(Debug, Clone)] +pub struct GhtInner +where + Head: Clone, + Node: GeneralizedHashTrieNode, +{ + pub(crate) children: HashMap, +} + +impl Default for GhtInner +where + Head: Clone, + Node: GeneralizedHashTrieNode, +{ + fn default() -> Self { + let children = Default::default(); + Self { children } + } +} + +impl GeneralizedHashTrieNode for GhtInner +where + Head: 'static + Hash + Eq + Clone, + Node: 'static + GeneralizedHashTrieNode, + Node::Schema: SplitBySuffix, +{ + type Schema = Node::Schema; + type KeyType = Node::KeyType; + type ValType = Node::ValType; + type Storage = Node::Storage; + type SuffixSchema = var_type!(Head, ...Node::SuffixSchema); + type Head = Head; + + fn new_from(input: impl IntoIterator) -> Self { + let mut retval: Self = Default::default(); + for row in input { + retval.insert(row); + } + retval + } + + fn merge_node(&mut self, other: Self) -> bool { + let mut changed = false; + + for (k, v) in other.children { + match self.children.entry(k) { + std::collections::hash_map::Entry::Occupied(mut occupied) => { + changed |= occupied.get_mut().merge_node(v) + } + std::collections::hash_map::Entry::Vacant(vacant) => { + vacant.insert(v); + changed = true + } + } + } + changed + } + + const HEIGHT: usize = Node::HEIGHT + 1; + + fn insert(&mut self, row: Self::Schema) -> bool { + let (_prefix, var_args!(head, ..._rest)) = + Self::Schema::split_by_suffix_ref(row.as_ref_var()); + self.children.entry(head.clone()).or_default().insert(row) + } + + fn contains<'a>(&'a self, row: ::AsRefVar<'a>) -> bool { + let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row); + if let Some(node) = 
self.children.get(head) { + node.contains(row) + } else { + false + } + } + + fn recursive_iter(&self) -> impl Iterator::AsRefVar<'_>> { + self.children + .iter() + .flat_map(|(_k, vs)| vs.recursive_iter()) + } + + fn find_containing_leaf( + &self, + row: ::AsRefVar<'_>, + ) -> Option<&'_ GhtLeaf> { + let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row); + self.children + .get(head) + .and_then(|child| child.find_containing_leaf(row)) + } + + fn into_iter(self) -> Option> { + None::>> + } + + fn drain(&mut self) -> Option> { + None::>> + } +} + +impl FromIterator for GhtInner +where + Head: 'static + Hash + Eq + Clone, + Node: 'static + GeneralizedHashTrieNode + Clone, + Node::Schema: SplitBySuffix, +{ + fn from_iter>(iter: Iter) -> Self { + let mut out = Self::default(); + for row in iter { + out.insert(row); + } + out + } +} + +/// leaf node of a HashTrie +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct GhtLeaf +where + Schema: Eq + Hash, + Storage: VariadicCollection, +{ + pub(crate) elements: Storage, + pub(crate) forced: bool, + /// defines ValType for the parents, recursively + pub(crate) _suffix_schema: PhantomData, +} +impl Default for GhtLeaf +where + Schema: Eq + Hash, + Storage: VariadicCollection + Default, +{ + fn default() -> Self { + let elements = Storage::default(); + Self { + elements, + forced: false, + _suffix_schema: PhantomData, + } + } +} + +impl GeneralizedHashTrieNode + for GhtLeaf +where + Schema: 'static + + Eq + + VariadicExt + + Hash + + Clone + + SplitBySuffix + + PartialEqVariadic, + ValHead: Clone + Eq + Hash, + var_type!(ValHead, ...ValRest): Clone + Eq + Hash + PartialEqVariadic, + >::Prefix: Eq + Hash + Clone, + Storage: VariadicCollection + Default + IntoIterator, +{ + type Schema = Schema; + type SuffixSchema = var_type!(ValHead, ...ValRest); + type ValType = var_type!(ValHead, ...ValRest); + type KeyType = >::Prefix; + type Head = ValHead; + type Storage = Storage; + + fn new_from(input: impl 
IntoIterator) -> Self { + let mut retval: Self = Default::default(); + for i in input { + retval.insert(i); + } + retval + } + + fn merge_node(&mut self, other: Self) -> bool { + let old_len = self.elements.len(); + self.elements.extend(other.elements); + self.elements.len() > old_len + } + + const HEIGHT: usize = 0; + + fn insert(&mut self, row: Self::Schema) -> bool { + self.elements.insert(row); + true + } + + fn contains<'a>(&'a self, row: ::AsRefVar<'a>) -> bool { + self.elements.iter().any(|r| Schema::eq_ref(r, row)) + } + + fn recursive_iter(&self) -> impl Iterator::AsRefVar<'_>> { + self.elements.iter() + } + + fn find_containing_leaf( + &self, + row: ::AsRefVar<'_>, + ) -> Option<&'_ GhtLeaf<::Schema, Self::ValType, Self::Storage>> + { + // TODO(mingwei): actually use the hash set as a hash set + if self + .elements + .iter() + .any(|x| ::eq_ref(row, x)) + { + Some(self) + } else { + None + } + } + + fn into_iter(self) -> Option> { + Some(self.elements.into_iter()) + } + + fn drain(&mut self) -> Option> { + Some(self.elements.drain()) + } +} + +impl GeneralizedHashTrieNode for GhtLeaf +where + Schema: 'static + Eq + VariadicExt + Hash + Clone + PartialEqVariadic, + Storage: VariadicCollection + Default + IntoIterator, +{ + type Schema = Schema; + type SuffixSchema = (); + type ValType = (); + type KeyType = Schema; + type Head = (); + type Storage = Storage; + + fn new_from(input: impl IntoIterator) -> Self { + let mut retval: Self = Default::default(); + for i in input { + retval.insert(i); + } + retval + } + + fn merge_node(&mut self, other: Self) -> bool { + let old_len = self.elements.len(); + self.elements.extend(other.elements); + self.elements.len() > old_len + } + + const HEIGHT: usize = 0; + + fn insert(&mut self, row: Self::Schema) -> bool { + self.elements.insert(row); + true + } + + fn contains<'a>(&'a self, row: ::AsRefVar<'a>) -> bool { + self.elements.iter().any(|r| Schema::eq_ref(r, row)) + } + + fn recursive_iter(&self) -> impl 
Iterator::AsRefVar<'_>> { + self.elements.iter() + } + + fn find_containing_leaf( + &self, + row: ::AsRefVar<'_>, + ) -> Option<&'_ GhtLeaf<::Schema, Self::ValType, Self::Storage>> + { + // TODO(mingwei): actually use the hash set as a hash set + if self + .elements + .iter() + .any(|x| ::eq_ref(row, x)) + { + Some(self) + } else { + None + } + } + + fn into_iter(self) -> Option> { + Some(self.elements.into_iter()) + } + + fn drain(&mut self) -> Option> { + Some(self.elements.drain()) + } +} + +impl FromIterator for GhtLeaf +where + Schema: Eq + Hash, + Storage: VariadicCollection + Default + FromIterator, +{ + fn from_iter>(iter: Iter) -> Self { + let elements = iter.into_iter().collect(); + Self { + elements, + forced: false, + _suffix_schema: PhantomData, + } + } +} + +/// A trait for the get and iter methods from Wang/Willsey/Suciu, which +/// work differently on leaves than internal nodes +pub trait GhtGet: GeneralizedHashTrieNode { + /// Type returned by [`Self::get`]. + type Get: GeneralizedHashTrieNode; + + /// On an Inner node, retrieves the value (child) associated with the given "head" key. + /// returns an `Option` containing a reference to the value if found, or `None` if not found. + /// On a Leaf node, returns None. + fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get>; + + /// get, but mutable output + fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get>; + + /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes). + fn iter(&self) -> impl Iterator; + + /// Iterator for the tuples (from leaf nodes) or nothing (from inner nodes). + fn iter_tuples(&self) -> impl Iterator::AsRefVar<'_>>; +} + +impl GhtGet for GhtInner +where + Head: 'static + Eq + Hash + Clone, + Node: 'static + GeneralizedHashTrieNode, + Node::Schema: SplitBySuffix, +{ + /// Type returned by [`Self::get`]. + type Get = Node; + + /// On an Inner node, retrieves the value (child) associated with the given "head" key. 
+ /// returns an `Option` containing a reference to the value if found, or `None` if not found. + /// On a Leaf node, returns None. + fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get> { + self.children.get(head) + } + + fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get> { + self.children.get_mut(head) + } + + fn iter(&self) -> impl Iterator { + self.children.keys().cloned() + } + + fn iter_tuples(&self) -> impl Iterator::AsRefVar<'_>> { + std::iter::empty() + } +} + +impl GhtGet for GhtLeaf +where + Schema: 'static + Eq + Hash + Clone + PartialEqVariadic + SplitBySuffix, + ValType: Eq + Hash + Clone + PartialEqVariadic, + >::Prefix: Eq + Hash + Clone, + GhtLeaf: GeneralizedHashTrieNode, + Storage: VariadicCollection, +{ + /// Type returned by [`Self::get`]. + type Get = GhtLeaf; + + /// On an Inner node, retrieves the value (child) associated with the given "head" key. + /// returns an `Option` containing a reference to the value if found, or `None` if not found. + /// On a Leaf node, returns None. + fn get<'a>(&'a self, _head: &Self::Head) -> Option<&'a Self::Get> { + None + } + fn get_mut<'a>(&'a mut self, _head: &Self::Head) -> Option<&'a mut Self::Get> { + None + } + + fn iter(&self) -> impl Iterator { + std::iter::empty() + } + + fn iter_tuples(&self) -> impl Iterator::AsRefVar<'_>> { + self.elements.iter() + } +} + +/// A trait to iterate through the items in a Ght based on a prefix of the schema. 
+pub trait GhtPrefixIter { + /// the schema output + type Item: VariadicExt; + /// given a prefix, return an iterator through the items below + fn prefix_iter<'a>( + &'a self, + prefix: KeyPrefix, + ) -> impl Iterator::AsRefVar<'a>> + where + Self::Item: 'a; +} + +impl<'k, Head, Node, PrefixRest> GhtPrefixIter + for GhtInner +where + Head: Eq + Hash + Clone, + Node: GeneralizedHashTrieNode + GhtPrefixIter, +{ + type Item = >::Item; + fn prefix_iter<'a>( + &'a self, + prefix: var_type!(&'k Head, ...PrefixRest), + ) -> impl Iterator::AsRefVar<'a>> + where + Self::Item: 'a, + { + let var_args!(head, ...rest) = prefix; + self.children + .get(head) + .map(|node| node.prefix_iter(rest)) + .into_iter() + .flatten() + } +} +impl GhtPrefixIter for GhtInner +where + Self: GeneralizedHashTrieNode, + Head: Eq + Hash + Clone, + Node: GeneralizedHashTrieNode, +{ + type Item = ::Schema; + fn prefix_iter<'a>( + &'a self, + _prefix: var_type!(), + ) -> impl Iterator::AsRefVar<'a>> + where + Self::Item: 'a, + { + self.recursive_iter() + } +} + +impl GhtPrefixIter + for GhtLeaf +where + KeyPrefixRef: 'static + RefVariadic, + Schema: 'static + VariadicExt + Hash + Eq + SplitBySuffix, + ValType: VariadicExt, + ValType: Split, + KeyPrefixRef::UnRefVar: PartialEqVariadic, + Storage: 'static + VariadicCollection, +{ + type Item = Schema; + fn prefix_iter<'a>( + &'a self, + prefix: KeyPrefixRef, + ) -> impl Iterator::AsRefVar<'a>> + where + Self::Item: 'a, + { + self.elements.iter().filter(move |&row| { + let (_row_prefix, row_mid_suffix) = + >::split_by_suffix_ref(row); + let (row_mid, _row_suffix): (::AsRefVar<'_>, _) = + >::split_ref(row_mid_suffix); + ::eq_ref(prefix.unref_ref(), row_mid) + }) + } +} diff --git a/lattices/src/ght/test.rs b/lattices/src/ght/test.rs new file mode 100644 index 00000000000..e24238c2de3 --- /dev/null +++ b/lattices/src/ght/test.rs @@ -0,0 +1,943 @@ +//! 
Tests for the GHT code +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + #[test] + fn basic_test() { + use variadics::var_expr; + + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + // Example usage: build tries of several key/value shapes, then check membership and row counts of each + type MyTrie1 = GhtType!(u32, u32 => &'static str: VariadicCountedHashSet); + + fn ght_type() {} + ght_type::(); + + let htrie1 = MyTrie1::new_from(vec![var_expr!(42, 314, "hello")]); + assert!(htrie1.contains(var_expr!(&42, &314, &"hello"))); + assert_eq!(htrie1.recursive_iter().count(), 1); + + type MyTrie2 = GhtType!(u32 => u32: VariadicCountedHashSet); + let htrie2 = MyTrie2::new_from(vec![var_expr!(42, 314)]); + assert!(htrie2.contains(var_expr!(&42, &314))); + assert_eq!(htrie2.recursive_iter().count(), 1); + + type MyTrie3 = GhtType!(u32, u64, u16 => &'static str: VariadicCountedHashSet); + let htrie3 = MyTrie3::new_from(vec![ + var_expr!(123, 2, 5, "hello"), + var_expr!(50, 1, 1, "hi"), + var_expr!(5, 1, 7, "hi"), + var_expr!(5, 1, 7, "bye"), + ]); + assert!(htrie3.contains(var_expr!(&50, &1, &1, &"hi"))); + assert_eq!(htrie3.recursive_iter().count(), 4); + } + #[test] + fn test_ght_node_type_macro() { + use variadics::var_expr; + + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + // 0 => 1 + type LilTrie = GhtType!(() => u32: VariadicCountedHashSet); + let _j = LilTrie::default(); + let _l = LilTrie::new_from(vec![var_expr!(1)]); + + // 0 => >1 + type LilTrie2 = GhtType!(() => u32, u64: VariadicCountedHashSet); + let _l = LilTrie2::default(); + let _l = LilTrie2::new_from(vec![var_expr!(1, 1)]); + + // 1 => 0 + type KeyNoValTrie = GhtType!(u32 => (): VariadicCountedHashSet); + let l = KeyNoValTrie::new_from(vec![var_expr!(1)]); + let _: KeyNoValTrie = l; + + // 1 => 1 + type SmallTrie = GhtType!(u32 => &'static str: VariadicCountedHashSet); + type SmallKeyedTrie = GhtType!(u32 => &'static str: VariadicCountedHashSet); + let l = SmallTrie::new_from(vec![var_expr!(1, "hello")]); + let _: SmallKeyedTrie = l; + + // 
1 => >1 + type SmallKeyLongValTrie = GhtType!(u32 => u64, u16, &'static str: VariadicCountedHashSet); + let _x = SmallKeyLongValTrie::new_from(vec![var_expr!(1, 999, 222, "hello")]); + + // >1 => 0 + type LongKeyNoValTrie = GhtType!(u32, u64 => (): VariadicCountedHashSet); + let l = LongKeyNoValTrie::new_from(vec![var_expr!(1, 999)]); + let _: LongKeyNoValTrie = l; + + // >1 => 1 + type LongKeySmallValTrie = GhtType!(u32, u16 => &'static str: VariadicCountedHashSet); + type LongKeySmallValKeyedTrie = GhtType!(u32, u16 => &'static str: VariadicCountedHashSet); + let x = LongKeySmallValTrie::new_from(vec![var_expr!(1, 314, "hello")]); + let _: LongKeySmallValKeyedTrie = x; + let _ = LongKeySmallValTrie::new_from(vec![var_expr!(1, 314, "hello")]); + + // >1 => >1 + type LongKeyLongValTrie = GhtType!(u32, u64 => u16, &'static str: VariadicCountedHashSet); + let _x = LongKeyLongValTrie::new_from(vec![var_expr!(1, 999, 222, "hello")]); + } + + #[test] + fn test_insert() { + use variadics::var_expr; + + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + type MyGht = GhtType!(u16, u32 => u64: VariadicCountedHashSet); + let mut htrie = MyGht::default(); + htrie.insert(var_expr!(42, 314, 43770)); + assert_eq!(htrie.recursive_iter().count(), 1); + assert_eq!(MyGht::HEIGHT, 2); + htrie.insert(var_expr!(42, 315, 43770)); + assert_eq!(htrie.recursive_iter().count(), 2); + htrie.insert(var_expr!(42, 314, 30619)); + assert_eq!(htrie.recursive_iter().count(), 3); + htrie.insert(var_expr!(43, 10, 600)); + assert_eq!(htrie.recursive_iter().count(), 4); + assert!(htrie.contains(var_expr!(&42, &314, &30619))); + assert!(htrie.contains(var_expr!(&42, &315, &43770))); + assert!(htrie.contains(var_expr!(&43, &10, &600))); + + type LongKeyLongValTrie = GhtType!(u32, u64 => u16, &'static str: VariadicCountedHashSet); + let mut htrie = LongKeyLongValTrie::new_from(vec![var_expr!(1, 999, 222, "hello")]); + htrie.insert(var_expr!(1, 999, 111, "bye")); + 
htrie.insert(var_expr!(1, 1000, 123, "cya")); + assert!(htrie.contains(var_expr!(&1, &999, &222, &"hello"))); + assert!(htrie.contains(var_expr!(&1, &999, &111, &"bye"))); + assert!(htrie.contains(var_expr!(&1, &1000, &123, &"cya"))); + } + + #[test] + fn test_scale() { + use variadics::var_expr; + + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + type MyGht = GhtType!(bool, usize, &'static str => i32: VariadicCountedHashSet); + let mut htrie = MyGht::new_from(vec![var_expr!(true, 1, "hello", -5)]); + assert_eq!(htrie.recursive_iter().count(), 1); + for i in 1..1000000 { + htrie.insert(var_expr!(true, 1, "hello", i)); + } + assert_eq!(htrie.recursive_iter().count(), 1000000); + } + + #[test] + fn test_contains() { + use variadics::{var_expr, VariadicExt}; + + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + type MyGht = GhtType!(u16, u32 => u64: VariadicCountedHashSet); + let htrie = MyGht::new_from(vec![var_expr!(42_u16, 314_u32, 43770_u64)]); + let x = var_expr!(&42, &314, &43770); + assert!(htrie.contains(x)); + assert!(htrie.contains(var_expr!(42, 314, 43770).as_ref_var())); + assert!(htrie.contains(var_expr!(&42, &314, &43770))); + assert!(!htrie.contains(var_expr!(42, 314, 30619).as_ref_var())); + assert!(!htrie.contains(var_expr!(&42, &315, &43770))); + assert!(!htrie.contains(var_expr!(&43, &314, &43770))); + } + + #[test] + fn test_get() { + use variadics::{var_expr, VariadicExt}; + + use crate::ght::{GeneralizedHashTrieNode, GhtGet}; + use crate::GhtType; + + type MyGht = GhtType!(u32, u32 => u32: VariadicCountedHashSet); + let ht_root = MyGht::new_from(vec![var_expr!(42, 314, 43770)]); + + let inner = ht_root.get(&42).unwrap(); + let t = inner.recursive_iter().next().unwrap(); + assert_eq!(t, var_expr!(&42, &314, &43770)); + + let leaf = inner.get(&314).unwrap(); + let t = leaf.recursive_iter().next().unwrap(); + assert_eq!(t, var_expr!(42, 314, 43770).as_ref_var()); + } + + #[test] + fn test_iter() { + use 
variadics::var_expr; + + use crate::ght::{GeneralizedHashTrieNode, GhtGet}; + use crate::GhtType; + type MyGht = GhtType!(u32, u32 => u32: VariadicCountedHashSet); + let ht_root = MyGht::new_from(vec![var_expr!(42, 314, 43770)]); + let inner_key = ht_root.iter().next().unwrap(); + let inner = ht_root.get(&inner_key).unwrap(); + let t = inner.recursive_iter().next().unwrap(); + assert_eq!(t, var_expr!(&42, &314, &43770)); + + let leaf_key = inner.iter().next().unwrap(); + let leaf = inner.get(&leaf_key).unwrap(); + // iter() on leaf should return None + let t = leaf.iter().next(); + assert!(t.is_none()); + } + + #[test] + fn test_recursive_iter() { + use variadics::{var_expr, var_type, VariadicExt}; + + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + type MyGht = GhtType!(u32, u32 => u32: VariadicCountedHashSet); + type InputType = var_type!(u32, u32, u32); + type ResultType<'a> = var_type!(&'a u32, &'a u32, &'a u32); + let input: HashSet = HashSet::from_iter( + [ + (42, 314, 30619), + (42, 314, 43770), + (42, 315, 43770), + (43, 10, 600), + ] + .iter() + .map(|&(a, b, c)| var_expr!(a, b, c)), + ); + let htrie = MyGht::new_from(input.clone()); + let result = input.iter().map(|v| v.as_ref_var()).collect(); + let v: HashSet = htrie.recursive_iter().collect(); + assert_eq!(v, result); + } + + #[test] + fn test_prefix_iter_leaf() { + use variadics::variadic_collections::VariadicCountedHashSet; + use variadics::{var_expr, var_type}; + + use crate::ght::{GeneralizedHashTrieNode, GhtLeaf, GhtPrefixIter}; + + type InputType = var_type!(u8, u16, u32); + type ResultType<'a> = var_type!(&'a u8, &'a u16, &'a u32); + + let input: HashSet = HashSet::from_iter( + [ + (42, 314, 30619), + (42, 314, 43770), + (42, 315, 43770), + (43, 10, 600), + ] + .iter() + .map(|&(a, b, c)| var_expr!(a, b, c)), + ); + let leaf = + GhtLeaf::>::new_from( + input.clone(), + ); + // let key = var_expr!(42u8).as_ref_var(); + let key = (); // (var_expr!().as_ref_var();) + let v: 
HashSet = leaf.prefix_iter(key).collect(); + let result = input + .iter() + // .filter(|t: &&InputType| t.0 == 42) + .map(|t: &InputType| var_expr!(&t.0, &t.1 .0, &t.1 .1 .0)) + .collect(); + assert_eq!(v, result); + } + + #[test] + fn test_prefix_iter() { + use variadics::{var_expr, var_type, VariadicExt}; + + use crate::ght::{GeneralizedHashTrieNode, GhtPrefixIter}; + use crate::GhtType; + + type MyGht = GhtType!(u8, u16 => u32: VariadicCountedHashSet); + type InputType = var_type!(u8, u16, u32); + type ResultType<'a> = var_type!(&'a u8, &'a u16, &'a u32); + let input: HashSet = HashSet::from_iter( + [ + (42, 314, 30619), + (42, 314, 43770), + (42, 315, 43770), + (43, 10, 600), + ] + .iter() + .map(|&(a, b, c)| var_expr!(a, b, c)), + ); + let htrie = MyGht::new_from(input.clone()); + + let v: HashSet = htrie.prefix_iter(var_expr!(42, 315).as_ref_var()).collect(); + let result = HashSet::from_iter([var_expr!(&42, &315, &43770)].iter().copied()); + assert_eq!(v, result); + + let v: HashSet = htrie.prefix_iter(var_expr!(42u8).as_ref_var()).collect(); + let result = input + .iter() + .filter(|t: &&InputType| t.0 == 42) + .map(|t: &InputType| var_expr!(&t.0, &t.1 .0, &t.1 .1 .0)) + .collect(); + assert_eq!(v, result); + + for row in htrie.prefix_iter(var_expr!(42, 315, 43770).as_ref_var()) { + assert_eq!(row, var_expr!(&42, &315, &43770)); + } + } + + #[test] + fn test_prefix_iter_complex() { + use variadics::{var_expr, var_type, VariadicExt}; + + use crate::ght::{GeneralizedHashTrieNode, GhtPrefixIter}; + use crate::GhtType; + + type MyGht = GhtType!(bool, u32, &'static str => i32: VariadicCountedHashSet); + type InputType = var_type!(bool, u32, &'static str, i32); + type ResultType<'a> = var_type!(&'a bool, &'a u32, &'a &'static str, &'a i32); + let input: HashSet = HashSet::from_iter( + [ + (true, 1, "hello", -5), + (true, 1, "hi", -2), + (true, 1, "hi", -3), + (true, 1, "hi", -4), + (true, 1, "hi", -5), + (true, 2, "hello", 1), + (false, 10, "bye", 5), + ] + 
.iter() + .map(|&(a, b, c, d)| var_expr!(a, b, c, d)), + ); + + let htrie = MyGht::new_from(input.clone()); + + let v: HashSet = htrie + .prefix_iter(var_expr!(true, 1, "hi").as_ref_var()) + .collect(); + let result = input + .iter() + .filter(|t: &&InputType| t.0 && t.1.0 == 1 && t.1.1.0 == "hi") + //.map(|t: &InputType| (&t.0, &t.1 .0, (&t.1 .1 .0, (&t.1 .1 .1 .0, ())))) + .map(|t| t.as_ref_var()) + .collect(); + assert_eq!(v, result); + + let v: HashSet = htrie.prefix_iter(var_expr!(true).as_ref_var()).collect(); + let result = input + .iter() + .filter(|t: &&InputType| t.0) + .map(|t: &InputType| t.as_ref_var()) + .collect(); + assert_eq!(v, result); + } + + #[test] + fn test_merge() { + use variadics::{var_expr, var_type}; + + use crate::ght::GeneralizedHashTrieNode; + use crate::{GhtType, Merge}; + + type MyGht = GhtType!(u32, u64 => u16, &'static str: VariadicHashSet); + + let mut test_ght1 = MyGht::new_from(vec![var_expr!(42, 314, 10, "hello")]); + let test_ght2 = MyGht::new_from(vec![var_expr!(42, 314, 10, "hello")]); + + assert_eq!( + test_ght1 + .recursive_iter() + .collect::>() + .len(), + 1 + ); + test_ght1.merge(test_ght2.clone()); + // merge does not contain duplicate copy of the tuple + assert_eq!( + test_ght1 + .recursive_iter() + .collect::>() + .len(), + 1 + ); + assert!(!test_ght1.merge(test_ght2.clone())); + + let mut test_ght1 = MyGht::new_from(vec![var_expr!(42, 314, 10, "hello")]); + let mut test_ght2 = MyGht::new_from(vec![var_expr!(42, 314, 10, "hello")]); + test_ght1.merge(test_ght2.clone()); + + test_ght1.insert(var_expr!(42, 314, 20, "goodbye")); + test_ght2.insert(var_expr!(42, 314, 20, "again")); + + // change on merge + assert!(test_ght1.merge(test_ght2.clone())); + for k in test_ght2.recursive_iter() { + assert!(test_ght1.contains(k)) + } + } + + #[test] + fn test_node_lattice() { + use variadics::var_expr; + + use crate::ght::GeneralizedHashTrieNode; + use crate::{GhtType, NaiveLatticeOrd}; + + type MyGht = GhtType!(u32, u64 => 
u16, &'static str: VariadicHashSet); + type MyGhtNode = GhtType!(u32, u64 => u16, &'static str: VariadicHashSet); + + let mut test_vec: Vec = Vec::new(); + + let empty_ght = MyGht::new_from(vec![]); + let test_ght1 = MyGht::new_from(vec![var_expr!(42, 314, 10, "hello")]); + let mut test_ght2 = MyGht::new_from(vec![var_expr!(42, 314, 10, "hello")]); + test_ght2.insert(var_expr!(42, 314, 20, "again")); + let mut test_ght3 = test_ght2.clone(); + test_ght3.insert(var_expr!(42, 400, 1, "level 2")); + let mut test_ght4 = test_ght3.clone(); + test_ght4.insert(var_expr!(43, 1, 1, "level 1")); + + let test_vec_wrap = [empty_ght, test_ght1, test_ght2, test_ght3, test_ght4]; + + for ght in test_vec_wrap.iter().cloned() { + ght.naive_cmp(&ght.clone()); + test_vec.push(ght); + } + crate::test::check_all(&test_vec); + crate::test::check_all(&test_vec_wrap); + } + + #[test] + fn test_cartesian_bimorphism() { + use variadics::var_expr; + + use crate::ght::lattice::GhtCartesianProductBimorphism; + use crate::ght::GeneralizedHashTrieNode; + use crate::{GhtType, LatticeBimorphism}; + + type MyGhtA = GhtType!(u32, u64 => u16, &'static str: VariadicHashSet); + type MyGhtB = GhtType!(u32, u64, u16 => &'static str: VariadicHashSet); + + let mut ght_a = MyGhtA::default(); + let mut ght_b = MyGhtB::default(); + + ght_a.insert(var_expr!(123, 2, 5, "hello")); + ght_a.insert(var_expr!(50, 1, 1, "hi")); + ght_a.insert(var_expr!(5, 1, 7, "hi")); + ght_b.insert(var_expr!(5, 1, 8, "hi")); + ght_b.insert(var_expr!(10, 1, 2, "hi")); + ght_b.insert(var_expr!(12, 10, 98, "bye")); + + type MyGhtAb = GhtType!(u32, u64, u16, &'static str, u32, u64 => u16, &'static str: VariadicCountedHashSet); + + let mut bim = GhtCartesianProductBimorphism::::default(); + let ght_out = bim.call(&ght_a, &ght_b); + assert_eq!( + ght_out.recursive_iter().count(), + ght_a.recursive_iter().count() * ght_b.recursive_iter().count() + ); + } + + #[test] + fn test_join_bimorphism() { + use 
variadics::variadic_collections::{VariadicCountedHashSet, VariadicHashSet}; + use variadics::{var_expr, var_type}; + + use crate::ght::lattice::{ + DeepJoinLatticeBimorphism, GhtNodeKeyedBimorphism, GhtValTypeProductBimorphism, + }; + use crate::ght::{GeneralizedHashTrieNode, GhtInner, GhtLeaf}; + use crate::{GhtType, LatticeBimorphism}; + + type ResultSchemaType = var_type!(u32, u64, u16, &'static str, &'static str); + type ResultSchemaRefType<'a> = var_type!( + &'a u32, + &'a u64, + &'a u16, + &'a &'static str, + &'a &'static str + ); + type MyGhtATrie = GhtType!(u32, u64, u16 => &'static str: VariadicHashSet); + type MyGhtBTrie = GhtType!(u32, u64, u16 => &'static str: VariadicHashSet); + + let mut ght_a = MyGhtATrie::default(); + let mut ght_b = MyGhtBTrie::default(); + + ght_a.insert(var_expr!(123, 2, 5, "hello")); + ght_a.insert(var_expr!(50, 1, 1, "hi")); + ght_a.insert(var_expr!(5, 1, 7, "hi")); + + ght_b.insert(var_expr!(5, 1, 8, "hi")); + ght_b.insert(var_expr!(5, 1, 7, "world")); + ght_b.insert(var_expr!(10, 1, 2, "hi")); + ght_b.insert(var_expr!(12, 10, 98, "bye")); + + let result: HashSet = [var_expr!(&5, &1, &7, &"hi", &"world")] + .iter() + .copied() + .collect(); + { + // here we manually construct the proper bimorphism stack. + // note that the bottommost bimorphism is GhtValTypeProductBimorphism, + // which ensures that the Schema of the resulting output GhtLeaf and GhtInner + // nodes correctly includes the key columns, not just the cross-product of the values. 
+ type MyGhtOut = GhtInner< + &'static str, + GhtLeaf< + ResultSchemaType, + var_type!(&'static str), + VariadicCountedHashSet, + >, + >; + // let mut bim = GhtNodeKeyedBimorphism::new(GhtNodeKeyedBimorphism::new( + // GhtNodeKeyedBimorphism::new(GhtValTypeProductBimorphism::::default()), + // )); + let mut bim = GhtNodeKeyedBimorphism::new(GhtNodeKeyedBimorphism::new( + GhtNodeKeyedBimorphism::new(GhtValTypeProductBimorphism::::default()), + )); + let out = bim.call(&ght_a, &ght_b); + let out: HashSet = out.recursive_iter().collect(); + assert_eq!(out, result.iter().copied().collect()); + } + { + // Here we use DeepJoinLatticeBimorphism as a more compact representation of the + // manual stack of bimorphisms above. This is the recommended approach. + type MyNodeBim<'a> = <(MyGhtATrie, MyGhtBTrie) as DeepJoinLatticeBimorphism< + VariadicHashSet, + >>::DeepJoinLatticeBimorphism; + let mut bim = ::default(); + let out = bim.call(&ght_a, &ght_b); + let out: HashSet = out.recursive_iter().collect(); + assert_eq!(out, result.iter().copied().collect()); + } + } + + #[test] + fn test_ght_with_tuple_macro() { + use variadics::{var_expr, VariadicExt}; + use variadics_macro::tuple; + + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + type MyRoot = GhtType!(u16, u32 => u64: VariadicCountedHashSet); + + let mut trie1 = MyRoot::default(); + assert_eq!(3, <::Schema>::LEN); + trie1.insert(var_expr!(1, 2, 3)); + let t = trie1.recursive_iter().next().unwrap(); + let tup = tuple!(t, 3); + assert_eq!(tup, (&1, &2, &3)); + } + + #[test] + fn test_triangle_generic_join() { + use std::hash::{BuildHasherDefault, DefaultHasher}; + + use variadics::var_expr; + + use crate::ght::{GeneralizedHashTrieNode, GhtPrefixIter}; + use crate::GhtType; + + const MATCHES: u32 = 1000; + type MyGht = GhtType!(u32 => u32: VariadicCountedHashSet); + + let r_iter = (0..MATCHES) + .map(|i| (0, i)) + .chain((1..MATCHES).map(|i| (i, 0))); + + let s_iter = (0..MATCHES) + .map(|i| (0, i)) + 
.chain((1..MATCHES).map(|i| (i, 0))); + + let t_iter = (0..MATCHES) + .map(|i| (0, i)) + .chain((1..MATCHES).map(|i| (i, 0))); + + let rx_ght = MyGht::new_from(r_iter.clone().map(|(x, y)| var_expr!(x, y))); + let sb_ght = MyGht::new_from(s_iter.clone().map(|(y, b)| var_expr!(b, y))); + let tx_ght = MyGht::new_from(t_iter.clone().map(|(z, x)| var_expr!(x, z))); + + let r_x = r_iter + .clone() + .map(|(x, _y)| x) + .collect::>>(); + let t_x = s_iter + .clone() + .map(|(_z, x)| x) + .collect::>>(); + let x_inter = r_x.intersection(&t_x); + let len = x_inter.clone().count(); + if len > 1 { + assert_eq!(1000, len); + } + + let mut output: Vec<(u32, u32, u32)> = Vec::new(); + let mut x_iters = 0usize; + let mut y_iters = 0usize; + let mut z_iters = 0usize; + for a in x_inter { + x_iters += 1; + let r = rx_ght + .prefix_iter(var_expr!(a)) + .map(|(_x, (y, ()))| *y) + .collect::>>(); + let s_y = s_iter + .clone() + .map(|(y, _z)| y) + .collect::>>(); + let y_inter = r.intersection(&s_y); + let len = y_inter.clone().count(); + if len > 1 { + assert_eq!(1000, len); + } + for b in y_inter { + y_iters += 1; + let s = sb_ght + .prefix_iter(var_expr!(b)) + .map(|(_b, (z, ()))| *z) + .collect::>>(); + let t = tx_ght + .prefix_iter(var_expr!(a)) + .map(|(_x, (z, ()))| *z) + .collect::>>(); + let z_inter = s.intersection(&t); + let len = z_inter.clone().count(); + if len > 1 { + assert_eq!(1000, len); + } + for c in z_inter { + z_iters += 1; + output.push((*a, *b, *c)); + } + } + } + + assert_eq!(1000, x_iters); + assert_eq!(1999, y_iters); + assert_eq!(2998, z_iters); + assert_eq!(2998, output.len()); + } + + fn clover_setup( + matches: usize, + ) -> ( + impl Iterator, + impl Iterator, + impl Iterator, + ) { + let r_iter = (1..matches) + .map(|i| (1u32, i as u32)) + .chain((1..matches).map(|i| (2, i as u32))) + .chain([(0, 0)]); + + let s_iter = (1..matches) + .map(|i| (2u32, i as u32)) + .chain((1..matches).map(|i| (3, i as u32))) + .chain([(0, 0)]); + + let t_iter = (1..matches) 
+ .map(|i| (3u32, i as u32)) + .chain((1..matches).map(|i| (1, i as u32))) + .chain([(0, 0)]); + (r_iter, s_iter, t_iter) + } + + #[test] + fn clover_generic_join() { + use variadics::var_expr; + + use crate::ght::{GeneralizedHashTrieNode, GhtGet}; + use crate::GhtType; + + const MATCHES: usize = 1000; + let (r_iter, s_iter, t_iter) = clover_setup(MATCHES); + + type MyGht = GhtType!(u32 => u32: VariadicCountedHashSet); + let rx_ght = MyGht::new_from(r_iter.map(|(x, a)| var_expr!(x, a))); + let sx_ght = MyGht::new_from(s_iter.map(|(x, b)| var_expr!(x, b))); + let tx_ght = MyGht::new_from(t_iter.map(|(x, c)| var_expr!(x, c))); + for x in rx_ght.iter() { + if let (Some(r), Some(s), Some(t)) = (rx_ght.get(&x), sx_ght.get(&x), tx_ght.get(&x)) { + // All unwraps succeeded, use `r`, `s`, `t` here + for a in r.iter() { + for b in s.iter() { + for c in t.iter() { + assert_eq!((x, a, b, c), (0, 0, 0, 0)); + } + } + } + } else { + // If any unwrap fails, continue to the next iteration + continue; + } + } + } + + #[test] + fn clover_factorized_join() { + use variadics::var_expr; + + use crate::ght::{GeneralizedHashTrieNode, GhtGet}; + use crate::GhtType; + + const MATCHES: usize = 1000; + let (r_iter, s_iter, t_iter) = clover_setup(MATCHES); + + type Ght1 = GhtType!(() => u32, u32: VariadicCountedHashSet); + type Ght2 = GhtType!(u32 => u32: VariadicCountedHashSet); + let rx_ght = Ght1::new_from(r_iter.map(|(x, a)| var_expr!(x, a))); + let sx_ght = Ght2::new_from(s_iter.map(|(x, b)| var_expr!(x, b))); + let tx_ght = Ght2::new_from(t_iter.map(|(x, c)| var_expr!(x, c))); + + for t in rx_ght.recursive_iter() { + let (x, (a, ())): (&u32, (&u32, _)) = t; + if let (Some(s), Some(t)) = (sx_ght.get(x), tx_ght.get(x)) { + // All unwraps succeeded, use `s`, `t` here + for b in s.iter() { + for c in t.iter() { + assert_eq!((x, a, b, c), (&0, &0, 0, 0)); + } + } + } else { + // If any unwrap fails, continue to the next iteration + continue; + } + } + } + + #[test] + fn test_force() { + use 
variadics::var_expr; + + use crate::ght::colt::ColtForestNode; + use crate::ght::GeneralizedHashTrieNode; + use crate::GhtType; + + type LeafType = GhtType!(() => u16, u32, u64: VariadicCountedHashSet); + let n = LeafType::new_from(vec![ + var_expr!(1, 1, 1), + var_expr!(1, 2, 2), + var_expr!(1, 3, 3), + var_expr!(2, 4, 4), + ]); + let out = n.force().unwrap(); + assert_eq!(out.height(), 1); + } + + #[test] + fn test_forest_macro() { + use crate::ColtType; + + type Forest4 = ColtType!(u8, u16, u32, u64); + let _f4 = Forest4::default(); + + type Forest3 = ColtType!(u8, u16, u32); + let _f3 = Forest3::default(); + + type Forest2 = ColtType!(u8, u16); + let _f2 = Forest2::default(); + + type Forest1 = ColtType!(u8); + let _f2 = Forest1::default(); + + type Forest01 = ColtType!(() => u16); + let _f01 = Forest01::default(); + + type Forest02 = ColtType!(() => u8, u16); + let _f02 = Forest02::default(); + + type Forest10 = ColtType!(u8 => ()); + let _f10 = Forest10::default(); + + type Forest11 = ColtType!(u8 => u16); + let _f11 = Forest11::default(); + + type Forest12 = ColtType!(u8 => u16, u32); + let _f12 = Forest12::default(); + + type Forest20 = ColtType!(u8, u16 => ()); + let _f20 = Forest20::default(); + + type Forest21 = ColtType!(u8, u16 => u32); + let _f21 = Forest21::default(); + + type Forest22 = ColtType!(u8, u16 => u32, u64); + let _f22 = Forest22::default(); + } + + #[test] + fn test_colt_little_get() { + use variadics::variadic_collections::VariadicCollection; + use variadics::{var_expr, VariadicExt}; + + use crate::ght::colt::ColtGet; + use crate::ght::GeneralizedHashTrieNode; + use crate::ColtType; + + type MyForest = ColtType!(u8); + + let mut forest = MyForest::default(); + + forest.0.insert(var_expr!(1)); + forest.0.insert(var_expr!(2)); + forest.0.insert(var_expr!(3)); + + assert_eq!(2, forest.len()); + assert_eq!(3, forest.0.elements.len()); + + let result = ColtGet::get(forest.as_mut_var(), &3); + assert_eq!(1, result.len()); + assert_eq!(0, 
forest.0.elements.len()); + assert!(forest.0.forced); + } + + #[test] + fn test_colt_get() { + use variadics::variadic_collections::VariadicCollection; + use variadics::{var_expr, VariadicExt}; + + use crate::ght::colt::ColtGet; + use crate::ght::{GeneralizedHashTrieNode, GhtGet}; + use crate::ColtType; + + type MyForest = ColtType!(u8, u16, u32, u64); + let mut forest = MyForest::default(); + forest.0.insert(var_expr!(1, 1, 1, 1)); + forest.0.insert(var_expr!(2, 2, 2, 2)); + forest.0.insert(var_expr!(3, 3, 3, 3)); + + let len = forest.len(); + assert_eq!(5, len); + { + let get_result = ColtGet::get(forest.as_mut_var(), &1); + assert_eq!(get_result.len(), len - 1); + assert_eq!(get_result.0.height(), 0); + let get_result2 = ColtGet::get(get_result, &1); + assert_eq!(get_result2.len(), len - 2); + let get_result3 = ColtGet::get(get_result2, &1); + assert_eq!(get_result3.len(), len - 3); + assert_eq!( + get_result3.0.elements.iter().next(), + Some(var_expr!(1, 1, 1, 1).as_ref_var()) + ); + assert_eq!(get_result3.1 .0.children.len(), 0); + } + { + let get_result = ColtGet::get(forest.as_mut_var(), &3); + assert_eq!(get_result.len(), len - 1); + let get_result2 = ColtGet::get(get_result, &3); + assert_eq!(get_result2.len(), len - 2); + assert_eq!( + get_result2.0.elements.iter().next(), + Some(var_expr!(3, 3, 3, 3).as_ref_var()) + ); + assert_eq!(get_result2.1 .0.children.len(), 0); + } + assert!(forest.0.forced); + assert_eq!(3, forest.1 .0.children.len()); // keys 1, 2 and 3 + assert_eq!(0, forest.1 .0.get(&1).unwrap().elements.len()); + assert_eq!(1, forest.1 .0.get(&2).unwrap().elements.len()); + assert_eq!(0, forest.1 .0.get(&3).unwrap().elements.len()); + assert_eq!(2, forest.1 .1 .0.children.len()); // keys 1 and 3 + assert_eq!( + 0, + forest + .1 + .1 + .0 + .get(&1) + .unwrap() + .get(&1) + .unwrap() + .elements + .len() + ); + assert!(forest.1 .1 .0.get(&2).is_none()); + assert_eq!( + 1, + forest + .1 + .1 + .0 + .get(&3) + .unwrap() + .get(&3) + .unwrap() + 
.elements + .len() + ); + assert_eq!( + 1, + forest + .1 + .1 + .1 + .0 + .get(&1) + .unwrap() + .get(&1) + .unwrap() + .get(&1) + .unwrap() + .elements + .len() + ); + } + + #[test] + fn test_colt_scale() { + use variadics::variadic_collections::VariadicCollection; + use variadics::{var_expr, VariadicExt}; + + use crate::ght::colt::ColtGet; + use crate::ght::{GeneralizedHashTrieNode, GhtPrefixIter}; + + type MyColt = crate::ColtType!(i32, bool, usize, &'static str); + let mut forest = MyColt::default(); + for i in 1..100000 { + forest.0.insert(var_expr!(i, true, 1, "hello")); + } + { + let result = forest.as_mut_var().get(&3); + assert_eq!(result.len(), 4); + } + // check: first Leaf trie is forced + assert!(forest.0.forced); + assert_eq!(forest.0.elements.len(), 0); + { + let result = forest.as_mut_var().get(&3); + let result2 = result.get(&true); + assert_eq!(result2.len(), 3); + } + { + // check: leaf below 3 in first non-empty trie is forced + let result = forest.as_mut_var().get(&3); + assert!(result.0.forced); + assert_eq!(result.0.elements.len(), 0); + } + // check: prefix (3, true) is now found in the third trie: forest.1.1.0 + assert!(forest + .1 + .1 + .0 + .prefix_iter(var_expr!(3, true).as_ref_var()) + .next() + .is_some()); + { + let result = forest.as_mut_var().get(&3); + let result2 = result.get(&true); + assert_eq!(result2.len(), 3); + let result3 = result2.get(&1); + assert_eq!(result3.len(), 2); + let result4 = result3.get(&"hello"); + assert_eq!(result4.0.elements.len(), 1); + assert_eq!( + result4.0.elements.iter().next(), + Some(var_expr!(3, true, 1, "hello").as_ref_var()) + ); + } + } +} diff --git a/lattices/src/lib.rs b/lattices/src/lib.rs index c8fc0804819..bac63c543b1 100644 --- a/lattices/src/lib.rs +++ b/lattices/src/lib.rs @@ -3,14 +3,15 @@ use std::cmp::Ordering::{self, *}; -pub use cc_traits; use sealed::sealed; +pub use {cc_traits, variadics}; /// Module for definiting algebraic structures and properties. 
pub mod algebra; pub mod collections; mod conflict; mod dom_pair; +pub mod ght; pub mod map_union; pub mod map_union_with_tombstones; mod ord; diff --git a/lattices/tests/compile-fail/non_lattice_field.stderr b/lattices/tests/compile-fail/non_lattice_field.stderr index f2630b97f64..7b8e66201e5 100644 --- a/lattices/tests/compile-fail/non_lattice_field.stderr +++ b/lattices/tests/compile-fail/non_lattice_field.stderr @@ -8,11 +8,11 @@ error[E0277]: the trait bound `String: Merge` is not satisfied `()` implements `Merge<()>` `Conflict` implements `Merge>` `DomPair` implements `Merge>` + `GhtInner` implements `Merge>` + `GhtLeaf` implements `Merge>` `MapUnion` implements `Merge>` `MapUnionWithTombstones` implements `Merge>` `Max` implements `Merge>` - `Min` implements `Merge>` - `NotALattice` implements `Merge` and $N others = help: see issue #48214 = note: this error originates in the derive macro `Lattice` (in Nightly builds, run with -Z macro-backtrace for more info) @@ -31,11 +31,11 @@ error[E0277]: the trait bound `String: IsBot` is not satisfied () Conflict DomPair + GhtInner + GhtLeaf MapUnion MapUnionWithTombstones Max<()> - Max - Max and $N others = help: see issue #48214 = note: this error originates in the derive macro `Lattice` (in Nightly builds, run with -Z macro-backtrace for more info) @@ -54,11 +54,11 @@ error[E0277]: the trait bound `String: IsTop` is not satisfied () Conflict DomPair + GhtInner + GhtLeaf MapUnion MapUnionWithTombstones Max<()> - Max - Max and $N others = help: see issue #48214 = note: this error originates in the derive macro `Lattice` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/variadics/src/variadic_collections.rs b/variadics/src/variadic_collections.rs index 1a1fa28b6de..5dcc8fb923a 100644 --- a/variadics/src/variadic_collections.rs +++ b/variadics/src/variadic_collections.rs @@ -6,7 +6,7 @@ use hashbrown::hash_table::{Entry, HashTable}; use crate::{PartialEqVariadic, VariadicExt, VecVariadic}; /// 
Trait for a set of Variadic Tuples -pub trait VariadicCollection { +pub trait VariadicCollection: Extend { /// The Schema (aka Variadic type) associated with tuples in this set type Schema: PartialEqVariadic; @@ -58,7 +58,7 @@ impl Default for VariadicHashSet { impl fmt::Debug for VariadicHashSet where - T: fmt::Debug + VariadicExt + PartialEqVariadic, + T: fmt::Debug + VariadicExt + PartialEqVariadic + Eq + Hash, for<'a> T::AsRefVar<'a>: Hash + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -82,7 +82,7 @@ where } impl VariadicCollection for VariadicHashSet where - T: VariadicExt + PartialEqVariadic, + T: VariadicExt + PartialEqVariadic + Eq + Hash, for<'a> T::AsRefVar<'a>: Hash, S: BuildHasher, { @@ -127,7 +127,7 @@ where } impl VariadicSet for VariadicHashSet where - T: VariadicExt + PartialEqVariadic, + T: VariadicExt + PartialEqVariadic + Eq + Hash, for<'a> T::AsRefVar<'a>: Hash, S: BuildHasher, { @@ -283,7 +283,7 @@ where impl VariadicCollection for VariadicCountedHashSet where - K: VariadicExt + PartialEqVariadic + Hash + Clone, + K: VariadicExt + PartialEqVariadic + Eq + Hash + Clone, for<'a> K::AsRefVar<'a>: Hash, S: BuildHasher, { @@ -333,7 +333,7 @@ where impl VariadicMultiset for VariadicCountedHashSet where - K: VariadicExt + PartialEqVariadic + Hash + Clone, + K: VariadicExt + PartialEqVariadic + Eq + Hash + Clone, for<'a> K::AsRefVar<'a>: Hash, S: BuildHasher, { @@ -356,6 +356,7 @@ where } /// Iterator helper for [`VariadicCountedHashSet::into_iter`]. 
+#[derive(Clone)] pub struct DuplicateCounted { iter: Iter, state: Option<(Item, usize)>, @@ -474,9 +475,10 @@ where /// Column storage for Variadic tuples of type Schema /// An alternative to VariadicHashMultiset +#[derive(Clone)] pub struct VariadicColumnMultiset where - Schema: VariadicExt, + Schema: VariadicExt + Eq + Hash, { columns: Schema::IntoVec, last_offset: usize, @@ -484,7 +486,7 @@ where impl VariadicColumnMultiset where - T: VariadicExt, + T: VariadicExt + Eq + Hash, { /// initialize an empty columnar multiset pub fn new() -> Self { @@ -497,7 +499,7 @@ where impl Default for VariadicColumnMultiset where - T: VariadicExt, + T: VariadicExt + Eq + Hash, { fn default() -> Self { Self::new() @@ -506,7 +508,8 @@ where impl VariadicCollection for VariadicColumnMultiset where - Schema: PartialEqVariadic, + Schema: PartialEqVariadic + Eq + Hash, + for<'a> ::AsRefVar<'a>: Hash, { type Schema = Schema; @@ -543,11 +546,16 @@ where } } -impl VariadicMultiset for VariadicColumnMultiset where Schema: PartialEqVariadic {} +impl VariadicMultiset for VariadicColumnMultiset +where + Schema: PartialEqVariadic + Eq + Hash, + for<'a> ::AsRefVar<'a>: Hash, +{ +} impl fmt::Debug for VariadicColumnMultiset where - T: fmt::Debug + VariadicExt + PartialEqVariadic, + T: fmt::Debug + VariadicExt + PartialEqVariadic + Eq + Hash, for<'a> T::AsRefVar<'a>: Hash + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -557,7 +565,7 @@ where impl IntoIterator for VariadicColumnMultiset where - Schema: PartialEqVariadic, + Schema: PartialEqVariadic + Eq + Hash, { type Item = Schema; type IntoIter = ::IntoZip; diff --git a/variadics_macro/CHANGELOG.md b/variadics_macro/CHANGELOG.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/variadics_macro/Cargo.toml b/variadics_macro/Cargo.toml new file mode 100644 index 00000000000..2c8ef8b0944 --- /dev/null +++ b/variadics_macro/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "variadics_macro" +publish = true 
+version = "0.5.5" +edition = "2021" +license = "Apache-2.0" +documentation = "https://docs.rs/variadics/" +description = "Procedural macros for the `variadics` crate." + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0.63" +proc-macro-crate = "1.1.0" +quote = "1.0.0" +syn = { version = "2.0.0", features = [ "full", "parsing", "visit-mut" ] } +variadics = { path = "../variadics", version = "^0.0.6" } + +[dev-dependencies] +insta = "1.7.1" +prettyplease = "0.2.20" diff --git a/variadics_macro/README.md b/variadics_macro/README.md new file mode 100644 index 00000000000..5f2c0e52089 --- /dev/null +++ b/variadics_macro/README.md @@ -0,0 +1,17 @@ +## `tuple!` Macro + +Create a tuple from a Variadic type known at compile time. + +Example usage: +```rust +use variadics::var_expr; +use variadics_macro::tuple; + +let tup = var_expr!(1, 2, 3, "four"); +let a = tuple!(tup, 4); +assert_eq!(a, (1, 2, 3, "four")); + +let tup = var_expr!(1, 2, var_expr!(3)); +let b = tuple!(tup, 3); +assert_eq!(b, (1, 2, (3, ()))); +``` \ No newline at end of file diff --git a/variadics_macro/src/lib.rs b/variadics_macro/src/lib.rs new file mode 100644 index 00000000000..2583b358426 --- /dev/null +++ b/variadics_macro/src/lib.rs @@ -0,0 +1,47 @@ +#![doc = include_str!("../README.md")] +extern crate proc_macro; + +use proc_macro::TokenStream; +use proc_macro2::Ident; +use quote::{format_ident, quote}; +use syn::parse::{Parse, ParseStream}; +use syn::{parse_macro_input, LitInt}; + +struct InputLen { + input: Ident, + len: LitInt, +} + +impl Parse for InputLen { + fn parse(ts: ParseStream) -> syn::Result { + let input = ts.parse()?; + ts.parse::()?; + let len = ts.parse()?; + Ok(InputLen { input, len }) + } +} + +#[proc_macro] +pub fn tuple(ts: TokenStream) -> TokenStream { + let InputLen { input, len } = parse_macro_input!(ts as InputLen); + let len = len.base10_parse::().unwrap(); + let pattern = (0..len) + .rev() + .map(|i| format_ident!("x{}", i)) + .fold(quote! 
{ () }, |rest, item| quote! { (#item, #rest) }); + let idents = (0..len).map(|i| format_ident!("x{}", i)); + let tuple = quote! { + ( #( #idents, )* ) + }; + + // Create the assignment statement + let expanded = quote! { + { + let #pattern = #input; + let retval = #tuple; + retval + } + }; + + expanded.into() +}