diff --git a/noir_stdlib/src/collections/mod.nr b/noir_stdlib/src/collections/mod.nr
index 2d952f4d6cd..29f3e8cc854 100644
--- a/noir_stdlib/src/collections/mod.nr
+++ b/noir_stdlib/src/collections/mod.nr
@@ -1,3 +1,4 @@
 mod vec;
 mod bounded_vec;
 mod map;
+mod umap;
diff --git a/noir_stdlib/src/collections/umap.nr b/noir_stdlib/src/collections/umap.nr
new file mode 100644
index 00000000000..fe16ef6bca2
--- /dev/null
+++ b/noir_stdlib/src/collections/umap.nr
@@ -0,0 +1,469 @@
+use crate::cmp::Eq;
+use crate::collections::vec::Vec;
+use crate::option::Option;
+use crate::default::Default;
+use crate::hash::{Hash, Hasher, BuildHasher, BuildHasherDefault};
+use crate::hash::poseidon2::Poseidon2;
+use crate::collections::bounded_vec::BoundedVec;
+
+// An unconstrained hash table with open addressing and quadratic probing.
+// Note that "unconstrained" here means that almost all operations on this
+// map are unconstrained and, importantly, are not constrained afterward either.
+// This map is meant to be used in unconstrained or comptime code where this
+// is not an issue.
+//
+// Compared to the constrained HashMap type, UHashMap can grow automatically
+// as needed and is more efficient since it can break out of loops early.
+//
+// Type parameters:
+//   K - key type, V - value type, B - the BuildHasher used to hash keys.
+struct UHashMap<K, V, B> {
+    // Backing storage; its length is the capacity of the map.
+    _table: [Slot<K, V>],
+
+    // Amount of valid elements in the map.
+    _len: u32,
+
+    _build_hasher: B
+}
+
+// Data unit in the UHashMap table.
+// In case Noir adds support for enums in the future, this
+// should be refactored to have three states:
+// 1. (key, value)
+// 2. (empty)
+// 3. (deleted)
+struct Slot<K, V> {
+    _key_value: Option<(K, V)>,
+    _is_deleted: bool,
+}
+
+impl<K, V> Default for Slot<K, V> {
+    // An empty, never-used slot.
+    fn default() -> Self {
+        Slot { _key_value: Option::none(), _is_deleted: false }
+    }
+}
+
+impl<K, V> Slot<K, V> {
+    // True when the slot currently holds a live key-value pair.
+    fn is_valid(self) -> bool {
+        !self._is_deleted & self._key_value.is_some()
+    }
+
+    // True when the slot may be (re)used for an insertion:
+    // it is either a tombstone or has never been set.
+    fn is_available(self) -> bool {
+        self._is_deleted | self._key_value.is_none()
+    }
+
+    fn key_value(self) -> Option<(K, V)> {
+        self._key_value
+    }
+
+    // Callers must have checked that the slot holds a pair (e.g. via `is_valid`).
+    fn key_value_unchecked(self) -> (K, V) {
+        self._key_value.unwrap_unchecked()
+    }
+
+    // Stores the pair and revives the slot if it was a tombstone.
+    fn set(&mut self, key: K, value: V) {
+        self._key_value = Option::some((key, value));
+        self._is_deleted = false;
+    }
+
+    // Shall not override `_key_value` with Option::none(),
+    // because we must be able to differentiate empty
+    // and deleted slots for lookup.
+    fn mark_deleted(&mut self) {
+        self._is_deleted = true;
+    }
+}
+
+// While conducting a lookup we probe with `attempt` going from 0 to capacity - 1.
+// Heuristic: if the key has not been found after that many attempts, it is very
+// unlikely to be found later, and probing further would heavily degrade performance.
+impl<K, V, B> UHashMap<K, V, B> {
+    // Creates a new instance of UHashMap with specified BuildHasher.
+    // The table starts with a single slot and grows on insertion.
+    // docs:start:with_hasher
+    pub fn with_hasher<H>(_build_hasher: B) -> Self
+    where
+        B: BuildHasher<H> {
+        // docs:end:with_hasher
+        let _table = &[Slot::default()];
+        let _len = 0;
+        Self { _table, _len, _build_hasher }
+    }
+
+    // Creates a new instance of UHashMap with specified BuildHasher
+    // and a table pre-allocated with `capacity` empty slots.
+    // docs:start:with_hasher_and_capacity
+    pub fn with_hasher_and_capacity<H>(_build_hasher: B, capacity: u32) -> Self
+    where
+        B: BuildHasher<H> {
+        // docs:end:with_hasher_and_capacity
+        let mut _table = &[];
+        for _ in 0..capacity {
+            _table = _table.push_back(Slot::default());
+        }
+        let _len = 0;
+        Self { _table, _len, _build_hasher }
+    }
+
+    // Clears the map, removing all key-value entries.
+    // The table is reset to a single empty slot.
+    // docs:start:clear
+    pub fn clear(&mut self) {
+        // docs:end:clear
+        self._table = &[Slot::default()];
+        self._len = 0;
+    }
+
+    // Returns true if the map contains a value for the specified key.
+    // docs:start:contains_key
+    pub fn contains_key<H>(
+        self,
+        key: K
+    ) -> bool
+    where
+        K: Hash + Eq,
+        B: BuildHasher<H>,
+        H: Hasher {
+        // docs:end:contains_key
+        self.get(key).is_some()
+    }
+
+    // Returns true if the map contains no elements.
+    // docs:start:is_empty
+    pub fn is_empty(self) -> bool {
+        // docs:end:is_empty
+        self._len == 0
+    }
+
+    // Returns a slice of all valid entries in this UHashMap.
+    // The length of the returned slice will always match the length of this UHashMap.
+    // docs:start:entries
+    pub fn entries(self) -> [(K, V)] {
+        // docs:end:entries
+        let mut entries = &[];
+
+        for slot in self._table {
+            if slot.is_valid() {
+                // SAFETY: slot.is_valid() ensures there is a key-value pair here.
+                let key_value = slot.key_value_unchecked();
+                entries = entries.push_back(key_value);
+            }
+        }
+
+        let msg = f"Amount of valid elements should have been {self._len} times, but got {entries.len()}.";
+        assert(entries.len() == self._len, msg);
+
+        entries
+    }
+
+    // Returns a slice containing all the keys within this UHashMap.
+    // The length of the returned slice will always match the length of this UHashMap.
+    // docs:start:keys
+    pub fn keys(self) -> [K] {
+        // docs:end:keys
+        let mut keys = &[];
+
+        for slot in self._table {
+            if slot.is_valid() {
+                // SAFETY: slot.is_valid() ensures there is a key-value pair here.
+                let (key, _) = slot.key_value_unchecked();
+                keys = keys.push_back(key);
+            }
+        }
+
+        let msg = f"Amount of valid elements should have been {self._len} times, but got {keys.len()}.";
+        assert(keys.len() == self._len, msg);
+
+        keys
+    }
+
+    // Returns a slice containing all the values within this UHashMap.
+    // The length of the returned slice will always match the length of this UHashMap.
+    // docs:start:values
+    pub fn values(self) -> [V] {
+        // docs:end:values
+        let mut values = &[];
+
+        for slot in self._table {
+            if slot.is_valid() {
+                // SAFETY: slot.is_valid() ensures there is a key-value pair here.
+                let (_, value) = slot.key_value_unchecked();
+                values = values.push_back(value);
+            }
+        }
+
+        let msg = f"Amount of valid elements should have been {self._len} times, but got {values.len()}.";
+        assert(values.len() == self._len, msg);
+
+        values
+    }
+
+    // For each key-value entry applies mutator function.
+    // The map is rebuilt from the mutated entries; if `f` maps several
+    // entries onto the same key, only one of them survives.
+    // docs:start:iter_mut
+    unconstrained pub fn iter_mut<H>(
+        &mut self,
+        f: fn(K, V) -> (K, V)
+    )
+    where
+        K: Eq + Hash,
+        B: BuildHasher<H>,
+        H: Hasher {
+        // docs:end:iter_mut
+        let entries = self.entries();
+        let mut new_map = UHashMap::with_hasher(self._build_hasher);
+
+        for entry in entries {
+            let (key, value) = f(entry.0, entry.1);
+            new_map.insert(key, value);
+        }
+
+        self._table = new_map._table;
+        // Keys may have collapsed onto each other, so the element count
+        // has to be taken from the rebuilt map as well.
+        self._len = new_map._len;
+    }
+
+    // For each key applies mutator function.
+    // The map is rebuilt from the mutated keys; if `f` maps several
+    // keys onto the same key, only one of their entries survives.
+    // docs:start:iter_keys_mut
+    unconstrained pub fn iter_keys_mut<H>(
+        &mut self,
+        f: fn(K) -> K
+    )
+    where
+        K: Eq + Hash,
+        B: BuildHasher<H>,
+        H: Hasher {
+        // docs:end:iter_keys_mut
+        let entries = self.entries();
+        let mut new_map = UHashMap::with_hasher(self._build_hasher);
+
+        for entry in entries {
+            let (key, value) = (f(entry.0), entry.1);
+            new_map.insert(key, value);
+        }
+
+        self._table = new_map._table;
+        // Keys may have collapsed onto each other, so the element count
+        // has to be taken from the rebuilt map as well.
+        self._len = new_map._len;
+    }
+
+    // For each value applies mutator function.
+    // Keys are untouched, so slots are updated in place without rehashing.
+    // docs:start:iter_values_mut
+    pub fn iter_values_mut(&mut self, f: fn(V) -> V) {
+        // docs:end:iter_values_mut
+        for i in 0..self._table.len() {
+            let mut slot = self._table[i];
+            if slot.is_valid() {
+                let (key, value) = slot.key_value_unchecked();
+                slot.set(key, f(value));
+                self._table[i] = slot;
+            }
+        }
+    }
+
+    // Retains only the elements specified by the predicate.
+    // docs:start:retain
+    pub fn retain(&mut self, f: fn(K, V) -> bool) {
+        // docs:end:retain
+        for index in 0..self._table.len() {
+            let mut slot = self._table[index];
+            if slot.is_valid() {
+                let (key, value) = slot.key_value_unchecked();
+                if !f(key, value) {
+                    // Rejected entries become tombstones, exactly as in `remove`.
+                    slot.mark_deleted();
+                    self._len -= 1;
+                    self._table[index] = slot;
+                }
+            }
+        }
+    }
+
+    // Amount of active key-value entries.
+    // docs:start:len
+    pub fn len(self) -> u32 {
+        // docs:end:len
+        self._len
+    }
+
+    // Get the current capacity of the inner table.
+    // docs:start:capacity
+    pub fn capacity(self: Self) -> u32 {
+        // docs:end:capacity
+        self._table.len()
+    }
+
+    // Get the value by key. If it does not exist, returns none().
+    // docs:start:get
+    unconstrained pub fn get<H>(
+        self,
+        key: K
+    ) -> Option<V>
+    where
+        K: Eq + Hash,
+        B: BuildHasher<H>,
+        H: Hasher {
+        // docs:end:get
+        let mut result = Option::none();
+
+        let hash = self.hash(key);
+
+        for attempt in 0..self._table.len() {
+            let index = self.quadratic_probe(hash, attempt as u32);
+            let slot = self._table[index];
+
+            // Not marked as deleted and has key-value.
+            if slot.is_valid() {
+                let (current_key, value) = slot.key_value_unchecked();
+                if current_key == key {
+                    result = Option::some(value);
+                    break;
+                }
+            }
+        }
+
+        result
+    }
+
+    // Insert key-value entry. In case key was already present, value is overridden.
+    //
+    // The probe sequence is followed until the key is found or a never-used slot
+    // is reached. Stopping at the first tombstone instead would store a second copy
+    // of a key that lives further along the chain (insert A, insert colliding B,
+    // remove A, insert B again).
+    // docs:start:insert
+    unconstrained pub fn insert<H>(
+        &mut self,
+        key: K,
+        value: V
+    )
+    where
+        K: Eq + Hash,
+        B: BuildHasher<H>,
+        H: Hasher {
+        // docs:end:insert
+        self.try_resize();
+
+        let hash = self.hash(key);
+
+        // First reusable slot (tombstone or never-used) met along the probe sequence.
+        let mut free_index = 0;
+        let mut has_free = false;
+        // Set once an existing entry for `key` has been updated in place.
+        let mut overridden = false;
+
+        for attempt in 0..self._table.len() {
+            let index = self.quadratic_probe(hash, attempt as u32);
+            let mut slot = self._table[index];
+
+            if slot.is_valid() {
+                let (current_key, _) = slot.key_value_unchecked();
+                if current_key == key {
+                    slot.set(key, value);
+                    self._table[index] = slot;
+                    overridden = true;
+                    break;
+                }
+            } else {
+                if !has_free {
+                    free_index = index;
+                    has_free = true;
+                }
+                // Entries are always placed in the first available slot of their probe
+                // sequence and slots never return to the never-used state, so the key
+                // cannot be stored beyond a never-used slot.
+                if !slot._is_deleted {
+                    break;
+                }
+            }
+        }
+
+        if has_free & (!overridden) {
+            let mut slot = self._table[free_index];
+            slot.set(key, value);
+            self._table[free_index] = slot;
+            self._len += 1;
+        }
+    }
+
+    // Doubles the table once one more element would reach half of the capacity,
+    // re-inserting every live entry (tombstones are dropped in the process).
+    unconstrained fn try_resize<H>(&mut self)
+    where B: BuildHasher<H>, K: Eq + Hash, H: Hasher {
+        if self.len() + 1 >= self.capacity() / 2 {
+            // A zero-capacity table (possible via `with_hasher_and_capacity`) can never
+            // grow by doubling, so it is bumped to a single slot instead.
+            let capacity = if self.capacity() == 0 {
+                1
+            } else {
+                self.capacity() * 2
+            };
+            let mut new_map = UHashMap::with_hasher_and_capacity(self._build_hasher, capacity);
+
+            for entry in self.entries() {
+                new_map.insert(entry.0, entry.1);
+            }
+            *self = new_map;
+        }
+    }
+
+    // Removes a key-value entry. If key is not present, UHashMap remains unchanged.
+    // The slot is kept as a tombstone so that probe chains passing through it stay intact.
+    // docs:start:remove
+    unconstrained pub fn remove<H>(
+        &mut self,
+        key: K
+    )
+    where
+        K: Eq + Hash,
+        B: BuildHasher<H>,
+        H: Hasher {
+        // docs:end:remove
+        let hash = self.hash(key);
+
+        for attempt in 0..self._table.len() {
+            let index = self.quadratic_probe(hash, attempt as u32);
+            let mut slot = self._table[index];
+
+            // Not marked as deleted and has key-value.
+            if slot.is_valid() {
+                let (current_key, _) = slot.key_value_unchecked();
+                if current_key == key {
+                    slot.mark_deleted();
+                    self._table[index] = slot;
+                    self._len -= 1;
+                    break;
+                }
+            }
+        }
+    }
+
+    // Apply UHashMap's hasher onto key to obtain pre-hash for probing.
+    // The hasher output is cast down to u32.
+    fn hash<H>(
+        self,
+        key: K
+    ) -> u32
+    where
+        K: Hash,
+        B: BuildHasher<H>,
+        H: Hasher {
+        let mut hasher = self._build_hasher.build_hasher();
+        key.hash(&mut hasher);
+        hasher.finish() as u32
+    }
+
+    // Probing scheme: quadratic function.
+    // The offset added to the hash is 0.5 * attempt + 0.5 * attempt^2,
+    // i.e. both the linear and the quadratic term use the constant 0.5.
+    // This ensures good uniformity of distribution for table sizes
+    // equal to prime numbers or powers of two.
+    fn quadratic_probe(self: Self, hash: u32, attempt: u32) -> u32 {
+        let capacity = self._table.len();
+        let offset = (attempt + attempt * attempt) / 2;
+        // Both terms are reduced before being added: `hash` can be any u32 value,
+        // so `hash + offset` could exceed the u32 range. The resulting index is
+        // the same as (hash + offset) % capacity.
+        ((hash % capacity) + (offset % capacity)) % capacity
+    }
+}
+
+// Equality class on UHashMap has to test that they have
+// equal sets of key-value entries,
+// thus one is a subset of the other and vice versa.
+// docs:start:eq
+impl<K, V, B, H> Eq for UHashMap<K, V, B>
+where
+    K: Eq + Hash,
+    V: Eq,
+    B: BuildHasher<H>,
+    H: Hasher
+{
+    fn eq(self, other: UHashMap<K, V, B>) -> bool {
+// docs:end:eq
+        let mut equal = false;
+
+        // With equal lengths it is enough to check that every entry of `self`
+        // is present in `other` with the same value.
+        if self.len() == other.len() {
+            equal = true;
+            for slot in self._table {
+                // Not marked as deleted and has key-value.
+                if equal & slot.is_valid() {
+                    let (key, value) = slot.key_value_unchecked();
+                    let other_value = other.get(key);
+
+                    if other_value.is_none() {
+                        equal = false;
+                    } else {
+                        let other_value = other_value.unwrap_unchecked();
+                        if value != other_value {
+                            equal = false;
+                        }
+                    }
+                }
+            }
+        }
+
+        equal
+    }
+}
+
+// docs:start:default
+impl<K, V, B, H> Default for UHashMap<K, V, B>
+where
+    B: BuildHasher<H> + Default,
+    H: Hasher + Default
+{
+    fn default() -> Self {
+// docs:end:default
+        UHashMap::with_hasher(B::default())
+    }
+}
diff --git a/test_programs/execution_success/uhashmap/Nargo.toml b/test_programs/execution_success/uhashmap/Nargo.toml
new file mode 100644
index 00000000000..c09debc9833
--- /dev/null
+++ b/test_programs/execution_success/uhashmap/Nargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "uhashmap"
+type = "bin"
+authors = [""]
+
+[dependencies]
\ No newline at end of file
diff --git a/test_programs/execution_success/uhashmap/Prover.toml b/test_programs/execution_success/uhashmap/Prover.toml
new file mode 100644
index 00000000000..84d4c0733e4
--- /dev/null
+++ b/test_programs/execution_success/uhashmap/Prover.toml
@@ -0,0 +1,26 @@
+# Input: 6 key-value entries (the count must match HASHMAP_LEN in src/main.nr).
+# These must be distinct (both key-to-key, and value-to-value) for correct testing.
+
+[[input]]
+key = 2
+value = 17
+
+[[input]]
+key = 3
+value = 19
+
+[[input]]
+key = 5
+value = 23
+
+[[input]]
+key = 7
+value = 29
+
+[[input]]
+key = 11
+value = 31
+
+[[input]]
+key = 41
+value = 43
\ No newline at end of file
diff --git a/test_programs/execution_success/uhashmap/src/main.nr b/test_programs/execution_success/uhashmap/src/main.nr
new file mode 100644
index 00000000000..395ed21b6b0
--- /dev/null
+++ b/test_programs/execution_success/uhashmap/src/main.nr
@@ -0,0 +1,352 @@
+use std::collections::umap::UHashMap;
+use std::hash::BuildHasherDefault;
+use std::hash::poseidon2::Poseidon2Hasher;
+
+// Key and value types used by every test map in this file.
+type K = Field;
+type V = Field;
+
+// It is more convenient and readable to use structs as input.
+struct Entry{
+    key: Field,
+    value: Field
+}
+
+// Number of entries supplied through Prover.toml.
+global HASHMAP_LEN = 6;
+
+// Orderings used to sort keys/values/entries before comparing them,
+// since the iteration order of the map is not specified.
+global FIELD_CMP = |a: Field, b: Field| a.lt(b);
+
+global K_CMP = FIELD_CMP;
+global V_CMP = FIELD_CMP;
+global KV_CMP = |a: (K, V), b: (K, V)| a.0.lt(b.0);
+
+// Factory for an empty test map hashing with Poseidon2.
+global ALLOCATE_HASHMAP = || -> UHashMap<K, V, BuildHasherDefault<Poseidon2Hasher>>
+    UHashMap::default();
+
+// Entry point: runs every test against the entries read from Prover.toml.
+unconstrained fn main(input: [Entry; HASHMAP_LEN]) {
+    test_sequential(input[0].key, input[0].value);
+    test_multiple_equal_insert(input[1].key, input[1].value);
+    test_value_override(input[2].key, input[2].value, input[3].value);
+    test_insert_and_methods(input);
+    test_hashmaps_equality(input);
+    test_retain();
+    test_iterators();
+    test_mut_iterators();
+
+    doc_tests();
+}
+
+// Insert, get, remove.
+unconstrained fn test_sequential(key: K, value: V) {
+    let mut hashmap = ALLOCATE_HASHMAP();
+    assert(hashmap.is_empty(), "New UHashMap should be empty.");
+
+    // A single insertion must be observable through both len() and get().
+    hashmap.insert(key, value);
+    assert(hashmap.len() == 1, "UHashMap after one insert should have a length of 1 element.");
+
+    let lookup = hashmap.get(key);
+    assert(lookup.is_some(), "Got none value.");
+    let got = lookup.unwrap_unchecked();
+    assert(value == got, f"Inserted {value} but got {got} for the same key.");
+
+    // Removing the only entry must leave the map empty and the key unreachable.
+    hashmap.remove(key);
+    assert(hashmap.is_empty(), "UHashMap after one insert and corresponding removal should be empty.");
+    let lookup_after_removal = hashmap.get(key);
+    assert(lookup_after_removal.is_none(), "Value has been removed, but is still available (not none).");
+}
+
+// Insert same pair several times.
+unconstrained fn test_multiple_equal_insert(key: K, value: V) {
+    let mut hashmap = ALLOCATE_HASHMAP();
+    assert(hashmap.is_empty(), "New UHashMap should be empty.");
+
+    // Repeated insertion of one pair must not create additional entries.
+    for _ in 0..HASHMAP_LEN {
+        hashmap.insert(key, value);
+    }
+
+    let len = hashmap.len();
+    assert(len == 1, f"UHashMap length must be 1, got {len}.");
+
+    let lookup = hashmap.get(key);
+    assert(lookup.is_some(), "Got none value.");
+    let got = lookup.unwrap_unchecked();
+    assert(value == got, f"Inserted {value} but got {got} for the same key.");
+}
+
+// Override value for existing pair.
+unconstrained fn test_value_override(key: K, value: V, new_value: V) {
+    let mut hashmap = ALLOCATE_HASHMAP();
+    assert(hashmap.is_empty(), "New hashmap should be empty.");
+
+    // The second insert targets the same key, so it replaces the value.
+    hashmap.insert(key, value);
+    hashmap.insert(key, new_value);
+    assert(hashmap.len() == 1, "UHashMap length is invalid.");
+
+    let lookup = hashmap.get(key);
+    assert(lookup.is_some(), "Got none value.");
+    let got = lookup.unwrap_unchecked();
+    assert(got == new_value, f"Expected {new_value}, but got {got}.");
+}
+
+// Insert several distinct pairs and test auxiliary methods.
+unconstrained fn test_insert_and_methods(input: [Entry; HASHMAP_LEN]) {
+    let mut hashmap = ALLOCATE_HASHMAP();
+    assert(hashmap.is_empty(), "New UHashMap should be empty.");
+
+    for entry in input {
+        println(f"Inserting {entry}");
+        hashmap.insert(entry.key, entry.value);
+    }
+
+    println(hashmap.len());
+    assert(hashmap.len() == HASHMAP_LEN, "hashmap.len() does not match input length.");
+
+    for entry in input {
+        assert(hashmap.contains_key(entry.key), f"Not found inserted key {entry.key}.");
+    }
+
+    hashmap.clear();
+    assert(hashmap.is_empty(), "UHashMap after clear() should be empty.");
+}
+
+// Insert several pairs and test retaining.
+unconstrained fn test_retain() {
+    let mut hashmap = ALLOCATE_HASHMAP();
+    assert(hashmap.is_empty(), "New UHashMap should be empty.");
+
+    let (key, value) = (5, 11);
+    hashmap.insert(key, value);
+    let (key, value) = (2, 13);
+    hashmap.insert(key, value);
+    let (key, value) = (11, 5);
+    hashmap.insert(key, value);
+
+    // Only (5, 11) and (11, 5) satisfy the predicate; (2, 13) must be dropped.
+    let predicate = |key: K, value: V| -> bool {key * value == 55};
+    hashmap.retain(predicate);
+
+    assert(hashmap.len() == 2, "UHashMap should have retained 2 elements.");
+    assert(hashmap.get(2).is_none(), "Pair should have been removed, since it does not match predicate.");
+    // The surviving pairs must still be reachable after the removal.
+    assert(hashmap.get(5).is_some(), "Pair (5, 11) matches predicate and should have been retained.");
+    assert(hashmap.get(11).is_some(), "Pair (11, 5) matches predicate and should have been retained.");
+}
+
+// Equality trait check.
+unconstrained fn test_hashmaps_equality(input: [Entry; HASHMAP_LEN]) {
+    let mut hashmap_1 = ALLOCATE_HASHMAP();
+    let mut hashmap_2 = ALLOCATE_HASHMAP();
+
+    for entry in input {
+        hashmap_1.insert(entry.key, entry.value);
+        hashmap_2.insert(entry.key, entry.value);
+    }
+
+    assert(hashmap_1 == hashmap_2, "UHashMaps should be equal.");
+
+    // Dropping one entry from a single map must break the equality.
+    hashmap_2.remove(input[0].key);
+
+    assert(hashmap_1 != hashmap_2, "UHashMaps should not be equal.");
+}
+
+// Test entries, keys, values.
+unconstrained fn test_iterators() {
+    let mut hashmap = ALLOCATE_HASHMAP();
+
+    hashmap.insert(2, 3);
+    hashmap.insert(5, 7);
+    hashmap.insert(11, 13);
+
+    // Results are sorted before comparison since the iteration order is unspecified.
+    let keys: [K; 3] = hashmap.keys().as_array().sort_via(K_CMP);
+    let values: [V; 3] = hashmap.values().as_array().sort_via(V_CMP);
+    let entries: [(K, V); 3] = hashmap.entries().as_array().sort_via(KV_CMP);
+
+    assert(keys == [2, 5, 11], "Got incorrect iteration of keys.");
+    assert(values == [3, 7, 13], "Got incorrect iteration of values.");
+    assert(entries == [(2, 3), (5, 7), (11, 13)], "Got incorrect iteration of entries.");
+}
+
+// Test mutable iteration over keys, values and entries.
+unconstrained fn test_mut_iterators() {
+    let mut hashmap = ALLOCATE_HASHMAP();
+
+    hashmap.insert(2, 3);
+    hashmap.insert(5, 7);
+    hashmap.insert(11, 13);
+
+    let f = |k: K| -> K{ k * 3};
+    hashmap.iter_keys_mut(f);
+
+    let f = |v: V| -> V{ v * 5};
+    hashmap.iter_values_mut(f);
+
+    let keys: [K; 3] = hashmap.keys().as_array().sort_via(K_CMP);
+    let values: [V; 3] = hashmap.values().as_array().sort_via(V_CMP);
+
+    assert(keys == [6, 15, 33], f"Got incorrect iteration of keys: {keys}");
+    assert(values == [15, 35, 65], "Got incorrect iteration of values.");
+
+    let f = |k: K, v: V| -> (K, V){(k * 2, v * 2)};
+    hashmap.iter_mut(f);
+
+    let entries: [(K, V); 3] = hashmap.entries().as_array().sort_via(KV_CMP);
+
+    assert(entries == [(12, 30), (30, 70), (66, 130)], "Got incorrect iteration of entries.");
+}
+
+// NOTE(review): the generic arguments of the UHashMap types in the documentation
+// examples below were reconstructed; Field keys and values are assumed - confirm.
+// docs:start:type_alias
+type MyMap = UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>>;
+// docs:end:type_alias
+
+/// Tests examples from the stdlib uhashmap documentation
+unconstrained fn doc_tests() {
+    // docs:start:default_example
+    let hashmap: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>> = UHashMap::default();
+    assert(hashmap.is_empty());
+    // docs:end:default_example
+
+    // docs:start:with_hasher_example
+    let my_hasher: BuildHasherDefault<Poseidon2Hasher> = Default::default();
+    let hashmap: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>> = UHashMap::with_hasher(my_hasher);
+    assert(hashmap.is_empty());
+    // docs:end:with_hasher_example
+
+    // docs:start:insert_example
+    let mut map: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>> = UHashMap::default();
+    map.insert(12, 42);
+    assert(map.len() == 1);
+    // docs:end:insert_example
+
+    get_example(map);
+
+    // docs:start:remove_example
+    map.remove(12);
+    assert(map.is_empty());
+
+    // If a key was not present in the map, remove does nothing
+    map.remove(12);
+    assert(map.is_empty());
+    // docs:end:remove_example
+
+    // docs:start:is_empty_example
+    assert(map.is_empty());
+
+    map.insert(1, 2);
+    assert(!map.is_empty());
+
+    map.remove(1);
+    assert(map.is_empty());
+    // docs:end:is_empty_example
+
+    // docs:start:len_example
+    // This is equivalent to checking map.is_empty()
+    assert(map.len() == 0);
+
+    map.insert(1, 2);
+    map.insert(3, 4);
+    map.insert(5, 6);
+    assert(map.len() == 3);
+
+    // 3 was already present as a key in the hash map, so the length is unchanged
+    map.insert(3, 7);
+    assert(map.len() == 3);
+
+    map.remove(1);
+    assert(map.len() == 2);
+    // docs:end:len_example
+
+    // docs:start:capacity_example
+    let empty_map: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>> = UHashMap::default();
+    assert(empty_map.len() == 0);
+    println(empty_map.capacity());
+    // docs:end:capacity_example
+
+    // docs:start:clear_example
+    assert(!map.is_empty());
+    map.clear();
+    assert(map.is_empty());
+    // docs:end:clear_example
+
+    // docs:start:contains_key_example
+    if map.contains_key(7) {
+        let value = map.get(7);
+        assert(value.is_some());
+    } else {
+        println("No value for key 7!");
+    }
+    // docs:end:contains_key_example
+
+    entries_examples(map);
+    iter_examples(map);
+
+    // docs:start:retain_example
+    map.retain(|k, v| (k != 0) & (v != 0));
+    // docs:end:retain_example
+
+    // docs:start:eq_example
+    let mut map1: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>> = UHashMap::default();
+    let mut map2: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>> = UHashMap::default();
+
+    map1.insert(1, 2);
+    map1.insert(3, 4);
+
+    map2.insert(3, 4);
+    map2.insert(1, 2);
+
+    assert(map1 == map2);
+    // docs:end:eq_example
+}
+
+// docs:start:get_example
+fn get_example(map: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>>) {
+    let x = map.get(12);
+
+    if x.is_some() {
+        assert(x.unwrap() == 42);
+    }
+}
+// docs:end:get_example
+
+fn entries_examples(map: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>>) {
+    // docs:start:entries_example
+    let entries = map.entries();
+
+    // `entries` is a slice with one element per entry of the map,
+    // so it can be iterated over directly
+    for entry in entries {
+        let (key, value) = entry;
+        println(f"{key} -> {value}");
+    }
+    // docs:end:entries_example
+
+    // docs:start:keys_example
+    let keys = map.keys();
+
+    for key in keys {
+        let value = map.get(key).unwrap_unchecked();
+        println(f"{key} -> {value}");
+    }
+    // docs:end:keys_example
+
+    // docs:start:values_example
+    let values = map.values();
+
+    for value in values {
+        println(f"Found value {value}");
+    }
+    // docs:end:values_example
+}
+
+unconstrained fn iter_examples(mut map: UHashMap<Field, Field, BuildHasherDefault<Poseidon2Hasher>>) {
+    // docs:start:iter_mut_example
+    // Add 1 to each key in the map, and double the value associated with that key.
+    map.iter_mut(|k, v| (k + 1, v * 2));
+    // docs:end:iter_mut_example
+
+    // docs:start:iter_keys_mut_example
+    // Double each key, leaving the value associated with that key untouched
+    map.iter_keys_mut(|k| k * 2);
+    // docs:end:iter_keys_mut_example
+
+    // docs:start:iter_values_mut_example
+    // Halve each value
+    map.iter_values_mut(|v| v / 2);
+    // docs:end:iter_values_mut_example
+}