From fe977aaecd08f62ef4c48ad73332e82911a3636a Mon Sep 17 00:00:00 2001 From: Mazdak Farrokhzad Date: Mon, 9 Dec 2024 19:15:22 +0100 Subject: [PATCH] don't use multimap for unique indices & correctness fix for create_index --- .../datastore/locking_tx_datastore/mut_tx.rs | 26 +- crates/table/benches/page_manager.rs | 56 +- crates/table/src/btree_index.rs | 551 +++++++++++++----- crates/table/src/btree_index/multimap.rs | 32 +- crates/table/src/btree_index/uniquemap.rs | 92 +++ crates/table/src/table.rs | 23 +- 6 files changed, 565 insertions(+), 215 deletions(-) create mode 100644 crates/table/src/btree_index/uniquemap.rs diff --git a/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs b/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs index 54c5c2080a7..83636f4eebe 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs @@ -399,15 +399,35 @@ impl MutTxId { _ => unimplemented!(), }; // Create and build the index. + // + // Ensure adding the index does not cause a unique constraint violation due to + // the existing rows having the same value for some column(s). let mut insert_index = table.new_index(index.index_id, &columns, is_unique)?; - insert_index.build_from_rows(&columns, table.scan_rows(blob_store))?; + let mut build_from_rows = |table: &Table, bs: &dyn BlobStore| -> Result<()> { + if let Some(violation) = insert_index.build_from_rows(&columns, table.scan_rows(bs))? { + let violation = table + .get_row_ref(bs, violation) + .expect("row came from scanning the table") + .project(&columns) + .expect("`cols` should consist of valid columns for this table"); + return Err(IndexError::from(table.build_error_unique(&insert_index, &columns, violation)).into()); + } + Ok(()) + }; + build_from_rows(table, blob_store)?; // NOTE: Also add all the rows in the already committed table to the index. + // // FIXME: Is this correct? Index scan iterators (incl. the existing `Locking` versions) // appear to assume that a table's index refers only to rows within that table, // and does not handle the case where a `TxState` index refers to `CommittedState` rows. - if let Some(committed_table) = commit_table { - insert_index.build_from_rows(&columns, committed_table.scan_rows(commit_blob_store))?; + // + // TODO(centril): An alternative here is to actually add this index to `CommittedState`, + // pretending that it was already committed, and recording this pretense. + // Then, we can roll that back on a failed tx. + if let Some(commit_table) = commit_table { + build_from_rows(commit_table, commit_blob_store)?; } + table.indexes.insert(columns.clone(), insert_index); // Associate `index_id -> (table_id, col_list)` for fast lookup. idx_map.insert(index_id, (table_id, columns.clone())); diff --git a/crates/table/benches/page_manager.rs b/crates/table/benches/page_manager.rs index 5cba4972543..a5d29f1e08b 100644 --- a/crates/table/benches/page_manager.rs +++ b/crates/table/benches/page_manager.rs @@ -721,12 +721,12 @@ impl IndexedRow for Box { } } -fn make_table_with_indexes() -> Table { +fn make_table_with_indexes(unique: bool) -> Table { let schema = R::make_schema(); let mut tbl = Table::new(schema.into(), SquashedOffset::COMMITTED_STATE); let cols = R::indexed_columns(); - let idx = tbl.new_index(IndexId::SENTINEL, &cols, false).unwrap(); + let idx = tbl.new_index(IndexId::SENTINEL, &cols, unique).unwrap(); tbl.insert_index(&NullBlobStore, cols, idx); tbl @@ -774,10 +774,10 @@ fn clear_all_same(tbl: &mut Table, val_same: u64) { } } -fn bench_id_for_index(name: &str, num_rows: u64, same_ratio: f64, num_same: usize) -> BenchmarkId { +fn bench_id_for_index(name: &str, num_rows: u64, same_ratio: f64, num_same: usize, unique: bool) -> BenchmarkId { BenchmarkId::new( name, - format_args!("(rows = {num_rows}, sratio = {same_ratio}, snum = {num_same})"), + format_args!("(rows = {num_rows}, sratio = {same_ratio}, snum = {num_same}, unique = {unique})"), ) } @@ -785,11 +785,16 @@ fn make_table_with_same_ratio( mut make_row: impl FnMut(u64) -> R, num_rows: u64, same_ratio: f64, + unique: bool, ) -> (Table, usize, u64) { - let mut tbl = make_table_with_indexes::(); + let mut tbl = make_table_with_indexes::(unique); - let num_same = (num_rows as f64 * same_ratio) as usize; - let num_same = num_same.max(1); + let num_same = if unique { + 1 + } else { + let num_same = (num_rows as f64 * same_ratio) as usize; + num_same.max(1) + }; let num_diff = num_rows / num_same as u64; for i in 0..num_diff { @@ -808,11 +813,11 @@ fn index_insert(c: &mut Criterion) { same_ratio: f64, ) { let make_row_move = &mut make_row; - let (tbl, num_same, _) = make_table_with_same_ratio::(make_row_move, num_rows, same_ratio); + let (tbl, num_same, _) = make_table_with_same_ratio::(make_row_move, num_rows, same_ratio, false); let mut ctx = (tbl, NullBlobStore); group.bench_with_input( - bench_id_for_index(name, num_rows, same_ratio, num_same), + bench_id_for_index(name, num_rows, same_ratio, num_same, false), &num_rows, |b, &num_rows| { let pre = |_, (tbl, _): &mut (Table, NullBlobStore)| { @@ -859,12 +864,13 @@ fn index_seek(c: &mut Criterion) { name: &str, num_rows: u64, same_ratio: f64, + unique: bool, ) { let make_row_move = &mut make_row; - let (tbl, num_same, num_diff) = make_table_with_same_ratio::(make_row_move, num_rows, same_ratio); + let (tbl, num_same, num_diff) = make_table_with_same_ratio::(make_row_move, num_rows, same_ratio, unique); group.bench_with_input( - bench_id_for_index(name, num_rows, same_ratio, num_same), + bench_id_for_index(name, num_rows, same_ratio, num_same, unique), &num_diff, |b, &num_diff| { let col_to_seek = black_box(R::column_value_from_u64(num_diff / 2)); @@ -896,25 +902,27 @@ fn index_seek(c: &mut Criterion) { group: Group<'_, '_>, name: &str, same_ratio: f64, + unique: bool, ) { group.throughput(Throughput::Elements(1)); for num_rows in powers(TABLE_SIZE_POWERS) { - bench_index_seek(&mut make_row, group, name, num_rows, same_ratio); + bench_index_seek(&mut make_row, group, name, num_rows, same_ratio, unique); } } let mut group = c.benchmark_group("index_seek"); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "u64", 0.0); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.00); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.01); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.05); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.10); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.25); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.50); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 1.00); - bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x64", 0.0); - bench_many_table_sizes::>(|i| i.to_string().into(), &mut group, "String", 0.0); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "u64", 0.0, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "u64", 0.0, true); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.00, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.01, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.05, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.10, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.25, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 0.50, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x8", 1.00, false); + bench_many_table_sizes::(FixedLenRow::from_u64, &mut group, "U32x64", 0.0, false); + bench_many_table_sizes::>(|i| i.to_string().into(), &mut group, "String", 0.0, false); } fn index_delete(c: &mut Criterion) { @@ -926,10 +934,10 @@ fn index_delete(c: &mut Criterion) { same_ratio: f64, ) { let make_row_move = &mut make_row; - let (mut tbl, num_same, _) = make_table_with_same_ratio::(make_row_move, num_rows, same_ratio); + let (mut tbl, num_same, _) = make_table_with_same_ratio::(make_row_move, num_rows, same_ratio, false); group.bench_with_input( - bench_id_for_index(name, num_rows, same_ratio, num_same), + bench_id_for_index(name, num_rows, same_ratio, num_same, false), &num_rows, |b, &num_rows| { let pre = |_, tbl: &mut Table| { diff --git a/crates/table/src/btree_index.rs b/crates/table/src/btree_index.rs index 98a84fab391..7b2748f7dfd 100644 --- a/crates/table/src/btree_index.rs +++ b/crates/table/src/btree_index.rs @@ -31,14 +31,18 @@ use spacetimedb_sats::{ }; mod multimap; +mod uniquemap; type Index = multimap::MultiMap; type IndexIter<'a, K> = multimap::MultiMapRangeIter<'a, K, RowPointer>; +type UniqueIndex = uniquemap::UniqueMap; +type UniqueIndexIter<'a, K> = uniquemap::UniqueMapRangeIter<'a, K, RowPointer>; -/// An iterator over a [`TypedMultiMap`], with a specialized key type. +/// An iterator over a [`TypedIndex`], with a specialized key type. /// /// See module docs for info about specialization. -enum TypedMultiMapRangeIter<'a> { +enum TypedIndexRangeIter<'a> { + // All the non-unique index iterators. Bool(IndexIter<'a, bool>), U8(IndexIter<'a, u8>), I8(IndexIter<'a, i8>), @@ -53,10 +57,27 @@ enum TypedMultiMapRangeIter<'a> { U256(IndexIter<'a, u256>), I256(IndexIter<'a, i256>), String(IndexIter<'a, Box>), - AlgebraicValue(IndexIter<'a, AlgebraicValue>), + AV(IndexIter<'a, AlgebraicValue>), + + // All the unique index iterators. + UniqueBool(UniqueIndexIter<'a, bool>), + UniqueU8(UniqueIndexIter<'a, u8>), + UniqueI8(UniqueIndexIter<'a, i8>), + UniqueU16(UniqueIndexIter<'a, u16>), + UniqueI16(UniqueIndexIter<'a, i16>), + UniqueU32(UniqueIndexIter<'a, u32>), + UniqueI32(UniqueIndexIter<'a, i32>), + UniqueU64(UniqueIndexIter<'a, u64>), + UniqueI64(UniqueIndexIter<'a, i64>), + UniqueU128(UniqueIndexIter<'a, Packed>), + UniqueI128(UniqueIndexIter<'a, Packed>), + UniqueU256(UniqueIndexIter<'a, u256>), + UniqueI256(UniqueIndexIter<'a, i256>), + UniqueString(UniqueIndexIter<'a, Box>), + UniqueAV(UniqueIndexIter<'a, AlgebraicValue>), } -impl Iterator for TypedMultiMapRangeIter<'_> { +impl Iterator for TypedIndexRangeIter<'_> { type Item = RowPointer; fn next(&mut self) -> Option { match self { @@ -74,7 +95,23 @@ impl Iterator for TypedMultiMapRangeIter<'_> { Self::U256(this) => this.next(), Self::I256(this) => this.next(), Self::String(this) => this.next(), - Self::AlgebraicValue(this) => this.next(), + Self::AV(this) => this.next(), + + Self::UniqueBool(this) => this.next(), + Self::UniqueU8(this) => this.next(), + Self::UniqueI8(this) => this.next(), + Self::UniqueU16(this) => this.next(), + Self::UniqueI16(this) => this.next(), + Self::UniqueU32(this) => this.next(), + Self::UniqueI32(this) => this.next(), + Self::UniqueU64(this) => this.next(), + Self::UniqueI64(this) => this.next(), + Self::UniqueU128(this) => this.next(), + Self::UniqueI128(this) => this.next(), + Self::UniqueU256(this) => this.next(), + Self::UniqueI256(this) => this.next(), + Self::UniqueString(this) => this.next(), + Self::UniqueAV(this) => this.next(), } .copied() } @@ -83,7 +120,7 @@ impl Iterator for TypedMultiMapRangeIter<'_> { /// An iterator over rows matching a certain [`AlgebraicValue`] on the [`BTreeIndex`]. pub struct BTreeIndexRangeIter<'a> { /// The iterator seeking for matching values. - iter: TypedMultiMapRangeIter<'a>, + iter: TypedIndexRangeIter<'a>, } impl Iterator for BTreeIndexRangeIter<'_> { @@ -94,10 +131,12 @@ impl Iterator for BTreeIndexRangeIter<'_> { } } -/// A `MultiMap` from a key type determined at runtime to `RowPointer`. +/// An index from a key type determined at runtime to `RowPointer`(s). /// /// See module docs for info about specialization. +#[derive(Debug)] enum TypedIndex { + // All the non-unique index types. Bool(Index), U8(Index), I8(Index), @@ -112,7 +151,24 @@ enum TypedIndex { U256(Index), I256(Index), String(Index>), - AlgebraicValue(Index), + AV(Index), + + // All the unique index types. + UniqueBool(UniqueIndex), + UniqueU8(UniqueIndex), + UniqueI8(UniqueIndex), + UniqueU16(UniqueIndex), + UniqueI16(UniqueIndex), + UniqueU32(UniqueIndex), + UniqueI32(UniqueIndex), + UniqueU64(UniqueIndex), + UniqueI64(UniqueIndex), + UniqueU128(UniqueIndex>), + UniqueI128(UniqueIndex>), + UniqueU256(UniqueIndex), + UniqueI256(UniqueIndex), + UniqueString(UniqueIndex>), + UniqueAV(UniqueIndex), } impl MemoryUsage for TypedIndex { @@ -132,52 +188,127 @@ impl MemoryUsage for TypedIndex { TypedIndex::U256(this) => this.heap_usage(), TypedIndex::I256(this) => this.heap_usage(), TypedIndex::String(this) => this.heap_usage(), - TypedIndex::AlgebraicValue(this) => this.heap_usage(), + TypedIndex::AV(this) => this.heap_usage(), + + TypedIndex::UniqueBool(this) => this.heap_usage(), + TypedIndex::UniqueU8(this) => this.heap_usage(), + TypedIndex::UniqueI8(this) => this.heap_usage(), + TypedIndex::UniqueU16(this) => this.heap_usage(), + TypedIndex::UniqueI16(this) => this.heap_usage(), + TypedIndex::UniqueU32(this) => this.heap_usage(), + TypedIndex::UniqueI32(this) => this.heap_usage(), + TypedIndex::UniqueU64(this) => this.heap_usage(), + TypedIndex::UniqueI64(this) => this.heap_usage(), + TypedIndex::UniqueU128(this) => this.heap_usage(), + TypedIndex::UniqueI128(this) => this.heap_usage(), + TypedIndex::UniqueU256(this) => this.heap_usage(), + TypedIndex::UniqueI256(this) => this.heap_usage(), + TypedIndex::UniqueString(this) => this.heap_usage(), + TypedIndex::UniqueAV(this) => this.heap_usage(), } } } impl TypedIndex { - /// Add the row referred to by `row_ref` to the index `self`, - /// which must be keyed at `cols`. - /// - /// If `cols` is inconsistent with `self`, - /// or the `row_ref` has a row type other than that used for `self`, - /// this will behave oddly; it may return an error, - /// or may insert a nonsense value into the index. - /// Note, however, that it will not invoke undefined behavior. - fn insert(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result<(), InvalidFieldError> { - fn insert_at_type( - this: &mut Index, - cols: &ColList, - row_ref: RowRef<'_>, - ) -> Result<(), InvalidFieldError> { - let col_pos = cols.as_singleton().unwrap(); - let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - this.insert(key, row_ref.pointer()); - Ok(()) + /// Returns a new index with keys being of `key_type` and the index possibly `is_unique`. + fn new(key_type: &AlgebraicType, is_unique: bool) -> Self { + // If the index is on a single column of a primitive type, + // use a homogeneous map with a native key type. + use TypedIndex::*; + if is_unique { + match key_type { + AlgebraicType::Bool => UniqueBool(<_>::default()), + AlgebraicType::I8 => UniqueI8(<_>::default()), + AlgebraicType::U8 => UniqueU8(<_>::default()), + AlgebraicType::I16 => UniqueI16(<_>::default()), + AlgebraicType::U16 => UniqueU16(<_>::default()), + AlgebraicType::I32 => UniqueI32(<_>::default()), + AlgebraicType::U32 => UniqueU32(<_>::default()), + AlgebraicType::I64 => UniqueI64(<_>::default()), + AlgebraicType::U64 => UniqueU64(<_>::default()), + AlgebraicType::I128 => UniqueI128(<_>::default()), + AlgebraicType::U128 => UniqueU128(<_>::default()), + AlgebraicType::I256 => UniqueI256(<_>::default()), + AlgebraicType::U256 => UniqueU256(<_>::default()), + AlgebraicType::String => UniqueString(<_>::default()), + + // The index is either multi-column, + // or we don't care to specialize on the key type, + // so use a map keyed on `AlgebraicValue`. + _ => UniqueAV(<_>::default()), + } + } else { + match key_type { + AlgebraicType::Bool => Bool(<_>::default()), + AlgebraicType::I8 => I8(<_>::default()), + AlgebraicType::U8 => U8(<_>::default()), + AlgebraicType::I16 => I16(<_>::default()), + AlgebraicType::U16 => U16(<_>::default()), + AlgebraicType::I32 => I32(<_>::default()), + AlgebraicType::U32 => U32(<_>::default()), + AlgebraicType::I64 => I64(<_>::default()), + AlgebraicType::U64 => U64(<_>::default()), + AlgebraicType::I128 => I128(<_>::default()), + AlgebraicType::U128 => U128(<_>::default()), + AlgebraicType::I256 => I256(<_>::default()), + AlgebraicType::U256 => U256(<_>::default()), + AlgebraicType::String => String(<_>::default()), + + // The index is either multi-column, + // or we don't care to specialize on the key type, + // so use a map keyed on `AlgebraicValue`. + _ => AV(<_>::default()), + } } + } + + /// Clones the structure of this index but not the indexed elements, + /// so the returned index is empty. + fn clone_structure(&self) -> Self { + use TypedIndex::*; match self { - Self::Bool(this) => insert_at_type(this, cols, row_ref), - Self::U8(this) => insert_at_type(this, cols, row_ref), - Self::I8(this) => insert_at_type(this, cols, row_ref), - Self::U16(this) => insert_at_type(this, cols, row_ref), - Self::I16(this) => insert_at_type(this, cols, row_ref), - Self::U32(this) => insert_at_type(this, cols, row_ref), - Self::I32(this) => insert_at_type(this, cols, row_ref), - Self::U64(this) => insert_at_type(this, cols, row_ref), - Self::I64(this) => insert_at_type(this, cols, row_ref), - Self::U128(this) => insert_at_type(this, cols, row_ref), - Self::I128(this) => insert_at_type(this, cols, row_ref), - Self::U256(this) => insert_at_type(this, cols, row_ref), - Self::I256(this) => insert_at_type(this, cols, row_ref), - Self::String(this) => insert_at_type(this, cols, row_ref), - - Self::AlgebraicValue(this) => { - let key = row_ref.project(cols)?; - this.insert(key, row_ref.pointer()); - Ok(()) - } + Bool(_) => Bool(<_>::default()), + U8(_) => U8(<_>::default()), + I8(_) => I8(<_>::default()), + U16(_) => U16(<_>::default()), + I16(_) => I16(<_>::default()), + U32(_) => U32(<_>::default()), + I32(_) => I32(<_>::default()), + U64(_) => U64(<_>::default()), + I64(_) => I64(<_>::default()), + U128(_) => U128(<_>::default()), + I128(_) => I128(<_>::default()), + U256(_) => U256(<_>::default()), + I256(_) => I256(<_>::default()), + String(_) => String(<_>::default()), + AV(_) => AV(<_>::default()), + UniqueBool(_) => UniqueBool(<_>::default()), + UniqueU8(_) => UniqueU8(<_>::default()), + UniqueI8(_) => UniqueI8(<_>::default()), + UniqueU16(_) => UniqueU16(<_>::default()), + UniqueI16(_) => UniqueI16(<_>::default()), + UniqueU32(_) => UniqueU32(<_>::default()), + UniqueI32(_) => UniqueI32(<_>::default()), + UniqueU64(_) => UniqueU64(<_>::default()), + UniqueI64(_) => UniqueI64(<_>::default()), + UniqueU128(_) => UniqueU128(<_>::default()), + UniqueI128(_) => UniqueI128(<_>::default()), + UniqueU256(_) => UniqueU256(<_>::default()), + UniqueI256(_) => UniqueI256(<_>::default()), + UniqueString(_) => UniqueString(<_>::default()), + UniqueAV(_) => UniqueAV(<_>::default()), + } + } + + /// Returns whether this is a unique index or not. + fn is_unique(&self) -> bool { + use TypedIndex::*; + match self { + Bool(_) | U8(_) | I8(_) | U16(_) | I16(_) | U32(_) | I32(_) | U64(_) | I64(_) | U128(_) | I128(_) + | U256(_) | I256(_) | String(_) | AV(_) => false, + UniqueBool(_) | UniqueU8(_) | UniqueI8(_) | UniqueU16(_) | UniqueI16(_) | UniqueU32(_) | UniqueI32(_) + | UniqueU64(_) | UniqueI64(_) | UniqueU128(_) | UniqueI128(_) | UniqueU256(_) | UniqueI256(_) + | UniqueString(_) | UniqueAV(_) => true, } } @@ -190,38 +321,65 @@ impl TypedIndex { /// or may insert a nonsense value into the index. /// Note, however, that it will not invoke undefined behavior. /// - /// Assumes that this is a unique constraint - /// and returns `Ok(Some(existing_row))` if it's violated. + /// Returns `Ok(Some(existing_row))` if this index was a unique index that was violated. /// The index is not inserted to in that case. - fn insert_unique(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result, InvalidFieldError> { - fn insert_at_type( + fn insert(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result, InvalidFieldError> { + fn mm_insert_at_type( this: &mut Index, cols: &ColList, row_ref: RowRef<'_>, ) -> Result, InvalidFieldError> { let col_pos = cols.as_singleton().unwrap(); let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - Ok(this.insert_unique(key, row_ref.pointer()).copied()) + this.insert(key, row_ref.pointer()); + Ok(None) + } + fn um_insert_at_type( + this: &mut UniqueIndex, + cols: &ColList, + row_ref: RowRef<'_>, + ) -> Result, InvalidFieldError> { + let col_pos = cols.as_singleton().unwrap(); + let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + Ok(this.insert(key, row_ref.pointer()).copied()) } let unique_violation = match self { - Self::Bool(this) => insert_at_type(this, cols, row_ref), - Self::U8(this) => insert_at_type(this, cols, row_ref), - Self::I8(this) => insert_at_type(this, cols, row_ref), - Self::U16(this) => insert_at_type(this, cols, row_ref), - Self::I16(this) => insert_at_type(this, cols, row_ref), - Self::U32(this) => insert_at_type(this, cols, row_ref), - Self::I32(this) => insert_at_type(this, cols, row_ref), - Self::U64(this) => insert_at_type(this, cols, row_ref), - Self::I64(this) => insert_at_type(this, cols, row_ref), - Self::U128(this) => insert_at_type(this, cols, row_ref), - Self::I128(this) => insert_at_type(this, cols, row_ref), - Self::U256(this) => insert_at_type(this, cols, row_ref), - Self::I256(this) => insert_at_type(this, cols, row_ref), - Self::String(this) => insert_at_type(this, cols, row_ref), - - Self::AlgebraicValue(this) => { + Self::Bool(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::U8(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::I8(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::U16(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::I16(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::U32(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::I32(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::U64(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::I64(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::U128(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::I128(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::U256(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::I256(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::String(idx) => mm_insert_at_type(idx, cols, row_ref), + Self::AV(this) => { + let key = row_ref.project(cols)?; + this.insert(key, row_ref.pointer()); + Ok(None) + } + Self::UniqueBool(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueU8(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueI8(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueU16(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueI16(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueU32(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueI32(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueU64(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueI64(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueU128(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueI128(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueU256(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueI256(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueString(idx) => um_insert_at_type(idx, cols, row_ref), + Self::UniqueAV(this) => { let key = row_ref.project(cols)?; - Ok(this.insert_unique(key, row_ref.pointer()).copied()) + Ok(this.insert(key, row_ref.pointer()).copied()) } }?; Ok(unique_violation) @@ -236,7 +394,7 @@ impl TypedIndex { /// or remove the wrong value from the index. /// Note, however, that it will not invoke undefined behavior. fn delete(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result { - fn delete_at_type( + fn mm_delete_at_type( this: &mut Index, cols: &ColList, row_ref: RowRef<'_>, @@ -245,32 +403,58 @@ impl TypedIndex { let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; Ok(this.delete(&key, &row_ref.pointer())) } + fn um_delete_at_type( + this: &mut UniqueIndex, + cols: &ColList, + row_ref: RowRef<'_>, + ) -> Result { + let col_pos = cols.as_singleton().unwrap(); + let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + Ok(this.delete(&key)) + } match self { - Self::Bool(this) => delete_at_type(this, cols, row_ref), - Self::U8(this) => delete_at_type(this, cols, row_ref), - Self::I8(this) => delete_at_type(this, cols, row_ref), - Self::U16(this) => delete_at_type(this, cols, row_ref), - Self::I16(this) => delete_at_type(this, cols, row_ref), - Self::U32(this) => delete_at_type(this, cols, row_ref), - Self::I32(this) => delete_at_type(this, cols, row_ref), - Self::U64(this) => delete_at_type(this, cols, row_ref), - Self::I64(this) => delete_at_type(this, cols, row_ref), - Self::U128(this) => delete_at_type(this, cols, row_ref), - Self::I128(this) => delete_at_type(this, cols, row_ref), - Self::U256(this) => delete_at_type(this, cols, row_ref), - Self::I256(this) => delete_at_type(this, cols, row_ref), - Self::String(this) => delete_at_type(this, cols, row_ref), - - Self::AlgebraicValue(this) => { + Self::Bool(this) => mm_delete_at_type(this, cols, row_ref), + Self::U8(this) => mm_delete_at_type(this, cols, row_ref), + Self::I8(this) => mm_delete_at_type(this, cols, row_ref), + Self::U16(this) => mm_delete_at_type(this, cols, row_ref), + Self::I16(this) => mm_delete_at_type(this, cols, row_ref), + Self::U32(this) => mm_delete_at_type(this, cols, row_ref), + Self::I32(this) => mm_delete_at_type(this, cols, row_ref), + Self::U64(this) => mm_delete_at_type(this, cols, row_ref), + Self::I64(this) => mm_delete_at_type(this, cols, row_ref), + Self::U128(this) => mm_delete_at_type(this, cols, row_ref), + Self::I128(this) => mm_delete_at_type(this, cols, row_ref), + Self::U256(this) => mm_delete_at_type(this, cols, row_ref), + Self::I256(this) => mm_delete_at_type(this, cols, row_ref), + Self::String(this) => mm_delete_at_type(this, cols, row_ref), + Self::AV(this) => { let key = row_ref.project(cols)?; Ok(this.delete(&key, &row_ref.pointer())) } + Self::UniqueBool(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueU8(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueI8(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueU16(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueI16(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueU32(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueI32(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueU64(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueI64(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueU128(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueI128(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueU256(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueI256(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueString(this) => um_delete_at_type(this, cols, row_ref), + Self::UniqueAV(this) => { + let key = row_ref.project(cols)?; + Ok(this.delete(&key)) + } } } - fn values_in_range(&self, range: &impl RangeBounds) -> TypedMultiMapRangeIter<'_> { - fn iter_at_type<'a, T: Ord>( + fn values_in_range(&self, range: &impl RangeBounds) -> TypedIndexRangeIter<'_> { + fn mm_iter_at_type<'a, T: Ord>( this: &'a Index, range: &impl RangeBounds, av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, @@ -280,28 +464,50 @@ impl TypedIndex { let end = range.end_bound().map(av_as_t); this.values_in_range(&(start, end)) } + fn um_iter_at_type<'a, T: Ord>( + this: &'a UniqueIndex, + range: &impl RangeBounds, + av_as_t: impl Fn(&AlgebraicValue) -> Option<&T>, + ) -> UniqueIndexIter<'a, T> { + let av_as_t = |v| av_as_t(v).expect("bound does not conform to key type of index"); + let start = range.start_bound().map(av_as_t); + let end = range.end_bound().map(av_as_t); + this.values_in_range(&(start, end)) + } + use TypedIndexRangeIter::*; match self { - Self::Bool(this) => TypedMultiMapRangeIter::Bool(iter_at_type(this, range, AlgebraicValue::as_bool)), - Self::U8(this) => TypedMultiMapRangeIter::U8(iter_at_type(this, range, AlgebraicValue::as_u8)), - Self::I8(this) => TypedMultiMapRangeIter::I8(iter_at_type(this, range, AlgebraicValue::as_i8)), - Self::U16(this) => TypedMultiMapRangeIter::U16(iter_at_type(this, range, AlgebraicValue::as_u16)), - Self::I16(this) => TypedMultiMapRangeIter::I16(iter_at_type(this, range, AlgebraicValue::as_i16)), - Self::U32(this) => TypedMultiMapRangeIter::U32(iter_at_type(this, range, AlgebraicValue::as_u32)), - Self::I32(this) => TypedMultiMapRangeIter::I32(iter_at_type(this, range, AlgebraicValue::as_i32)), - Self::U64(this) => TypedMultiMapRangeIter::U64(iter_at_type(this, range, AlgebraicValue::as_u64)), - Self::I64(this) => TypedMultiMapRangeIter::I64(iter_at_type(this, range, AlgebraicValue::as_i64)), - Self::U128(this) => TypedMultiMapRangeIter::U128(iter_at_type(this, range, AlgebraicValue::as_u128)), - Self::I128(this) => TypedMultiMapRangeIter::I128(iter_at_type(this, range, AlgebraicValue::as_i128)), - Self::U256(this) => { - TypedMultiMapRangeIter::U256(iter_at_type(this, range, |av| av.as_u256().map(|x| &**x))) - } - Self::I256(this) => { - TypedMultiMapRangeIter::I256(iter_at_type(this, range, |av| av.as_i256().map(|x| &**x))) - } - Self::String(this) => TypedMultiMapRangeIter::String(iter_at_type(this, range, AlgebraicValue::as_string)), - - Self::AlgebraicValue(this) => TypedMultiMapRangeIter::AlgebraicValue(this.values_in_range(range)), + Self::Bool(this) => Bool(mm_iter_at_type(this, range, AlgebraicValue::as_bool)), + Self::U8(this) => U8(mm_iter_at_type(this, range, AlgebraicValue::as_u8)), + Self::I8(this) => I8(mm_iter_at_type(this, range, AlgebraicValue::as_i8)), + Self::U16(this) => U16(mm_iter_at_type(this, range, AlgebraicValue::as_u16)), + Self::I16(this) => I16(mm_iter_at_type(this, range, AlgebraicValue::as_i16)), + Self::U32(this) => U32(mm_iter_at_type(this, range, AlgebraicValue::as_u32)), + Self::I32(this) => I32(mm_iter_at_type(this, range, AlgebraicValue::as_i32)), + Self::U64(this) => U64(mm_iter_at_type(this, range, AlgebraicValue::as_u64)), + Self::I64(this) => I64(mm_iter_at_type(this, range, AlgebraicValue::as_i64)), + Self::U128(this) => U128(mm_iter_at_type(this, range, AlgebraicValue::as_u128)), + Self::I128(this) => I128(mm_iter_at_type(this, range, AlgebraicValue::as_i128)), + Self::U256(this) => U256(mm_iter_at_type(this, range, |av| av.as_u256().map(|x| &**x))), + Self::I256(this) => I256(mm_iter_at_type(this, range, |av| av.as_i256().map(|x| &**x))), + Self::String(this) => String(mm_iter_at_type(this, range, AlgebraicValue::as_string)), + Self::AV(this) => AV(this.values_in_range(range)), + + Self::UniqueBool(this) => UniqueBool(um_iter_at_type(this, range, AlgebraicValue::as_bool)), + Self::UniqueU8(this) => UniqueU8(um_iter_at_type(this, range, AlgebraicValue::as_u8)), + Self::UniqueI8(this) => UniqueI8(um_iter_at_type(this, range, AlgebraicValue::as_i8)), + Self::UniqueU16(this) => UniqueU16(um_iter_at_type(this, range, AlgebraicValue::as_u16)), + Self::UniqueI16(this) => UniqueI16(um_iter_at_type(this, range, AlgebraicValue::as_i16)), + Self::UniqueU32(this) => UniqueU32(um_iter_at_type(this, range, AlgebraicValue::as_u32)), + Self::UniqueI32(this) => UniqueI32(um_iter_at_type(this, range, AlgebraicValue::as_i32)), + Self::UniqueU64(this) => UniqueU64(um_iter_at_type(this, range, AlgebraicValue::as_u64)), + Self::UniqueI64(this) => UniqueI64(um_iter_at_type(this, range, AlgebraicValue::as_i64)), + Self::UniqueU128(this) => UniqueU128(um_iter_at_type(this, range, AlgebraicValue::as_u128)), + Self::UniqueI128(this) => UniqueI128(um_iter_at_type(this, range, AlgebraicValue::as_i128)), + Self::UniqueU256(this) => UniqueU256(um_iter_at_type(this, range, |av| av.as_u256().map(|x| &**x))), + Self::UniqueI256(this) => UniqueI256(um_iter_at_type(this, range, |av| av.as_i256().map(|x| &**x))), + Self::UniqueString(this) => UniqueString(um_iter_at_type(this, range, AlgebraicValue::as_string)), + Self::UniqueAV(this) => UniqueAV(this.values_in_range(range)), } } @@ -321,7 +527,23 @@ impl TypedIndex { Self::U256(this) => this.clear(), Self::I256(this) => this.clear(), Self::String(this) => this.clear(), - Self::AlgebraicValue(this) => this.clear(), + Self::AV(this) => this.clear(), + + Self::UniqueBool(this) => this.clear(), + Self::UniqueU8(this) => this.clear(), + Self::UniqueI8(this) => this.clear(), + Self::UniqueU16(this) => this.clear(), + Self::UniqueI16(this) => this.clear(), + Self::UniqueU32(this) => this.clear(), + Self::UniqueI32(this) => this.clear(), + Self::UniqueU64(this) => this.clear(), + Self::UniqueI64(this) => this.clear(), + Self::UniqueU128(this) => this.clear(), + Self::UniqueI128(this) => this.clear(), + Self::UniqueU256(this) => this.clear(), + Self::UniqueI256(this) => this.clear(), + Self::UniqueString(this) => this.clear(), + Self::UniqueAV(this) => this.clear(), } } @@ -347,7 +569,23 @@ impl TypedIndex { Self::U256(this) => this.len(), Self::I256(this) => this.len(), Self::String(this) => this.len(), - Self::AlgebraicValue(this) => this.len(), + Self::AV(this) => this.len(), + + Self::UniqueBool(this) => this.len(), + Self::UniqueU8(this) => this.len(), + Self::UniqueI8(this) => this.len(), + Self::UniqueU16(this) => this.len(), + Self::UniqueI16(this) => this.len(), + Self::UniqueU32(this) => this.len(), + Self::UniqueI32(this) => this.len(), + Self::UniqueU64(this) => this.len(), + Self::UniqueI64(this) => this.len(), + Self::UniqueU128(this) => this.len(), + Self::UniqueI128(this) => this.len(), + Self::UniqueU256(this) => this.len(), + Self::UniqueI256(this) => this.len(), + Self::UniqueString(this) => this.len(), + Self::UniqueAV(this) => this.len(), } } @@ -367,17 +605,32 @@ impl TypedIndex { Self::U256(this) => this.num_keys(), Self::I256(this) => this.num_keys(), Self::String(this) => this.num_keys(), - Self::AlgebraicValue(this) => this.num_keys(), + Self::AV(this) => this.num_keys(), + + Self::UniqueBool(this) => this.num_keys(), + Self::UniqueU8(this) => this.num_keys(), + Self::UniqueI8(this) => this.num_keys(), + Self::UniqueU16(this) => this.num_keys(), + Self::UniqueI16(this) => this.num_keys(), + Self::UniqueU32(this) => this.num_keys(), + Self::UniqueI32(this) => this.num_keys(), + Self::UniqueU64(this) => this.num_keys(), + Self::UniqueI64(this) => this.num_keys(), + Self::UniqueU128(this) => this.num_keys(), + Self::UniqueI128(this) => this.num_keys(), + Self::UniqueU256(this) => this.num_keys(), + Self::UniqueI256(this) => this.num_keys(), + Self::UniqueString(this) => this.num_keys(), + Self::UniqueAV(this) => this.num_keys(), } } } /// A B-Tree based index on a set of [`ColId`]s of a table. +#[derive(Debug)] pub struct BTreeIndex { /// The ID of this index. pub index_id: IndexId, - /// Whether this index is also a unique constraint. - pub(crate) is_unique: bool, /// The actual index, specialized for the appropriate key type. idx: TypedIndex, /// The key type of this index. @@ -389,11 +642,10 @@ impl MemoryUsage for BTreeIndex { fn heap_usage(&self) -> usize { let Self { index_id, - is_unique, idx, key_type, } = self; - index_id.heap_usage() + is_unique.heap_usage() + idx.heap_usage() + key_type.heap_usage() + index_id.heap_usage() + idx.heap_usage() + key_type.heap_usage() } } @@ -408,41 +660,30 @@ impl BTreeIndex { is_unique: bool, ) -> Result { let key_type = row_type.project(indexed_columns)?; - // If the index is on a single column of a primitive type, - // use a homogeneous map with a native key type. - let typed_index = match key_type { - AlgebraicType::Bool => TypedIndex::Bool(Index::new()), - AlgebraicType::I8 => TypedIndex::I8(Index::new()), - AlgebraicType::U8 => TypedIndex::U8(Index::new()), - AlgebraicType::I16 => TypedIndex::I16(Index::new()), - AlgebraicType::U16 => TypedIndex::U16(Index::new()), - AlgebraicType::I32 => TypedIndex::I32(Index::new()), - AlgebraicType::U32 => TypedIndex::U32(Index::new()), - AlgebraicType::I64 => TypedIndex::I64(Index::new()), - AlgebraicType::U64 => TypedIndex::U64(Index::new()), - AlgebraicType::I128 => TypedIndex::I128(Index::new()), - AlgebraicType::U128 => TypedIndex::U128(Index::new()), - AlgebraicType::I256 => TypedIndex::I256(Index::new()), - AlgebraicType::U256 => TypedIndex::U256(Index::new()), - AlgebraicType::String => TypedIndex::String(Index::new()), - - // The index is either multi-column, - // or we don't care to specialize on the key type, - // so use a map keyed on `AlgebraicValue`. - _ => TypedIndex::AlgebraicValue(Index::new()), - }; + let typed_index = TypedIndex::new(&key_type, is_unique); Ok(Self { index_id, - is_unique, idx: typed_index, key_type, }) } - /// Inserts `ptr` with the value `row` to this index. - /// This index will extract the necessary values from `row` based on `self.cols`. - pub fn insert(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result<(), InvalidFieldError> { - self.idx.insert(cols, row_ref) + /// Clones the structure of this index but not the indexed elements, + /// so the returned index is empty. + pub fn clone_structure(&self) -> Self { + let key_type = self.key_type.clone(); + let index_id = self.index_id; + let idx = self.idx.clone_structure(); + Self { + index_id, + idx, + key_type, + } + } + + /// Returns whether this is a unique index or not. + pub fn is_unique(&self) -> bool { + self.idx.is_unique() } /// Inserts `ptr` with the value `row` to this index. @@ -454,12 +695,7 @@ impl BTreeIndex { cols: &ColList, row_ref: RowRef<'_>, ) -> Result, InvalidFieldError> { - if self.is_unique { - self.idx.insert_unique(cols, row_ref) - } else { - self.idx.insert(cols, row_ref)?; - Ok(None) - } + self.idx.insert(cols, row_ref) } /// Deletes `ptr` with its indexed value `col_value` from this index. @@ -483,17 +719,19 @@ impl BTreeIndex { } /// Extends [`BTreeIndex`] with `rows`. + /// + /// Returns the first unique constraint violation caused when adding this index, if any. pub fn build_from_rows<'table>( &mut self, cols: &ColList, rows: impl IntoIterator>, - ) -> Result<(), InvalidFieldError> { - // TODO(centril, correctness): consider `self.is_unique`. - // Should not be able to add an index which would cause unique constraint violations. + ) -> Result, InvalidFieldError> { for row_ref in rows { - self.insert(cols, row_ref)?; + if let violation @ Some(_) = self.check_and_insert(cols, row_ref)? { + return Ok(violation); + } } - Ok(()) + Ok(None) } /// Deletes all entries from the index, leaving it empty. @@ -552,7 +790,7 @@ mod test { /// Returns whether indexing `row` again would violate a unique constraint, if any. fn violates_unique_constraint(index: &BTreeIndex, cols: &ColList, row: &ProductValue) -> bool { - !index.is_unique || index.contains_any(&get_fields(cols, row)) + !index.is_unique() || index.contains_any(&get_fields(cols, row)) } /// Returns an iterator over the rows that would violate the unique constraint of this index, @@ -562,10 +800,11 @@ mod test { index: &'a BTreeIndex, row: &'a AlgebraicValue, ) -> Option> { - index.is_unique.then(|| index.seek(row)) + index.is_unique().then(|| index.seek(row)) } proptest! { + #![proptest_config(ProptestConfig { max_shrink_iters: 0x10000000, ..Default::default() })] #[test] fn remove_nonexistent_noop(((ty, cols, pv), is_unique) in (gen_row_and_cols(), any::())) { let mut index = new_index(&ty, &cols, is_unique); diff --git a/crates/table/src/btree_index/multimap.rs b/crates/table/src/btree_index/multimap.rs index 7e3e289af9d..391c76cdeb4 100644 --- a/crates/table/src/btree_index/multimap.rs +++ b/crates/table/src/btree_index/multimap.rs @@ -6,17 +6,23 @@ use std::collections::btree_map::{BTreeMap, Range}; use crate::MemoryUsage; /// A multi map that relates a `K` to a *set* of `V`s. -#[derive(Default)] +#[derive(Debug)] pub struct MultiMap { /// The map is backed by a `BTreeMap` for relating keys to values. /// - /// A value set is stored as a *sorted* `SmallVec`. - /// This is an optimization over a sorted `Vec<_>` + /// A value set is stored as a `SmallVec`. + /// This is an optimization over a `Vec<_>` /// as we allow a single element to be stored inline /// to improve performance for the common case of one element. map: BTreeMap>, } +impl Default for MultiMap { + fn default() -> Self { + Self { map: BTreeMap::new() } + } +} + impl MemoryUsage for MultiMap { fn heap_usage(&self) -> usize { let Self { map } = self; @@ -25,11 +31,6 @@ impl MemoryUsage for MultiMap { } impl MultiMap { - /// Returns an empty multi map. - pub fn new() -> Self { - Self { map: BTreeMap::new() } - } - /// Inserts the relation `key -> val` to this multimap. /// /// The map does not check whether `key -> val` was already in the map. @@ -37,21 +38,6 @@ impl MultiMap { self.map.entry(key).or_default().push(val); } - /// Inserts the relation `key -> val` to this multimap. - /// - /// If `key` was already present in the map, does not add an association with `val`. - /// Returns the existing associated value instead. - pub fn insert_unique(&mut self, key: K, val: V) -> Option<&V> { - // TODO(perf, centril): don't use a multimap at all for unique indices. - let vals = self.map.entry(key).or_default(); - if vals.is_empty() { - vals.push(val); - None - } else { - Some(&vals[0]) - } - } - /// Deletes `key -> val` from this multimap. /// /// Returns whether `key -> val` was present. diff --git a/crates/table/src/btree_index/uniquemap.rs b/crates/table/src/btree_index/uniquemap.rs new file mode 100644 index 00000000000..b71c7aead2e --- /dev/null +++ b/crates/table/src/btree_index/uniquemap.rs @@ -0,0 +1,92 @@ +use crate::MemoryUsage; +use core::ops::RangeBounds; +use std::collections::btree_map::{BTreeMap, Entry, Range}; + +/// A "unique map" that relates a `K` to a `V`. +/// +/// (This is just a `BTreeMap`) with a slightly modified interface. +#[derive(Debug)] +pub struct UniqueMap { + /// The map is backed by a `BTreeMap` for relating a key to a value. + map: BTreeMap, +} + +impl Default for UniqueMap { + fn default() -> Self { + Self { map: BTreeMap::new() } + } +} + +impl MemoryUsage for UniqueMap { + fn heap_usage(&self) -> usize { + let Self { map } = self; + map.heap_usage() + } +} + +impl UniqueMap { + /// Inserts the relation `key -> val` to this map. + /// + /// If `key` was already present in the map, does not add an association with `val`. + /// Returns the existing associated value instead. + pub fn insert(&mut self, key: K, val: V) -> Option<&V> { + match self.map.entry(key) { + Entry::Vacant(e) => { + e.insert(val); + None + } + Entry::Occupied(e) => Some(e.into_mut()), + } + } + + /// Deletes `key` from this map. + /// + /// Returns whether `key` was present. + pub fn delete(&mut self, key: &K) -> bool { + self.map.remove(key).is_some() + } + + /// Returns an iterator over the map that yields all the `V`s + /// of the `K`s that fall within the specified `range`. + pub fn values_in_range(&self, range: &impl RangeBounds) -> UniqueMapRangeIter<'_, K, V> { + UniqueMapRangeIter { + iter: self.map.range((range.start_bound(), range.end_bound())), + } + } + + /// Returns the number of unique keys in the map. + pub fn num_keys(&self) -> usize { + self.len() + } + + /// Returns the total number of entries in the map.s + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns whether there are any entries in the map. + #[allow(unused)] // No use for this currently. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Deletes all entries from the map, leaving it empty. + /// This will not deallocate the outer map. + pub fn clear(&mut self) { + self.map.clear(); + } +} + +/// An iterator over values in a [`MultiMap`] where the keys are in a certain range. +pub struct UniqueMapRangeIter<'a, K, V> { + /// The iterator seeking for matching keys in the range. + iter: Range<'a, K, V>, +} + +impl<'a, K, V> Iterator for UniqueMapRangeIter<'a, K, V> { + type Item = &'a V; + + fn next(&mut self) -> Option { + self.iter.next().map(|(_, v)| v) + } +} diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index 1d95de53967..c0eb59e49c2 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -1,7 +1,7 @@ use super::{ bflatn_from::serialize_row_from_page, bflatn_to::write_row_to_pages, - blob_store::{BlobStore, NullBlobStore}, + blob_store::BlobStore, btree_index::{BTreeIndex, BTreeIndexRangeIter}, eq::eq_row_in_page, eq_to_pv::eq_row_in_page_to_pv, @@ -208,7 +208,7 @@ impl Table { row: RowRef<'_>, mut is_deleted: impl FnMut(RowPointer) -> bool, ) -> Result<(), UniqueConstraintViolation> { - for (cols, index) in self.indexes.iter().filter(|(_, index)| index.is_unique) { + for (cols, index) in self.indexes.iter().filter(|(_, index)| index.is_unique()) { let value = row.project(cols).unwrap(); if index.seek(&value).next().is_some_and(|ptr| !is_deleted(ptr)) { return Err(self.build_error_unique(index, cols, value)); @@ -607,9 +607,16 @@ impl Table { /// Inserts a new `index` into the table. /// /// The index will be populated using the rows of the table. + /// + /// # Panics + /// /// Panics if `cols` has some column that is out of bounds of the table's row layout. + /// Also panics if any row would violate `index`'s unique constraint, if it has one. pub fn insert_index(&mut self, blob_store: &dyn BlobStore, cols: ColList, mut index: BTreeIndex) { - index.build_from_rows(&cols, self.scan_rows(blob_store)).unwrap(); + index + .build_from_rows(&cols, self.scan_rows(blob_store)) + .expect("`cols` should consist of valid columns for this table") + .inspect(|ptr| panic!("adding `index` should cause no unique constraint violations, but {ptr:?} would")); self.indexes.insert(cols, index); } @@ -656,11 +663,9 @@ impl Table { let mut new = Table::new_with_indexes_capacity(schema, layout, sbl, visitor, squashed_offset, self.indexes.len()); + // Clone the index structure. The table is empty, so no need to `build_from_rows`. for (cols, index) in self.indexes.iter() { - // `new` is known to be empty (we just constructed it!), - // so no need for an actual blob store here. - let index = new.new_index(index.index_id, cols, index.is_unique).unwrap(); - new.insert_index(&NullBlobStore, cols.clone(), index); + new.indexes.insert(cols.clone(), index.clone_structure()); } new } @@ -1096,7 +1101,7 @@ impl UniqueConstraintViolation { impl Table { /// Returns a unique constraint violation error for the given `index` /// and the `value` that would have been duplicated. - fn build_error_unique( + pub fn build_error_unique( &self, index: &BTreeIndex, cols: &ColList, @@ -1231,7 +1236,7 @@ impl Table { #[cfg(test)] pub(crate) mod test { use super::*; - use crate::blob_store::HashMapBlobStore; + use crate::blob_store::{HashMapBlobStore, NullBlobStore}; use crate::page::tests::hash_unmodified_save_get; use crate::var_len::VarLenGranule; use proptest::prelude::*;