From d428821d4232e131d57b410ad212c6103c7c84e6 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Tue, 17 Oct 2023 17:37:01 +0100 Subject: [PATCH] [wip] Multi-tiered index structure --- .../locking_tx_datastore/btree_index.rs | 131 +++++++++--------- .../db/datastore/locking_tx_datastore/mod.rs | 34 +++-- 2 files changed, 84 insertions(+), 81 deletions(-) diff --git a/crates/core/src/db/datastore/locking_tx_datastore/btree_index.rs b/crates/core/src/db/datastore/locking_tx_datastore/btree_index.rs index 6ba705c0bf4..6329d8a9ae5 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/btree_index.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/btree_index.rs @@ -4,11 +4,11 @@ use crate::{ error::DBError, }; use nonempty::NonEmpty; -use spacetimedb_lib::{data_key::ToDataKey, DataKey}; +use spacetimedb_lib::data_key::ToDataKey; use spacetimedb_sats::{AlgebraicValue, ProductValue}; use std::{ - collections::{btree_set, BTreeSet}, - ops::{Bound, RangeBounds}, + collections::{btree_map, BTreeMap}, + ops::RangeBounds, }; /// ## Index Key Composition @@ -31,57 +31,44 @@ use std::{ /// [AlgebraicValue::I32(0)] = Row(ProductValue(...)) /// [AlgebraicValue::Product(AlgebraicValue::I32(0), AlgebraicValue::I32(1))] = Row(ProductValue(...)) /// ``` -#[derive(Clone, Eq, PartialEq, Ord, PartialOrd)] -struct IndexKey { - value: AlgebraicValue, - row_id: RowId, -} -impl IndexKey { - #[tracing::instrument(skip_all)] - pub(crate) fn from_row(value: &AlgebraicValue, row_id: DataKey) -> Self { - Self { - value: value.clone(), - row_id: RowId(row_id), - } - } +enum Rows { + Unique(BTreeMap), + NonUnique(BTreeMap>), } -pub struct BTreeIndexIter<'a> { - iter: btree_set::Iter<'a, IndexKey>, -} +pub struct ValueIter(I); -impl Iterator for BTreeIndexIter<'_> { - type Item = RowId; +impl<'a, K: 'a, V: 'a, I> Iterator for ValueIter +where + I: Iterator, +{ + type Item = &'a V; - #[tracing::instrument(skip_all)] fn next(&mut self) -> Option { - self.iter.next().map(|key| key.row_id) + self.0.next().map(|(_, v)| v) } -} - -/// An iterator for the rows that match a value [AlgebraicValue] on the -/// [BTreeIndex] -pub struct BTreeIndexRangeIter<'a> { - range_iter: btree_set::Range<'a, IndexKey>, -} -impl Iterator for BTreeIndexRangeIter<'_> { - type Item = RowId; - - #[tracing::instrument(skip_all)] - fn next(&mut self) -> Option { - self.range_iter.next().map(|key| key.row_id) + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() } } +pub type BTreeIndexRangeIter<'a> = std::iter::Copied< + itertools::Either< + ValueIter>, + std::iter::Flatten>>>, + >, +>; + pub(crate) struct BTreeIndex { pub(crate) index_id: IndexId, pub(crate) table_id: u32, pub(crate) cols: NonEmpty, pub(crate) name: String, pub(crate) is_unique: bool, - idx: BTreeSet, + // todo: remove is_unique in favour of this + idx: Rows, } impl BTreeIndex { @@ -92,7 +79,10 @@ impl BTreeIndex { cols, name, is_unique, - idx: BTreeSet::new(), + idx: match is_unique { + true => Rows::Unique(Default::default()), + false => Rows::NonUnique(Default::default()), + }, } } @@ -104,32 +94,51 @@ impl BTreeIndex { #[tracing::instrument(skip_all)] pub(crate) fn insert(&mut self, row: &ProductValue) -> Result<(), DBError> { let col_value = self.get_fields(row)?; - let key = IndexKey::from_row(&col_value, row.to_data_key()); - self.idx.insert(key); + let row_id = RowId(row.to_data_key()); + match &mut self.idx { + Rows::Unique(rows) => { + if rows.insert(col_value, row_id).is_some() { + tracing::error!("unique constraint violation that should have been checked by now"); + } + } + Rows::NonUnique(rows) => { + rows.entry(col_value).or_default().insert(row_id); + } + } Ok(()) } #[tracing::instrument(skip_all)] pub(crate) fn delete(&mut self, col_value: &AlgebraicValue, row_id: &RowId) { - let key = IndexKey::from_row(col_value, row_id.0); - self.idx.remove(&key); + match &mut self.idx { + Rows::Unique(rows) => { + rows.remove(col_value); + } + Rows::NonUnique(rows) => { + if let Some(rows) = rows.get_mut(col_value) { + rows.remove(row_id); + } + } + } } #[tracing::instrument(skip_all)] - pub(crate) fn violates_unique_constraint(&self, row: &ProductValue) -> bool { - if self.is_unique { - let col_value = self.get_fields(row).unwrap(); - return self.contains_any(&col_value); + pub(crate) fn get_row_that_violates_unique_constraint<'a>(&'a self, row: &AlgebraicValue) -> Option<&'a RowId> { + match &self.idx { + Rows::Unique(rows) => rows.get(row), + Rows::NonUnique(_) => None, } - false } #[tracing::instrument(skip_all)] - pub(crate) fn get_rows_that_violate_unique_constraint<'a>( - &'a self, - row: &'a AlgebraicValue, - ) -> Option> { - self.is_unique.then(|| self.seek(row)) + pub(crate) fn violates_unique_constraint(&self, row: &ProductValue) -> bool { + match &self.idx { + Rows::Unique(rows) => { + let col_value = self.get_fields(row).unwrap(); + rows.contains_key(&col_value) + } + Rows::NonUnique(_) => false, + } } /// Returns `true` if the [BTreeIndex] contains a value for the specified `value`. @@ -140,24 +149,20 @@ impl BTreeIndex { /// Returns an iterator over the `RowId`s in the [BTreeIndex] #[tracing::instrument(skip_all)] - pub(crate) fn scan(&self) -> BTreeIndexIter<'_> { - BTreeIndexIter { iter: self.idx.iter() } + pub(crate) fn scan(&self) -> BTreeIndexRangeIter<'_> { + self.seek(&(..)) } /// Returns an iterator over the [BTreeIndex] that yields all the `RowId`s /// that fall within the specified `range`. #[tracing::instrument(skip_all)] pub(crate) fn seek<'a>(&'a self, range: &impl RangeBounds) -> BTreeIndexRangeIter<'a> { - let map = |bound, datakey| match bound { - Bound::Included(x) => Bound::Included(IndexKey::from_row(x, datakey)), - Bound::Excluded(x) => Bound::Excluded(IndexKey::from_row(x, datakey)), - Bound::Unbounded => Bound::Unbounded, - }; - let start = map(range.start_bound(), DataKey::min_datakey()); - let end = map(range.end_bound(), DataKey::max_datakey()); - BTreeIndexRangeIter { - range_iter: self.idx.range((start, end)), + let range = (range.start_bound(), range.end_bound()); + match &self.idx { + Rows::Unique(rows) => itertools::Either::Left(ValueIter(rows.range(range))), + Rows::NonUnique(rows) => itertools::Either::Right(ValueIter(rows.range(range)).flatten()), } + .copied() } /// Construct the [BTreeIndex] from the rows. diff --git a/crates/core/src/db/datastore/locking_tx_datastore/mod.rs b/crates/core/src/db/datastore/locking_tx_datastore/mod.rs index f947a9e20e0..69e179942cd 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/mod.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/mod.rs @@ -1281,26 +1281,11 @@ impl Inner { if let Some(table) = self.committed_state.tables.get_mut(&table_id) { for index in table.indexes.values() { let value = index.get_fields(&row)?; - let Some(violators) = index.get_rows_that_violate_unique_constraint(&value) else { + let Some(row_id) = index.get_row_that_violates_unique_constraint(&value) else { continue; }; - for row_id in violators { - if let Some(delete_table) = self.tx_state.as_ref().unwrap().delete_tables.get(&table_id) { - if !delete_table.contains(&row_id) { - let value = row.project_not_empty(&index.cols)?; - return Err(IndexError::UniqueConstraintViolation { - constraint_name: index.name.clone(), - table_name: table.schema.table_name.clone(), - col_names: index - .cols - .iter() - .map(|&x| insert_table.schema.columns[x as usize].col_name.clone()) - .collect(), - value, - } - .into()); - } - } else { + if let Some(delete_table) = self.tx_state.as_ref().unwrap().delete_tables.get(&table_id) { + if !delete_table.contains(row_id) { let value = row.project_not_empty(&index.cols)?; return Err(IndexError::UniqueConstraintViolation { constraint_name: index.name.clone(), @@ -1314,6 +1299,19 @@ impl Inner { } .into()); } + } else { + let value = row.project_not_empty(&index.cols)?; + return Err(IndexError::UniqueConstraintViolation { + constraint_name: index.name.clone(), + table_name: table.schema.table_name.clone(), + col_names: index + .cols + .iter() + .map(|&x| insert_table.schema.columns[x as usize].col_name.clone()) + .collect(), + value, + } + .into()); } } }