diff --git a/crates/table/benches/page.rs b/crates/table/benches/page.rs index 2094ef7121f..a740df61b79 100644 --- a/crates/table/benches/page.rs +++ b/crates/table/benches/page.rs @@ -10,6 +10,7 @@ use spacetimedb_sats::algebraic_value::ser::ValueSerializer; use spacetimedb_sats::{product, AlgebraicType, AlgebraicValue, ArrayValue, ProductType, ProductValue}; use spacetimedb_table::bflatn_from::serialize_row_from_page; use spacetimedb_table::bflatn_to::write_row_to_page; +use spacetimedb_table::bflatn_to_bsatn_fast_path::StaticBsatnLayout; use spacetimedb_table::blob_store::NullBlobStore; use spacetimedb_table::eq::eq_row_in_page; use spacetimedb_table::indexes::{Byte, Bytes, PageOffset, RowHash}; @@ -758,6 +759,7 @@ fn eq_in_page_same(c: &mut Criterion) { let mut group = c.benchmark_group("eq_in_page"); for (name, ty, value, _null_visitor, _aligned_offsets_visitor) in product_value_test_cases() { let (ty, mut page, visitor) = ty_page_visitor(ty); + let static_bsatn_layout = StaticBsatnLayout::for_row_type(&ty); let (offset_0, _) = unsafe { write_row_to_page(&mut page, &mut NullBlobStore, &visitor, &ty, &value) }.unwrap(); let (offset_1, _) = unsafe { write_row_to_page(&mut page, &mut NullBlobStore, &visitor, &ty, &value) }.unwrap(); @@ -771,6 +773,7 @@ fn eq_in_page_same(c: &mut Criterion) { black_box(offset_0), black_box(offset_1), black_box(&ty), + black_box(static_bsatn_layout.as_ref()), ) }) }); diff --git a/crates/table/benches/page_manager.rs b/crates/table/benches/page_manager.rs index 08a7b06277a..5cba4972543 100644 --- a/crates/table/benches/page_manager.rs +++ b/crates/table/benches/page_manager.rs @@ -60,6 +60,18 @@ fn as_bytes(t: &T) -> &Bytes { unsafe trait Row { fn row_type() -> ProductType; + fn row_type_for_schema() -> ProductType { + let mut ty = Self::row_type(); + // Ensure that we have names for every column, + // so that its accepted when used in a `ModuleDef` as a row type. + for (idx, elem) in ty.elements.iter_mut().enumerate() { + if elem.name.is_none() { + elem.name = Some(format!("col_{idx}").into()); + } + } + ty + } + fn var_len_visitor() -> VarLenVisitorProgram { row_type_visitor(&Self::row_type().into()) } @@ -459,7 +471,7 @@ fn schema_from_ty(ty: ProductType, name: &str) -> TableSchema { fn make_table(c: &mut Criterion) { fn bench_make_table(group: Group<'_, '_>, name: &str) { - let ty = R::row_type(); + let ty = R::row_type_for_schema(); let schema = schema_from_ty(ty.clone(), name); group.bench_function(name, |b| { b.iter_custom(|num_iters| { @@ -484,7 +496,7 @@ fn make_table(c: &mut Criterion) { } fn make_table_for_row_type(name: &str) -> Table { - let ty = R::row_type(); + let ty = R::row_type_for_schema(); let schema = schema_from_ty(ty.clone(), name); Table::new(schema.into(), SquashedOffset::COMMITTED_STATE) } @@ -664,7 +676,7 @@ trait IndexedRow: Row + Sized { let name = Self::table_name(); let mut builder = RawModuleDefV9Builder::new(); builder - .build_table_with_new_type(name.clone(), Self::row_type(), true) + .build_table_with_new_type(name.clone(), Self::row_type_for_schema(), true) .with_index( RawIndexAlgorithm::BTree { columns: Self::indexed_columns(), diff --git a/crates/table/src/bflatn_to_bsatn_fast_path.rs b/crates/table/src/bflatn_to_bsatn_fast_path.rs index 21c12b23737..0da7344695d 100644 --- a/crates/table/src/bflatn_to_bsatn_fast_path.rs +++ b/crates/table/src/bflatn_to_bsatn_fast_path.rs @@ -30,13 +30,13 @@ use super::{ }, util::range_move, }; -use core::mem::MaybeUninit; use core::ptr; +use core::{mem::MaybeUninit, ops::Range}; /// A precomputed BSATN layout for a type whose encoded length is a known constant, /// enabling fast BFLATN -> BSATN conversion. #[derive(PartialEq, Eq, Debug, Clone)] -pub(crate) struct StaticBsatnLayout { +pub struct StaticBsatnLayout { /// The length of the encoded BSATN representation of a row of this type, /// in bytes. /// @@ -132,12 +132,35 @@ impl StaticBsatnLayout { unsafe { buf.set_len(start + len) } } + /// Compares `row_a` for equality against `row_b`. + /// + /// # Safety + /// + /// - `row` must store a valid, initialized instance of the BFLATN row type + /// for which `self` was computed. + /// As a consequence of this, for every `field` in `self.fields`, + /// `row[field.bflatn_offset .. field.bflatn_offset + field.length]` will be initialized. + pub(crate) unsafe fn eq(&self, row_a: &Bytes, row_b: &Bytes) -> bool { + // No need to check the lengths. + // We assume they are of the same length. + self.fields.iter().all(|field| { + // SAFETY: The consequence of what the caller promised is that + // `row_(a/b).len() >= field.bflatn_offset + field.length >= field.bflatn_offset`. + unsafe { field.eq(row_a, row_b) } + }) + } + /// Construct a `StaticBsatnLayout` for converting BFLATN rows of `row_type` into BSATN. /// /// Returns `None` if `row_type` contains a column which does not have a constant length in BSATN, /// either a [`VarLenType`] /// or a [`SumTypeLayout`] whose variants do not have the same "live" unpadded length. - pub(crate) fn for_row_type(row_type: &RowTypeLayout) -> Option { + pub fn for_row_type(row_type: &RowTypeLayout) -> Option { + if !row_type.layout().fixed { + // Don't bother computing the static layout if there are variable components. + return None; + } + let mut builder = LayoutBuilder::new_builder(); builder.visit_product(row_type.product())?; Some(builder.build()) @@ -168,20 +191,48 @@ struct MemcpyField { impl MemoryUsage for MemcpyField {} impl MemcpyField { + /// Returns the range for this field in a BFLATN byte array. + fn bflatn_range(&self) -> Range { + range_move(0..self.length as usize, self.bflatn_offset as usize) + } + + /// Returns the range for this field in a BSATN byte array. + fn bsatn_range(&self) -> Range { + range_move(0..self.length as usize, self.bsatn_offset as usize) + } + + /// Compares `row_a` and `row_b` for equality in this field. + /// + /// # Safety + /// + /// - `row_a.len() >= self.bflatn_offset + self.length` + /// - `row_b.len() >= self.bflatn_offset + self.length` + unsafe fn eq(&self, row_a: &Bytes, row_b: &Bytes) -> bool { + let range = self.bflatn_range(); + let range2 = range.clone(); + // SAFETY: The `range` is in bounds as + // `row_a.len() >= self.bflatn_offset + self.length >= self.bflatn_offset`. + let row_a_field = unsafe { row_a.get_unchecked(range) }; + // SAFETY: The `range` is in bounds as + // `row_b.len() >= self.bflatn_offset + self.length >= self.bflatn_offset`. + let row_b_field = unsafe { row_b.get_unchecked(range2) }; + row_a_field == row_b_field + } + /// Copies the bytes at `row[self.bflatn_offset .. self.bflatn_offset + self.length]` - /// into `buf[self.bsatn_offset + self.length]`. + /// into `buf[self.bsatn_offset .. self.bsatn_offset + self.length]`. /// /// # Safety /// - /// - `buf` must be exactly `self.bsatn_offset + self.length` long. - /// - `row` must be exactly `self.bflatn_offset + self.length` long. + /// - `buf.len() >= self.bsatn_offset + self.length`. + /// - `row.len() >= self.bflatn_offset + self.length` unsafe fn copy(&self, buf: &mut [MaybeUninit], row: &Bytes) { let len = self.length as usize; // SAFETY: forward caller requirement #1. - let to = unsafe { buf.get_unchecked_mut(range_move(0..len, self.bsatn_offset as usize)) }; + let to = unsafe { buf.get_unchecked_mut(self.bsatn_range()) }; let dst = to.as_mut_ptr().cast(); // SAFETY: forward caller requirement #2. - let from = unsafe { row.get_unchecked(range_move(0..len, self.bflatn_offset as usize)) }; + let from = unsafe { row.get_unchecked(self.bflatn_range()) }; let src = from.as_ptr(); // SAFETY: diff --git a/crates/table/src/eq.rs b/crates/table/src/eq.rs index c4cc4c0ba21..17aab833e7b 100644 --- a/crates/table/src/eq.rs +++ b/crates/table/src/eq.rs @@ -4,6 +4,7 @@ use super::{ bflatn_from::read_tag, + bflatn_to_bsatn_fast_path::StaticBsatnLayout, indexes::{Bytes, PageOffset}, layout::{align_to, AlgebraicTypeLayout, HasLayout, ProductTypeLayout, RowTypeLayout}, page::Page, @@ -24,29 +25,44 @@ use super::{ /// 1. `fixed_offset_a/b` are valid offsets for rows typed at `ty` in `page_a/b`. /// 2. for any `vlr_a/b: VarLenRef` in the fixed parts of row `a` and `b`, /// `vlr_a/b.first_offset` must either be `NULL` or point to a valid granule in `page_a/b`. +/// 3. the `static_bsatn_layout` must be derived from `ty`. pub unsafe fn eq_row_in_page( page_a: &Page, page_b: &Page, fixed_offset_a: PageOffset, fixed_offset_b: PageOffset, ty: &RowTypeLayout, + static_bsatn_layout: Option<&StaticBsatnLayout>, ) -> bool { - // Context for the whole comparison. - let mut ctx = EqCtx { - // Contexts for rows `a` and `b`. - a: BytesPage::new(page_a, fixed_offset_a, ty), - b: BytesPage::new(page_b, fixed_offset_b, ty), - curr_offset: 0, - }; - // Test for equality! - // SAFETY: - // 1. Per requirement 1., rows `a/b` are valid at type `ty` and properly aligned for `ty`. - // Their fixed parts are defined as: - // `value_a/b = ctx.a/b.bytes[range_move(0..fixed_row_size, fixed_offset_a/b)]` - // as needed. - // 2. for any `vlr_a/b: VarLenRef` stored in `value_a/b`, - // `vlr_a/b.first_offset` must either be `NULL` or point to a valid granule in `page_a/b`. - unsafe { eq_product(&mut ctx, ty.product()) } + // Contexts for rows `a` and `b`. + let a = BytesPage::new(page_a, fixed_offset_a, ty); + let b = BytesPage::new(page_b, fixed_offset_b, ty); + + // If there are only fixed parts in the layout, + // there are no pointers to anywhere, + // So it is sound to simply check for byte-wise equality while ignoring padding. + match static_bsatn_layout { + None => { + // Context for the whole comparison. + let mut ctx = EqCtx { a, b, curr_offset: 0 }; + + // Test for equality! + // SAFETY: + // 1. Per requirement 1., rows `a/b` are valid at type `ty` and properly aligned for `ty`. + // Their fixed parts are defined as: + // `value_a/b = ctx.a/b.bytes[range_move(0..fixed_row_size, fixed_offset_a/b)]` + // as needed. + // 2. for any `vlr_a/b: VarLenRef` stored in `value_a/b`, + // `vlr_a/b.first_offset` must either be `NULL` or point to a valid granule in `page_a/b`. + unsafe { eq_product(&mut ctx, ty.product()) } + } + Some(static_bsatn_layout) => { + // SAFETY: caller promised that `a/b` are valid BFLATN representations matching `ty` + // and as `static_bsatn_layout` was promised to be derived from `ty`, + // so too are `a/b` valid for `static_bsatn_layout`. + unsafe { static_bsatn_layout.eq(a.bytes, b.bytes) } + } + } } /// A view into the fixed part of a row combined with the page it belongs to. @@ -210,3 +226,49 @@ fn eq_byte_array(ctx: &mut EqCtx<'_, '_>, len: usize) -> bool { ctx.curr_offset += len; data_a == data_b } + +#[cfg(test)] +mod test { + use crate::blob_store::NullBlobStore; + use spacetimedb_sats::{product, AlgebraicType, AlgebraicValue, ProductType}; + + #[test] + fn sum_with_variant_with_distinct_layout() { + // This is a type where the layout of the sum variants differ, + // with the latter having some padding bytes due to alignment. + let ty = ProductType::from([AlgebraicType::sum([ + AlgebraicType::U64, // xxxxxxxx + AlgebraicType::product([AlgebraicType::U8, AlgebraicType::U32]), // xpppxxxx + ])]); + + let bs = &mut NullBlobStore; + let mut table_a = crate::table::test::table(ty.clone()); + let mut table_b = crate::table::test::table(ty); + + // Insert u64::MAX with tag 0 and then delete it. + let a0 = product![AlgebraicValue::sum(0, u64::MAX.into())]; + let (_, a0_rr) = table_a.insert(bs, &a0).unwrap(); + let a0_ptr = a0_rr.pointer(); + assert!(table_a.delete(bs, a0_ptr, |_| {}).is_some()); + + // Insert u64::ALTERNATING_BIT_PATTERN with tag 0 and then delete it. + let b0 = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101u64; + let b0 = product![AlgebraicValue::sum(0, b0.into())]; + let (_, b0_rr) = table_b.insert(bs, &b0).unwrap(); + let b0_ptr = b0_rr.pointer(); + assert!(table_b.delete(bs, b0_ptr, |_| {}).is_some()); + + // Insert two identical rows `a1` and `b2` into the tables. + // They should occupy the spaces of the previous rows. + let v1 = product![AlgebraicValue::sum(1, product![0u8, 0u32].into())]; + let (_, a1_rr) = table_a.insert(bs, &v1).unwrap(); + let bs = &mut NullBlobStore; + let (_, b1_rr) = table_b.insert(bs, &v1).unwrap(); + assert_eq!(a0_ptr, a1_rr.pointer()); + assert_eq!(b0_ptr, b1_rr.pointer()); + + // Check that the rows are considered equal + // and that padding does not mess this up. + assert_eq!(a1_rr, b1_rr); + } +} diff --git a/crates/table/src/layout.rs b/crates/table/src/layout.rs index 17eafe9a050..f63a66d9b37 100644 --- a/crates/table/src/layout.rs +++ b/crates/table/src/layout.rs @@ -55,6 +55,9 @@ pub struct Layout { pub size: u16, /// The alignment of the object / expected object in bytes. pub align: u16, + /// Whether this is the layout of a fixed object + /// and not the layout of a var-len type's fixed component. + pub fixed: bool, } impl MemoryUsage for Layout {} @@ -329,12 +332,36 @@ impl MemoryUsage for PrimitiveType {} impl HasLayout for PrimitiveType { fn layout(&self) -> &'static Layout { match self { - Self::Bool | Self::I8 | Self::U8 => &Layout { size: 1, align: 1 }, - Self::I16 | Self::U16 => &Layout { size: 2, align: 2 }, - Self::I32 | Self::U32 | Self::F32 => &Layout { size: 4, align: 4 }, - Self::I64 | Self::U64 | Self::F64 => &Layout { size: 8, align: 8 }, - Self::I128 | Self::U128 => &Layout { size: 16, align: 16 }, - Self::I256 | Self::U256 => &Layout { size: 32, align: 32 }, + Self::Bool | Self::I8 | Self::U8 => &Layout { + size: 1, + align: 1, + fixed: true, + }, + Self::I16 | Self::U16 => &Layout { + size: 2, + align: 2, + fixed: true, + }, + Self::I32 | Self::U32 | Self::F32 => &Layout { + size: 4, + align: 4, + fixed: true, + }, + Self::I64 | Self::U64 | Self::F64 => &Layout { + size: 8, + align: 8, + fixed: true, + }, + Self::I128 | Self::U128 => &Layout { + size: 16, + align: 16, + fixed: true, + }, + Self::I256 | Self::U256 => &Layout { + size: 32, + align: 32, + fixed: true, + }, } } } @@ -362,7 +389,11 @@ impl MemoryUsage for VarLenType { } /// The layout of var-len objects. Aligned at a `u16` which it has 2 of. -const VAR_LEN_REF_LAYOUT: Layout = Layout { size: 4, align: 2 }; +const VAR_LEN_REF_LAYOUT: Layout = Layout { + size: 4, + align: 2, + fixed: false, +}; const _: () = assert!(VAR_LEN_REF_LAYOUT.size as usize == mem::size_of::()); const _: () = assert!(VAR_LEN_REF_LAYOUT.align as usize == mem::align_of::()); @@ -412,10 +443,12 @@ impl From for ProductTypeLayout { // This is consistent with Rust. let mut max_child_align = 1; + let mut fixed = true; let elements = Vec::from(ty.elements) .into_iter() .map(|elem| { let layout_type: AlgebraicTypeLayout = elem.algebraic_type.into(); + fixed &= layout_type.layout().fixed; let this_offset = align_to(current_offset, layout_type.align()); max_child_align = usize::max(max_child_align, layout_type.align()); @@ -433,6 +466,7 @@ impl From for ProductTypeLayout { let layout = Layout { align: max_child_align as u16, size: align_to(current_offset, max_child_align) as u16, + fixed, }; Self { layout, elements } @@ -447,10 +481,12 @@ impl From for SumTypeLayout { // This is consistent with Rust. let mut max_child_align = 0; + let mut fixed = true; let variants = Vec::from(ty.variants) .into_iter() .map(|variant| { let layout_type: AlgebraicTypeLayout = variant.algebraic_type.into(); + fixed &= layout_type.layout().fixed; max_child_align = usize::max(max_child_align, layout_type.align()); max_child_size = usize::max(max_child_size, layout_type.size()); @@ -478,7 +514,7 @@ impl From for SumTypeLayout { // [tag | pad to align | payload] let size = align + payload_size as u16; let payload_offset = align; - let layout = Layout { align, size }; + let layout = Layout { align, size, fixed }; Self { layout, payload_offset, diff --git a/crates/table/src/lib.rs b/crates/table/src/lib.rs index 6bfae13b4d2..cce4432d094 100644 --- a/crates/table/src/lib.rs +++ b/crates/table/src/lib.rs @@ -8,7 +8,7 @@ pub mod bflatn_from; pub mod bflatn_to; -mod bflatn_to_bsatn_fast_path; +pub mod bflatn_to_bsatn_fast_path; pub mod blob_store; pub mod btree_index; pub mod eq; diff --git a/crates/table/src/row_type_visitor.rs b/crates/table/src/row_type_visitor.rs index 126476284b2..0cc58a4410f 100644 --- a/crates/table/src/row_type_visitor.rs +++ b/crates/table/src/row_type_visitor.rs @@ -46,6 +46,11 @@ use std::sync::Arc; /// This is a potentially expensive operation, /// so the resulting `VarLenVisitorProgram` should be stored and re-used. pub fn row_type_visitor(ty: &RowTypeLayout) -> VarLenVisitorProgram { + if ty.layout().fixed { + // Fast-path: The row type doesn't contain var-len members, so quit early. + return VarLenVisitorProgram { insns: [].into() }; + } + let rose_tree = product_type_to_rose_tree(ty.product(), &mut 0); rose_tree_to_visitor_program(&rose_tree) diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index d9e15cefefc..156f6391c5a 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -1,5 +1,3 @@ -use crate::var_len::VarLenMembers; - use super::{ bflatn_from::serialize_row_from_page, bflatn_to::write_row_to_pages, @@ -16,7 +14,9 @@ use super::{ read_column::{ReadColumn, TypeError}, row_hash::hash_row_in_page, row_type_visitor::{row_type_visitor, VarLenVisitorProgram}, - static_assert_size, MemoryUsage, + static_assert_size, + var_len::VarLenMembers, + MemoryUsage, }; use core::hash::{Hash, Hasher}; use core::ops::RangeBounds; @@ -371,6 +371,8 @@ impl Table { // `committed_ptr` is in `committed_table.pointer_map`, // so it must be valid and therefore `committed_page` and `committed_offset` are valid. // Our invariants mean `committed_table.row_layout` applies to both tables. + // Moreover was `committed_table.inner.static_bsatn_layout` + // derived from `committed_table.row_layout`. unsafe { eq_row_in_page( committed_page, @@ -378,6 +380,7 @@ impl Table { committed_offset, tx_offset, &committed_table.inner.row_layout, + committed_table.inner.static_bsatn_layout.as_ref(), ) } }) @@ -915,8 +918,10 @@ impl PartialEq for RowRef<'_> { } let (page_a, offset_a) = self.page_and_offset(); let (page_b, offset_b) = other.page_and_offset(); - // SAFETY: `offset_a/b` are valid rows in `page_a/b` typed at `a_ty`. - unsafe { eq_row_in_page(page_a, page_b, offset_a, offset_b, a_ty) } + let static_bsatn_layout = self.table.static_bsatn_layout.as_ref(); + // SAFETY: `offset_a/b` are valid rows in `page_a/b` typed at `a_ty` + // and `static_bsatn_layout` is derived from `a_ty`. + unsafe { eq_row_in_page(page_a, page_b, offset_a, offset_b, a_ty, static_bsatn_layout) } } }