Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid a multimap when the index is unique #2044

Merged
merged 2 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,15 +399,35 @@ impl MutTxId {
_ => unimplemented!(),
};
// Create and build the index.
//
// Ensure adding the index does not cause a unique constraint violation due to
// the existing rows having the same value for some column(s).
let mut insert_index = table.new_index(index.index_id, &columns, is_unique)?;
insert_index.build_from_rows(&columns, table.scan_rows(blob_store))?;
let mut build_from_rows = |table: &Table, bs: &dyn BlobStore| -> Result<()> {
if let Some(violation) = insert_index.build_from_rows(&columns, table.scan_rows(bs))? {
let violation = table
.get_row_ref(bs, violation)
.expect("row came from scanning the table")
.project(&columns)
.expect("`cols` should consist of valid columns for this table");
return Err(IndexError::from(table.build_error_unique(&insert_index, &columns, violation)).into());
}
Ok(())
};
build_from_rows(table, blob_store)?;
// NOTE: Also add all the rows in the already committed table to the index.
//
// FIXME: Is this correct? Index scan iterators (incl. the existing `Locking` versions)
// appear to assume that a table's index refers only to rows within that table,
// and does not handle the case where a `TxState` index refers to `CommittedState` rows.
if let Some(committed_table) = commit_table {
insert_index.build_from_rows(&columns, committed_table.scan_rows(commit_blob_store))?;
//
// TODO(centril): An alternative here is to actually add this index to `CommittedState`,
// pretending that it was already committed, and recording this pretense.
// Then, we can roll that back on a failed tx.
if let Some(commit_table) = commit_table {
build_from_rows(commit_table, commit_blob_store)?;
}

table.indexes.insert(columns.clone(), insert_index);
// Associate `index_id -> (table_id, col_list)` for fast lookup.
idx_map.insert(index_id, (table_id, columns.clone()));
Expand Down
2 changes: 1 addition & 1 deletion crates/core/src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ pub struct IndexSemiJoinLeft<'a, 'c, Rhs: RelOps<'a>> {
pub tx: &'a TxMode<'a>,
}

static_assert_size!(IndexSemiJoinLeft<Box<IterRows<'static>>>, 280);
static_assert_size!(IndexSemiJoinLeft<Box<IterRows<'static>>>, 264);

impl<'a, Rhs: RelOps<'a>> IndexSemiJoinLeft<'a, '_, Rhs> {
fn filter(&self, index_row: &RelValue<'_>) -> bool {
Expand Down
56 changes: 32 additions & 24 deletions crates/table/benches/page_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -721,12 +721,12 @@ impl IndexedRow for Box<str> {
}
}

fn make_table_with_indexes<R: IndexedRow>() -> Table {
fn make_table_with_indexes<R: IndexedRow>(unique: bool) -> Table {
let schema = R::make_schema();
let mut tbl = Table::new(schema.into(), SquashedOffset::COMMITTED_STATE);

let cols = R::indexed_columns();
let idx = tbl.new_index(IndexId::SENTINEL, &cols, false).unwrap();
let idx = tbl.new_index(IndexId::SENTINEL, &cols, unique).unwrap();
tbl.insert_index(&NullBlobStore, cols, idx);

tbl
Expand Down Expand Up @@ -774,22 +774,27 @@ fn clear_all_same<R: IndexedRow>(tbl: &mut Table, val_same: u64) {
}
}

fn bench_id_for_index(name: &str, num_rows: u64, same_ratio: f64, num_same: usize) -> BenchmarkId {
fn bench_id_for_index(name: &str, num_rows: u64, same_ratio: f64, num_same: usize, unique: bool) -> BenchmarkId {
BenchmarkId::new(
name,
format_args!("(rows = {num_rows}, sratio = {same_ratio}, snum = {num_same})"),
format_args!("(rows = {num_rows}, sratio = {same_ratio}, snum = {num_same}, unique = {unique})"),
)
}

fn make_table_with_same_ratio<R: IndexedRow>(
mut make_row: impl FnMut(u64) -> R,
num_rows: u64,
same_ratio: f64,
unique: bool,
) -> (Table, usize, u64) {
let mut tbl = make_table_with_indexes::<R>();
let mut tbl = make_table_with_indexes::<R>(unique);

let num_same = (num_rows as f64 * same_ratio) as usize;
let num_same = num_same.max(1);
let num_same = if unique {
1
} else {
let num_same = (num_rows as f64 * same_ratio) as usize;
num_same.max(1)
};
let num_diff = num_rows / num_same as u64;

for i in 0..num_diff {
Expand All @@ -808,11 +813,11 @@ fn index_insert(c: &mut Criterion) {
same_ratio: f64,
) {
let make_row_move = &mut make_row;
let (tbl, num_same, _) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio);
let (tbl, num_same, _) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio, false);
let mut ctx = (tbl, NullBlobStore);

group.bench_with_input(
bench_id_for_index(name, num_rows, same_ratio, num_same),
bench_id_for_index(name, num_rows, same_ratio, num_same, false),
&num_rows,
|b, &num_rows| {
let pre = |_, (tbl, _): &mut (Table, NullBlobStore)| {
Expand Down Expand Up @@ -859,12 +864,13 @@ fn index_seek(c: &mut Criterion) {
name: &str,
num_rows: u64,
same_ratio: f64,
unique: bool,
) {
let make_row_move = &mut make_row;
let (tbl, num_same, num_diff) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio);
let (tbl, num_same, num_diff) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio, unique);

group.bench_with_input(
bench_id_for_index(name, num_rows, same_ratio, num_same),
bench_id_for_index(name, num_rows, same_ratio, num_same, unique),
&num_diff,
|b, &num_diff| {
let col_to_seek = black_box(R::column_value_from_u64(num_diff / 2));
Expand Down Expand Up @@ -896,25 +902,27 @@ fn index_seek(c: &mut Criterion) {
group: Group<'_, '_>,
name: &str,
same_ratio: f64,
unique: bool,
) {
group.throughput(Throughput::Elements(1));
for num_rows in powers(TABLE_SIZE_POWERS) {
bench_index_seek(&mut make_row, group, name, num_rows, same_ratio);
bench_index_seek(&mut make_row, group, name, num_rows, same_ratio, unique);
}
}

let mut group = c.benchmark_group("index_seek");

bench_many_table_sizes::<u64>(FixedLenRow::from_u64, &mut group, "u64", 0.0);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.00);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.01);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.05);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.10);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.25);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.50);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 1.00);
bench_many_table_sizes::<U32x64>(FixedLenRow::from_u64, &mut group, "U32x64", 0.0);
bench_many_table_sizes::<Box<str>>(|i| i.to_string().into(), &mut group, "String", 0.0);
bench_many_table_sizes::<u64>(FixedLenRow::from_u64, &mut group, "u64", 0.0, false);
bench_many_table_sizes::<u64>(FixedLenRow::from_u64, &mut group, "u64", 0.0, true);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.00, false);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.01, false);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.05, false);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.10, false);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.25, false);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 0.50, false);
bench_many_table_sizes::<U32x8>(FixedLenRow::from_u64, &mut group, "U32x8", 1.00, false);
bench_many_table_sizes::<U32x64>(FixedLenRow::from_u64, &mut group, "U32x64", 0.0, false);
bench_many_table_sizes::<Box<str>>(|i| i.to_string().into(), &mut group, "String", 0.0, false);
}

fn index_delete(c: &mut Criterion) {
Expand All @@ -926,10 +934,10 @@ fn index_delete(c: &mut Criterion) {
same_ratio: f64,
) {
let make_row_move = &mut make_row;
let (mut tbl, num_same, _) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio);
let (mut tbl, num_same, _) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio, false);

group.bench_with_input(
bench_id_for_index(name, num_rows, same_ratio, num_same),
bench_id_for_index(name, num_rows, same_ratio, num_same, false),
&num_rows,
|b, &num_rows| {
let pre = |_, tbl: &mut Table| {
Expand Down
Loading
Loading