feat(subscription): Support specified pk read log store #19274

Open · wants to merge 17 commits into base: main
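This PR lets a subscription cursor push primary-key bounds into the log-store batch scan: `LogRowSeqScanNode` gains a `repeated ScanRange scan_ranges` field, `LogRowSeqScanExecutor` threads the decoded ranges through `batch_iter_log_with_pk_bounds` and executes them one at a time, and a new e2e test (`test_explain_cursor`) checks that each subsequent `FETCH` plans a scan that resumes after the last primary key returned.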
43 changes: 43 additions & 0 deletions e2e_test/subscription/main.py
@@ -109,6 +109,7 @@ def test_cursor_since_begin():
execute_insert("insert into t1 values(6,6)",conn)
execute_insert("flush",conn)
row = execute_query("fetch next from cur",conn)
row = execute_query("fetch next from cur",conn)
check_rows_data([4,4],row[0],"Insert")
row = execute_query("fetch next from cur",conn)
check_rows_data([5,5],row[0],"Insert")
@@ -138,6 +139,7 @@ def test_cursor_since_now():
execute_insert("insert into t1 values(6,6)",conn)
execute_insert("flush",conn)
row = execute_query("fetch next from cur",conn)
row = execute_query("fetch next from cur",conn)
check_rows_data([6,6],row[0],"Insert")
row = execute_query("fetch next from cur",conn)
assert row == []
@@ -163,6 +165,7 @@ def test_cursor_without_since():
execute_insert("insert into t1 values(6,6)",conn)
execute_insert("flush",conn)
row = execute_query("fetch next from cur",conn)
row = execute_query("fetch next from cur",conn)
check_rows_data([6,6],row[0],"Insert")
row = execute_query("fetch next from cur",conn)
assert row == []
@@ -187,6 +190,7 @@ def test_cursor_since_rw_timestamp():
execute_insert("insert into t1 values(6,6)",conn)
execute_insert("flush",conn)
row = execute_query("fetch next from cur",conn)
row = execute_query("fetch next from cur",conn)
valuelen = len(row[0])
rw_timestamp_1 = row[0][valuelen - 1]
check_rows_data([4,4],row[0],"Insert")
@@ -204,16 +208,19 @@ def test_cursor_since_rw_timestamp():

execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_1}",conn)
row = execute_query("fetch next from cur",conn)
row = execute_query("fetch next from cur",conn)
check_rows_data([4,4],row[0],"Insert")
execute_insert("close cur",conn)

execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_2}",conn)
row = execute_query("fetch next from cur",conn)
row = execute_query("fetch next from cur",conn)
check_rows_data([5,5],row[0],"Insert")
execute_insert("close cur",conn)

execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_3}",conn)
row = execute_query("fetch next from cur",conn)
row = execute_query("fetch next from cur",conn)
assert row == []
execute_insert("close cur",conn)

@@ -543,6 +550,41 @@ def test_order_multi_pk():
check_rows_data([17,17,17,17],row[4],"Insert")
drop_table_subscription()

def test_explain_cursor():
print(f"test_order_mutil_pk")
create_table_subscription()
conn = psycopg2.connect(
host="localhost",
port="4566",
user="root",
database="dev"
)
execute_insert("insert into t5 values(1,1,1,1)",conn)
execute_insert("flush",conn)
execute_insert("insert into t5 values(2,2,2,2)",conn)
execute_insert("flush",conn)
execute_insert("declare cur subscription cursor for sub5 full",conn)
execute_insert("insert into t5 values(3,3,3,3)",conn)
execute_insert("flush",conn)
execute_insert("insert into t5 values(4,4,4,4)",conn)
execute_insert("flush",conn)
plan = execute_query("explain fetch next from cur",conn)
assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }"
assert plan[1][0] == "└─BatchScan { table: t5, columns: [v1, v2, v3, v4] }"
execute_query("fetch next from cur",conn)
plan = execute_query("explain fetch next from cur",conn)
assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }"
assert plan[1][0] == "└─BatchScan { table: t5, columns: [v1, v2, v3, v4], scan_ranges: [(v1, v2) > (Int32(1), Int32(1))] }"
execute_query("fetch next from cur",conn)
execute_query("fetch next from cur",conn)
plan = execute_query("explain fetch next from cur",conn)
print(plan)
assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }"
assert "└─BatchLogSeqScan { table: t5, columns: [v1, v2, v3, v4, op]" in plan[1][0]
assert "scan_ranges: [(v1, v2) > (Int32(3), Int32(3))] }" in plan[1][0]
execute_query("fetch next from cur",conn)
drop_table_subscription()

if __name__ == "__main__":
test_cursor_snapshot()
test_cursor_op()
@@ -559,3 +601,4 @@ def test_order_multi_pk():
test_order_mv()
test_order_multi_pk()
test_block_cursor()
test_explain_cursor()
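For intuition, here is a minimal, self-contained sketch of the resume rule that `test_explain_cursor` asserts on. The types are invented for illustration (Rust tuples standing in for composite pks); the real `ScanRange` is the protobuf message touched below, not this toy.

```rust
/// Toy stand-in for a composite primary key such as (v1, v2).
type Pk = (i32, i32);

/// Hypothetical scan range: an exclusive lower bound on the pk, mirroring
/// the plan fragment `scan_ranges: [(v1, v2) > (Int32(3), Int32(3))]`.
#[derive(Debug)]
struct ScanRange {
    lower_bound_exclusive: Option<Pk>,
}

/// The range for the next fetch is derived from the last row returned;
/// `None` (nothing fetched yet) means an unbounded scan.
fn next_scan_range(last_returned_pk: Option<Pk>) -> ScanRange {
    ScanRange {
        lower_bound_exclusive: last_returned_pk,
    }
}

/// Apply a range to rows already sorted by pk. Rust's tuple comparison is
/// lexicographic, matching composite-pk ordering.
fn apply(range: &ScanRange, rows: &[Pk]) -> Vec<Pk> {
    rows.iter()
        .copied()
        .filter(|pk| match range.lower_bound_exclusive {
            Some(lb) => *pk > lb,
            None => true,
        })
        .collect()
}

fn main() {
    let rows = [(1, 1), (2, 2), (3, 3), (4, 4)];
    // First fetch: unbounded scan.
    assert_eq!(apply(&next_scan_range(None), &rows), rows.to_vec());
    // After (3, 3) was returned: resume strictly after it.
    assert_eq!(apply(&next_scan_range(Some((3, 3))), &rows), vec![(4, 4)]);
}
```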
1 change: 1 addition & 0 deletions proto/batch_plan.proto
@@ -140,6 +140,7 @@ message LogRowSeqScanNode {
common.BatchQueryEpoch old_epoch = 4;
common.BatchQueryEpoch new_epoch = 5;
bool ordered = 6;
repeated ScanRange scan_ranges = 7;
}

message InsertNode {
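The new `scan_ranges` field reuses the `ScanRange` message already defined in this file for batch table scans, so the frontend can push the cursor's primary-key predicate down into the log-store scan rather than filtering rows after the fact; it is decoded on the executor side by `build_scan_ranges_from_pb` in the change below.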
49 changes: 34 additions & 15 deletions src/batch/executors/src/executor/log_row_seq_scan.rs
@@ -33,8 +33,11 @@ use risingwave_storage::table::batch_table::storage_table::StorageTable;
use risingwave_storage::table::collect_data_chunk;
use risingwave_storage::{dispatch_state_store, StateStore};

use super::{BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder};
use super::{
BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder, ScanRange,
};
use crate::error::{BatchError, Result};
use crate::executor::build_scan_ranges_from_pb;
use crate::monitor::BatchMetrics;

pub struct LogRowSeqScanExecutor<S: StateStore> {
@@ -52,6 +55,7 @@ pub struct LogRowSeqScanExecutor<S: StateStore> {
new_epoch: u64,
version_id: HummockVersionId,
ordered: bool,
scan_ranges: Vec<ScanRange>,
}

impl<S: StateStore> LogRowSeqScanExecutor<S> {
@@ -64,6 +68,7 @@ impl<S: StateStore> LogRowSeqScanExecutor<S> {
identity: String,
metrics: Option<BatchMetrics>,
ordered: bool,
scan_ranges: Vec<ScanRange>,
) -> Self {
let mut schema = table.schema().clone();
schema.fields.push(Field::with_name(
@@ -80,6 +85,7 @@ impl<S: StateStore> LogRowSeqScanExecutor<S> {
new_epoch,
version_id,
ordered,
scan_ranges,
}
}
}
@@ -137,6 +143,9 @@ impl BoxedExecutorBuilder for LogStoreRowSeqScanExecutorBuilder {
let old_epoch = old_epoch.epoch;
let new_epoch = new_epoch.epoch;

let scan_ranges =
build_scan_ranges_from_pb(&log_store_seq_scan_node.scan_ranges, table_desc)?;

dispatch_state_store!(source.context().state_store(), state_store, {
let table = StorageTable::new_partial(state_store, column_ids, vnodes, table_desc);
Ok(Box::new(LogRowSeqScanExecutor::new(
@@ -148,6 +157,7 @@ impl BoxedExecutorBuilder for LogStoreRowSeqScanExecutorBuilder {
source.plan_node().get_identity().clone(),
metrics,
log_store_seq_scan_node.ordered,
scan_ranges,
)))
})
}
@@ -178,6 +188,7 @@ impl<S: StateStore> LogRowSeqScanExecutor<S> {
version_id,
schema,
ordered,
scan_ranges,
..
} = *self;
let table = std::sync::Arc::new(table);
@@ -189,20 +200,23 @@ impl<S: StateStore> LogRowSeqScanExecutor<S> {
// Range Scan
// WARN: DO NOT use `select` to execute range scans concurrently
// it can consume too much memory if there're too many ranges.
let stream = Self::execute_range(
table.clone(),
old_epoch,
new_epoch,
version_id,
chunk_size,
histogram,
Arc::new(schema.clone()),
ordered,
);
#[for_await]
for chunk in stream {
let chunk = chunk?;
yield chunk;
for range in scan_ranges {
let stream = Self::execute_range(
table.clone(),
old_epoch,
new_epoch,
version_id,
chunk_size,
histogram,
Arc::new(schema.clone()),
ordered,
range,
);
#[for_await]
for chunk in stream {
let chunk = chunk?;
yield chunk;
}
}
}

@@ -216,13 +230,18 @@ impl<S: StateStore> LogRowSeqScanExecutor<S> {
histogram: Option<impl Deref<Target = Histogram>>,
schema: Arc<Schema>,
ordered: bool,
scan_range: ScanRange,
) {
let pk_prefix = scan_range.pk_prefix.clone();
let range_bounds = scan_range.convert_to_range_bounds(&table);
// Range Scan.
let iter = table
.batch_iter_log_with_pk_bounds(
old_epoch,
HummockReadEpoch::BatchQueryCommitted(new_epoch, version_id),
ordered,
range_bounds,
pk_prefix,
)
.await?
.flat_map(|r| {
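Note the `for range in scan_ranges` loop intentionally drains one range's stream before opening the next; as the `WARN` comment it sits under explains, executing range scans concurrently can buffer too much data when there are many ranges. A toy synchronous analogue of that pattern, with slices standing in for storage streams (none of this is RisingWave API):

```rust
/// Toy stand-ins: a "range" is a slice of pre-sorted rows, and a "stream"
/// is an iterator over fixed-size chunks of those rows.
fn execute_range(rows: &[i32], chunk_size: usize) -> impl Iterator<Item = Vec<i32>> + '_ {
    rows.chunks(chunk_size).map(|c| c.to_vec())
}

/// Drain the ranges one after another, like the executor's loop: at any
/// moment only one range's stream is live, bounding buffered memory.
fn execute_all(ranges: &[&[i32]], chunk_size: usize) -> Vec<Vec<i32>> {
    let mut out = Vec::new();
    for range in ranges {
        for chunk in execute_range(range, chunk_size) {
            out.push(chunk); // in the real executor this is `yield chunk`
        }
    }
    out
}

fn main() {
    let ranges: [&[i32]; 2] = [&[1, 2, 3], &[10, 11]];
    assert_eq!(
        execute_all(&ranges, 2),
        vec![vec![1, 2], vec![3], vec![10, 11]]
    );
}
```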