From 5852692747d5ea310c62bb403313e2268ae3a4d0 Mon Sep 17 00:00:00 2001 From: kamille Date: Wed, 15 Nov 2023 08:52:06 +0800 Subject: [PATCH 01/13] remove `primary_key_indexes` info in append mode table's query path. --- .../src/instance/flush_compaction.rs | 63 ++-- analytic_engine/src/instance/mod.rs | 61 +++- analytic_engine/src/instance/read.rs | 34 +- .../src/instance/reorder_memtable.rs | 12 +- analytic_engine/src/memtable/columnar/iter.rs | 46 ++- analytic_engine/src/memtable/mod.rs | 8 +- analytic_engine/src/memtable/reversed_iter.rs | 10 +- analytic_engine/src/memtable/skiplist/iter.rs | 33 +- analytic_engine/src/memtable/skiplist/mod.rs | 24 +- analytic_engine/src/row_iter/chain.rs | 52 ++- analytic_engine/src/row_iter/dedup.rs | 26 +- analytic_engine/src/row_iter/merge.rs | 83 +++-- analytic_engine/src/row_iter/mod.rs | 5 +- .../src/row_iter/record_batch_stream.rs | 76 ++--- analytic_engine/src/row_iter/tests.rs | 22 +- analytic_engine/src/sst/factory.rs | 6 +- .../src/sst/parquet/async_reader.rs | 68 ++-- analytic_engine/src/sst/parquet/writer.rs | 34 +- analytic_engine/src/sst/reader.rs | 6 +- analytic_engine/src/sst/writer.rs | 7 +- common_types/src/projected_schema.rs | 322 +++++++++++------- common_types/src/record_batch.rs | 254 +++++++------- common_types/src/row/contiguous.rs | 27 +- common_types/src/row/mod.rs | 20 +- common_types/src/schema.rs | 13 +- common_types/src/tests.rs | 21 +- partition_table_engine/src/scan_builder.rs | 2 +- system_catalog/src/tables.rs | 26 +- table_engine/src/provider.rs | 35 +- 29 files changed, 837 insertions(+), 559 deletions(-) diff --git a/analytic_engine/src/instance/flush_compaction.rs b/analytic_engine/src/instance/flush_compaction.rs index fa66d78953..8fb060d31d 100644 --- a/analytic_engine/src/instance/flush_compaction.rs +++ b/analytic_engine/src/instance/flush_compaction.rs @@ -17,8 +17,8 @@ use std::{cmp, collections::Bound, fmt, sync::Arc}; use common_types::{ - projected_schema::ProjectedSchema, - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, request_id::RequestId, row::RowViewOnBatch, time::TimeRange, @@ -41,8 +41,8 @@ use wal::manager::WalLocation; use crate::{ compaction::{CompactionInputFiles, CompactionTask, ExpiredFiles}, instance::{ - self, create_sst_read_option, reorder_memtable::Reorder, - serial_executor::TableFlushScheduler, ScanType, SpaceStore, SpaceStoreRef, + self, reorder_memtable::Reorder, serial_executor::TableFlushScheduler, ScanType, + SpaceStore, SpaceStoreRef, SstReadOptionsBuilder, }, manifest::meta_edit::{ AlterOptionsMeta, AlterSchemaMeta, MetaEdit, MetaEditRequest, MetaUpdate, VersionEditMeta, @@ -542,7 +542,7 @@ impl FlushTask { for time_range in &time_ranges { let (batch_record_sender, batch_record_receiver) = - channel::>(DEFAULT_CHANNEL_SIZE); + channel::>(DEFAULT_CHANNEL_SIZE); let file_id = self .table_data .alloc_file_id(&self.space_store.manifest) @@ -879,20 +879,29 @@ impl SpaceStore { let table_options = table_data.table_options(); let projected_schema = ProjectedSchema::no_projection(schema.clone()); let predicate = Arc::new(Predicate::empty()); - let sst_read_options = create_sst_read_option( + let maybe_table_level_metrics = table_data + .metrics + .maybe_table_level_metrics() + .sst_metrics + .clone(); + let sst_read_options_builder = SstReadOptionsBuilder::new( ScanType::Compaction, scan_options, - table_data - .metrics - 
.maybe_table_level_metrics() - .sst_metrics - .clone(), + maybe_table_level_metrics, table_options.num_rows_per_row_group, - projected_schema.clone(), predicate, self.meta_cache.clone(), runtime, ); + let fetching_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); + let fetching_schema = fetching_schema.into_record_schema(); + let table_schema = projected_schema.table_schema().clone(); + let record_fetching_ctx_builder = RecordFetchingContextBuilder::new( + fetching_schema, + table_schema, + Some(primary_key_indexes), + ); let iter_options = IterOptions { batch_size: table_options.num_rows_per_row_group, @@ -911,8 +920,8 @@ impl SpaceStore { sequence, projected_schema, predicate: Arc::new(Predicate::empty()), + sst_read_options_builder: sst_read_options_builder.clone(), sst_factory: &self.sst_factory, - sst_read_options: sst_read_options.clone(), store_picker: self.store_picker(), merge_iter_options: iter_options.clone(), need_dedup: table_options.need_dedup(), @@ -937,6 +946,8 @@ impl SpaceStore { row_iter::record_batch_with_key_iter_to_stream(merge_iter) }; + // TODO: eliminate the duplicated building of `SstReadOptions`. + let sst_read_options = sst_read_options_builder.build(record_fetching_ctx_builder); let sst_meta = { let meta_reader = SstMetaReader { space_id: table_data.space_id, @@ -1057,12 +1068,17 @@ impl SpaceStore { } fn split_record_batch_with_time_ranges( - record_batch: RecordBatchWithKey, + record_batch: FetchingRecordBatch, time_ranges: &[TimeRange], timestamp_idx: usize, -) -> Result> { - let mut builders: Vec = (0..time_ranges.len()) - .map(|_| RecordBatchWithKeyBuilder::new(record_batch.schema_with_key().clone())) +) -> Result> { + let fetching_schema = record_batch.schema(); + let primary_key_indexes = record_batch.primary_key_indexes(); + let mut builders: Vec = (0..time_ranges.len()) + .map(|_| { + let primary_key_indexes = primary_key_indexes.map(|idxs| idxs.to_vec()); + FetchingRecordBatchBuilder::new(fetching_schema.clone(), primary_key_indexes) + }) .collect(); for row_idx in 0..record_batch.num_rows() { @@ -1103,11 +1119,18 @@ fn build_mem_table_iter( table_data: &TableDataRef, ) -> Result { let scan_ctx = ScanContext::default(); + let projected_schema = ProjectedSchema::no_projection(table_data.schema()); + let fetching_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); + let fetching_schema = fetching_schema.into_record_schema(); + let table_schema = projected_schema.table_schema().clone(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema, Some(primary_key_indexes)); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, end_user_key: Bound::Unbounded, sequence: common_types::MAX_SEQUENCE_NUMBER, - projected_schema: ProjectedSchema::no_projection(table_data.schema()), + record_fetching_ctx_builder, need_dedup: table_data.dedup(), reverse: false, metrics_collector: None, @@ -1122,7 +1145,7 @@ fn build_mem_table_iter( mod tests { use common_types::{ tests::{ - build_record_batch_with_key_by_rows, build_row, build_row_opt, + build_fetching_record_batch_by_rows, build_row, build_row_opt, check_record_batch_with_key_with_rows, }, time::TimeRange, @@ -1164,7 +1187,7 @@ mod tests { .into_iter() .flatten() .collect(); - let record_batch_with_key = build_record_batch_with_key_by_rows(rows); + let record_batch_with_key = 
build_fetching_record_batch_by_rows(rows); let column_num = record_batch_with_key.num_columns(); let time_ranges = vec![ TimeRange::new_unchecked_for_test(0, 100), diff --git a/analytic_engine/src/instance/mod.rs b/analytic_engine/src/instance/mod.rs index 1ff48cc927..8a67d0dce5 100644 --- a/analytic_engine/src/instance/mod.rs +++ b/analytic_engine/src/instance/mod.rs @@ -33,7 +33,10 @@ pub(crate) mod write; use std::sync::Arc; -use common_types::{projected_schema::ProjectedSchema, table::TableId}; +use common_types::{ + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + table::TableId, +}; use generic_error::{BoxError, GenericError}; use logger::{error, info}; use macros::define_result; @@ -313,32 +316,58 @@ impl Instance { } } -// TODO: make it a builder -#[allow(clippy::too_many_arguments)] -fn create_sst_read_option( +#[derive(Debug, Clone)] +pub struct SstReadOptionsBuilder { scan_type: ScanType, scan_options: ScanOptions, maybe_table_level_metrics: Arc, num_rows_per_row_group: usize, - projected_schema: ProjectedSchema, predicate: PredicateRef, meta_cache: Option, runtime: Arc, -) -> SstReadOptions { - SstReadOptions { - maybe_table_level_metrics, - num_rows_per_row_group, - frequency: scan_type.into(), - projected_schema, - predicate, - meta_cache, - scan_options, - runtime, +} + +impl SstReadOptionsBuilder { + pub fn new( + scan_type: ScanType, + scan_options: ScanOptions, + maybe_table_level_metrics: Arc, + num_rows_per_row_group: usize, + predicate: PredicateRef, + meta_cache: Option, + runtime: Arc, + ) -> Self { + Self { + scan_type, + scan_options, + maybe_table_level_metrics, + num_rows_per_row_group, + predicate, + meta_cache, + runtime, + } + } + + pub fn build( + self, + record_fetching_ctx_builder: RecordFetchingContextBuilder, + ) -> SstReadOptions { + SstReadOptions { + maybe_table_level_metrics: self.maybe_table_level_metrics, + num_rows_per_row_group: self.num_rows_per_row_group, + frequency: self.scan_type.into(), + record_fetching_ctx_builder, + predicate: self.predicate, + meta_cache: self.meta_cache, + scan_options: self.scan_options, + runtime: self.runtime, + } } } /// Scan type which mapped to the low level `ReadFrequency` in sst reader. 
-enum ScanType { +#[derive(Debug, Clone, Copy)] +pub enum ScanType { Query, Compaction, } diff --git a/analytic_engine/src/instance/read.rs b/analytic_engine/src/instance/read.rs index c425505e6c..748ef143ad 100644 --- a/analytic_engine/src/instance/read.rs +++ b/analytic_engine/src/instance/read.rs @@ -23,7 +23,7 @@ use std::{ use async_stream::try_stream; use common_types::{ projected_schema::ProjectedSchema, - record_batch::{RecordBatch, RecordBatchWithKey}, + record_batch::{FetchingRecordBatch, RecordBatch}, schema::RecordSchema, time::TimeRange, }; @@ -42,7 +42,7 @@ use time_ext::current_time_millis; use trace_metric::Metric; use crate::{ - instance::{create_sst_read_option, Instance, ScanType}, + instance::{Instance, ScanType, SstReadOptionsBuilder}, row_iter::{ chain, chain::{ChainConfig, ChainIterator}, @@ -50,7 +50,6 @@ use crate::{ merge::{MergeBuilder, MergeConfig, MergeIterator}, IterOptions, RecordBatchWithKeyIterator, }, - sst::factory::SstReadOptions, table::{ data::TableData, version::{ReadView, TableVersion}, @@ -129,7 +128,7 @@ impl Instance { None, )); - let sst_read_options = create_sst_read_option( + let sst_read_options_builder = SstReadOptionsBuilder::new( ScanType::Query, self.scan_options.clone(), table_data @@ -138,7 +137,6 @@ impl Instance { .sst_metrics .clone(), table_options.num_rows_per_row_group, - request.projected_schema.clone(), request.predicate.clone(), self.meta_cache.clone(), self.read_runtime().clone(), @@ -146,12 +144,22 @@ impl Instance { if need_merge_sort { let merge_iters = self - .build_merge_iters(table_data, &request, &table_options, sst_read_options) + .build_merge_iters( + table_data, + &request, + &table_options, + sst_read_options_builder, + ) .await?; self.build_partitioned_streams(&request, merge_iters) } else { let chain_iters = self - .build_chain_iters(table_data, &request, &table_options, sst_read_options) + .build_chain_iters( + table_data, + &request, + &table_options, + sst_read_options_builder, + ) .await?; self.build_partitioned_streams(&request, chain_iters) } @@ -189,7 +197,7 @@ impl Instance { table_data: &TableData, request: &ReadRequest, table_options: &TableOptions, - sst_read_options: SstReadOptions, + sst_read_options_builder: SstReadOptionsBuilder, ) -> Result>> { // Current visible sequence let sequence = table_data.last_sequence(); @@ -197,7 +205,7 @@ impl Instance { let version = table_data.current_version(); let read_views = self.partition_ssts_and_memtables(time_range, version, table_options); let iter_options = self.make_iter_options(table_options.num_rows_per_row_group); - + // generate builder let mut iters = Vec::with_capacity(read_views.len()); for (idx, read_view) in read_views.into_iter().enumerate() { let metrics_collector = request @@ -213,7 +221,7 @@ impl Instance { projected_schema: request.projected_schema.clone(), predicate: request.predicate.clone(), sst_factory: &self.space_store.sst_factory, - sst_read_options: sst_read_options.clone(), + sst_read_options_builder: sst_read_options_builder.clone(), store_picker: self.space_store.store_picker(), merge_iter_options: iter_options.clone(), need_dedup: table_options.need_dedup(), @@ -249,7 +257,7 @@ impl Instance { table_data: &TableData, request: &ReadRequest, table_options: &TableOptions, - sst_read_options: SstReadOptions, + sst_read_options_builder: SstReadOptionsBuilder, ) -> Result> { let projected_schema = request.projected_schema.clone(); @@ -271,7 +279,7 @@ impl Instance { table_id: table_data.id, projected_schema: projected_schema.clone(), 
predicate: request.predicate.clone(), - sst_read_options: sst_read_options.clone(), + sst_read_options_builder: sst_read_options_builder.clone(), sst_factory: &self.space_store.sst_factory, store_picker: self.space_store.store_picker(), }; @@ -372,7 +380,7 @@ impl StreamStateOnMultiIters { async fn fetch_next_batch( &mut self, - ) -> Option> { + ) -> Option> { loop { if self.is_exhausted() { return None; diff --git a/analytic_engine/src/instance/reorder_memtable.rs b/analytic_engine/src/instance/reorder_memtable.rs index 2e0901bbaa..01a1120dcf 100644 --- a/analytic_engine/src/instance/reorder_memtable.rs +++ b/analytic_engine/src/instance/reorder_memtable.rs @@ -26,7 +26,7 @@ pub use arrow::{ }; use async_trait::async_trait; use common_types::{ - record_batch::{RecordBatchData, RecordBatchWithKey}, + record_batch::{FetchingRecordBatch, RecordBatchData}, schema::Schema, }; use datafusion::{ @@ -71,7 +71,7 @@ define_result!(Error); pub type DfResult = std::result::Result; type SendableRecordBatchWithkeyStream = - Pin> + Send>>; + Pin> + Send>>; impl From for Error { fn from(df_err: DataFusionError) -> Self { @@ -275,12 +275,16 @@ impl Reorder { // 3. Execute plan and transform stream let stream = execute_stream(physical_plan, ctx.task_ctx())?; - let schema_with_key = self.schema.to_record_schema_with_key(); + let record_schema = self.schema.to_record_schema(); let stream = stream.map(move |batch| { let batch = batch.context(FetchRecordBatch)?; let data = RecordBatchData::try_from(batch).context(ConvertRecordBatchData)?; - Ok(RecordBatchWithKey::new(schema_with_key.clone(), data)) + Ok(FetchingRecordBatch::new_from_parts( + record_schema.clone(), + None, + data, + )) }); Ok(Box::pin(stream)) diff --git a/analytic_engine/src/memtable/columnar/iter.rs b/analytic_engine/src/memtable/columnar/iter.rs index 5e1f136451..d38888f221 100644 --- a/analytic_engine/src/memtable/columnar/iter.rs +++ b/analytic_engine/src/memtable/columnar/iter.rs @@ -27,8 +27,8 @@ use common_types::{ column::Column, column_schema::ColumnId, datum::Datum, - projected_schema::{ProjectedSchema, RowProjector}, - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + projected_schema::{ProjectedSchema, RecordFetchingContext}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::Row, schema::Schema, SequenceNumber, @@ -66,8 +66,7 @@ pub struct ColumnarIterImpl + Clone + Sync + Send> /// Schema of this memtable, used to decode row memtable_schema: Schema, /// Projection of schema to read - projected_schema: ProjectedSchema, - projector: RowProjector, + record_fetching_ctx: RecordFetchingContext, // Options related: batch_size: usize, @@ -101,17 +100,16 @@ impl + Clone + Sync + Send> ColumnarIterImpl { last_sequence: SequenceNumber, skiplist: Skiplist, ) -> Result { - let projector = request - .projected_schema - .try_project_with_key(&schema) + let record_fetching_ctx = request + .record_fetching_ctx_builder + .build(&schema) .context(ProjectSchema)?; let mut columnar_iter = Self { memtable, row_num, current_idx: 0, memtable_schema: schema, - projected_schema: request.projected_schema, - projector, + record_fetching_ctx, batch_size: ctx.batch_size, deadline: ctx.deadline, start_user_key: request.start_user_key, @@ -190,7 +188,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } /// Fetch next record batch - fn fetch_next_record_batch(&mut self) -> Result> { + fn fetch_next_record_batch(&mut self) -> Result> { debug_assert_eq!(State::Initialized, self.state); assert!(self.batch_size > 0); let rows = 
if !self.need_dedup { @@ -207,8 +205,14 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } } - let mut builder = RecordBatchWithKeyBuilder::with_capacity( - self.projected_schema.to_record_schema_with_key(), + let fetching_schema = self.record_fetching_ctx.fetching_schema().clone(); + let primary_key_indexes = self + .record_fetching_ctx + .primary_key_indexes() + .map(|idxs| idxs.to_vec()); + let mut builder = FetchingRecordBatchBuilder::with_capacity( + fetching_schema, + primary_key_indexes, self.batch_size, ); for row in rows.into_iter() { @@ -308,7 +312,12 @@ impl + Clone + Sync + Send> ColumnarIterImpl { Row::from_datums(vec![Datum::Null; self.memtable_schema.num_columns()]); self.batch_size ]; - for (col_idx, column_schema_idx) in self.projector.source_projection().iter().enumerate() { + for (col_idx, column_schema_idx) in self + .record_fetching_ctx + .fetching_source_column_indexes() + .iter() + .enumerate() + { if let Some(column_schema_idx) = column_schema_idx { let column_schema = self.memtable_schema.column(*column_schema_idx); if let Some(column) = memtable.get(&column_schema.id) { @@ -328,11 +337,16 @@ impl + Clone + Sync + Send> ColumnarIterImpl { let mut num_rows = 0; let memtable = self.memtable.read().unwrap(); - let record_schema = self.projected_schema.to_record_schema(); + let record_schema = self.record_fetching_ctx.fetching_schema(); let mut rows = vec![Row::from_datums(vec![Datum::Null; record_schema.num_columns()]); self.batch_size]; - for (col_idx, column_schema_idx) in self.projector.source_projection().iter().enumerate() { + for (col_idx, column_schema_idx) in self + .record_fetching_ctx + .fetching_source_column_indexes() + .iter() + .enumerate() + { if let Some(column_schema_idx) = column_schema_idx { let column_schema = self.memtable_schema.column(*column_schema_idx); if let Some(column) = memtable.get(&column_schema.id) { @@ -378,7 +392,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } impl Iterator for ColumnarIterImpl { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { if self.state != State::Initialized { diff --git a/analytic_engine/src/memtable/mod.rs b/analytic_engine/src/memtable/mod.rs index 4de3dffdd2..a83bd65145 100644 --- a/analytic_engine/src/memtable/mod.rs +++ b/analytic_engine/src/memtable/mod.rs @@ -24,8 +24,8 @@ use std::{ops::Bound, sync::Arc, time::Instant}; use bytes_ext::{ByteVec, Bytes}; use common_types::{ - projected_schema::ProjectedSchema, - record_batch::RecordBatchWithKey, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + record_batch::FetchingRecordBatch, row::Row, schema::{IndexInWriterSchema, Schema}, time::TimeRange, @@ -203,7 +203,7 @@ pub struct ScanRequest { /// visible. pub sequence: SequenceNumber, /// Schema and projection to read. - pub projected_schema: ProjectedSchema, + pub record_fetching_ctx_builder: RecordFetchingContextBuilder, pub need_dedup: bool, pub reverse: bool, /// Collector for scan metrics. 
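Note: the `record_fetching_ctx_builder` field in the `ScanRequest` above is the hand-off point for this patch. Whether it carries primary key indexes is what separates the dedup read path from the append-mode path named in the commit subject. A minimal sketch of the two construction patterns used later in this patch (merge/dedup vs. append-mode chain); `projected_schema` is assumed to be the query's `ProjectedSchema`, and the snippet is illustrative only, not part of the diff:

    // Dedup (merge-sort) path: keep the primary key indexes so batches stay key-aware.
    let schema_with_key = projected_schema.to_record_schema_with_key();
    let primary_key_indexes = schema_with_key.primary_key_idx().to_vec();
    let dedup_ctx_builder = RecordFetchingContextBuilder::new(
        schema_with_key.into_record_schema(),
        projected_schema.table_schema().clone(),
        Some(primary_key_indexes),
    );

    // Append-mode (chain) path: no dedup is needed, so the indexes are dropped.
    let append_ctx_builder = RecordFetchingContextBuilder::new(
        projected_schema.to_record_schema(),
        projected_schema.table_schema().clone(),
        None,
    );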
@@ -291,4 +291,4 @@ pub struct Metrics { pub type MemTableRef = Arc; /// A pointer to columnar iterator -pub type ColumnarIterPtr = Box> + Send + Sync>; +pub type ColumnarIterPtr = Box> + Send + Sync>; diff --git a/analytic_engine/src/memtable/reversed_iter.rs b/analytic_engine/src/memtable/reversed_iter.rs index 4c1842786f..00e2fcaaa0 100644 --- a/analytic_engine/src/memtable/reversed_iter.rs +++ b/analytic_engine/src/memtable/reversed_iter.rs @@ -14,7 +14,7 @@ use std::iter::Rev; -use common_types::record_batch::RecordBatchWithKey; +use common_types::record_batch::FetchingRecordBatch; use generic_error::BoxError; use snafu::ResultExt; @@ -26,13 +26,13 @@ use crate::memtable::{IterReverse, Result}; // reverse order naturally. pub struct ReversedColumnarIterator { iter: I, - reversed_iter: Option>>>, + reversed_iter: Option>>>, num_record_batch: usize, } impl ReversedColumnarIterator where - I: Iterator>, + I: Iterator>, { pub fn new(iter: I, num_rows: usize, batch_size: usize) -> Self { Self { @@ -57,9 +57,9 @@ where impl Iterator for ReversedColumnarIterator where - I: Iterator>, + I: Iterator>, { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { self.init_if_necessary(); diff --git a/analytic_engine/src/memtable/skiplist/iter.rs b/analytic_engine/src/memtable/skiplist/iter.rs index 00f885f1a5..b78ff65c32 100644 --- a/analytic_engine/src/memtable/skiplist/iter.rs +++ b/analytic_engine/src/memtable/skiplist/iter.rs @@ -20,8 +20,8 @@ use arena::{Arena, BasicStats}; use bytes_ext::{Bytes, BytesMut}; use codec::row; use common_types::{ - projected_schema::{ProjectedSchema, RowProjector}, - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + projected_schema::{ProjectedSchema, RecordFetchingContext}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::contiguous::{ContiguousRowReader, ProjectedContiguousRow}, schema::Schema, SequenceNumber, @@ -57,8 +57,7 @@ pub struct ColumnarIterImpl + Clone + Sync + Send> /// Schema of this memtable, used to decode row memtable_schema: Schema, /// Projection of schema to read - projected_schema: ProjectedSchema, - projector: RowProjector, + record_fetching_ctx: RecordFetchingContext, // Options related: batch_size: usize, @@ -86,17 +85,16 @@ impl + Clone + Sync + Send> ColumnarIterImpl { request: ScanRequest, ) -> Result { // Create projection for the memtable schema - let projector = request - .projected_schema - .try_project_with_key(&memtable.schema) + let record_fetching_ctx = request + .record_fetching_ctx_builder + .build(&memtable.schema) .context(ProjectSchema)?; let iter = memtable.skiplist.iter(); let mut columnar_iter = Self { iter, memtable_schema: memtable.schema.clone(), - projected_schema: request.projected_schema, - projector, + record_fetching_ctx, batch_size: ctx.batch_size, deadline: ctx.deadline, start_user_key: request.start_user_key, @@ -148,12 +146,18 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } /// Fetch next record batch - fn fetch_next_record_batch(&mut self) -> Result> { + fn fetch_next_record_batch(&mut self) -> Result> { debug_assert_eq!(State::Initialized, self.state); assert!(self.batch_size > 0); - let mut builder = RecordBatchWithKeyBuilder::with_capacity( - self.projected_schema.to_record_schema_with_key(), + let record_schema = self.record_fetching_ctx.fetching_schema().clone(); + let primary_key_indexes = self + .record_fetching_ctx + .primary_key_indexes() + .map(|idxs| idxs.to_vec()); + let mut builder = FetchingRecordBatchBuilder::with_capacity( + 
record_schema, + primary_key_indexes, self.batch_size, ); let mut num_rows = 0; @@ -161,7 +165,8 @@ impl + Clone + Sync + Send> ColumnarIterImpl { if let Some(row) = self.fetch_next_row()? { let row_reader = ContiguousRowReader::try_new(&row, &self.memtable_schema) .context(DecodeContinuousRow)?; - let projected_row = ProjectedContiguousRow::new(row_reader, &self.projector); + let projected_row = + ProjectedContiguousRow::new(row_reader, &self.record_fetching_ctx); trace!("Column iterator fetch next row, row:{:?}", projected_row); @@ -293,7 +298,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } impl + Clone + Sync + Send> Iterator for ColumnarIterImpl { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { if self.state != State::Initialized { diff --git a/analytic_engine/src/memtable/skiplist/mod.rs b/analytic_engine/src/memtable/skiplist/mod.rs index 5b6a79a78d..fd66a20cc7 100644 --- a/analytic_engine/src/memtable/skiplist/mod.rs +++ b/analytic_engine/src/memtable/skiplist/mod.rs @@ -274,8 +274,8 @@ mod tests { use codec::memcomparable::MemComparable; use common_types::{ datum::Datum, - projected_schema::ProjectedSchema, - record_batch::RecordBatchWithKey, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + record_batch::FetchingRecordBatch, row::Row, schema::IndexInWriterSchema, tests::{build_row, build_schema}, @@ -294,7 +294,10 @@ mod tests { ) { let projection: Vec = (0..schema.num_columns()).collect(); let projected_schema = ProjectedSchema::new(schema, Some(projection)).unwrap(); - + let fetching_schema = projected_schema.to_record_schema(); + let table_schema = projected_schema.table_schema(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema.clone(), None); let testcases = vec![ ( // limited by sequence @@ -302,7 +305,7 @@ mod tests { start_user_key: Bound::Unbounded, end_user_key: Bound::Unbounded, sequence: 2, - projected_schema: projected_schema.clone(), + record_fetching_ctx_builder: record_fetching_ctx_builder.clone(), need_dedup: true, reverse: false, metrics_collector: None, @@ -322,7 +325,7 @@ mod tests { start_user_key: Bound::Included(build_scan_key("a", 1)), end_user_key: Bound::Excluded(build_scan_key("e", 5)), sequence: 2, - projected_schema: projected_schema.clone(), + record_fetching_ctx_builder: record_fetching_ctx_builder.clone(), need_dedup: true, reverse: false, metrics_collector: None, @@ -341,7 +344,7 @@ mod tests { start_user_key: Bound::Included(build_scan_key("a", 1)), end_user_key: Bound::Excluded(build_scan_key("e", 5)), sequence: 1, - projected_schema, + record_fetching_ctx_builder, need_dedup: true, reverse: false, metrics_collector: None, @@ -367,13 +370,16 @@ mod tests { ) { let projection: Vec = (0..2).collect(); let projected_schema = ProjectedSchema::new(schema, Some(projection)).unwrap(); - + let fetching_schema = projected_schema.to_record_schema(); + let table_schema = projected_schema.table_schema(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema.clone(), None); let testcases = vec![( ScanRequest { start_user_key: Bound::Included(build_scan_key("a", 1)), end_user_key: Bound::Excluded(build_scan_key("e", 5)), sequence: 2, - projected_schema, + record_fetching_ctx_builder, need_dedup: true, reverse: false, metrics_collector: None, @@ -457,7 +463,7 @@ mod tests { test_memtable_scan_for_projection(schema, memtable); } - fn check_iterator>>( + fn check_iterator>>( iter: T, expected_rows: Vec, ) { 
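Note: the chain builder below and the merge builder later both take an `SstReadOptionsBuilder` instead of ready-made `SstReadOptions`. A minimal sketch of the intended two-step construction, assuming the surrounding values (scan options, table metrics, row-group size, predicate, meta cache, runtime, schemas) are already in scope as in the read-path changes above; it is illustrative only, not part of the diff:

    // Step 1: collect everything that does not depend on the schema being fetched.
    let sst_read_options_builder = SstReadOptionsBuilder::new(
        ScanType::Query,
        scan_options,
        maybe_table_level_metrics,
        num_rows_per_row_group,
        predicate,
        meta_cache,
        runtime,
    );

    // Step 2: bind the schema-dependent fetching context once the concrete read
    // path (dedup or append mode) is known, then build the final options.
    let record_fetching_ctx_builder = RecordFetchingContextBuilder::new(
        fetching_schema,
        table_schema,
        primary_key_indexes, // Some(..) for dedup reads, None for append-mode reads
    );
    let sst_read_options = sst_read_options_builder.build(record_fetching_ctx_builder);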
diff --git a/analytic_engine/src/row_iter/chain.rs b/analytic_engine/src/row_iter/chain.rs index ed2b8a30d8..2d645ed8a2 100644 --- a/analytic_engine/src/row_iter/chain.rs +++ b/analytic_engine/src/row_iter/chain.rs @@ -19,7 +19,9 @@ use std::{ use async_trait::async_trait; use common_types::{ - projected_schema::ProjectedSchema, record_batch::RecordBatchWithKey, request_id::RequestId, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + record_batch::FetchingRecordBatch, + request_id::RequestId, schema::RecordSchemaWithKey, }; use generic_error::GenericError; @@ -30,8 +32,11 @@ use table_engine::{predicate::PredicateRef, table::TableId}; use trace_metric::{MetricsCollector, TraceMetricWhenDrop}; use crate::{ + instance::SstReadOptionsBuilder, row_iter::{ - record_batch_stream, record_batch_stream::BoxedPrefetchableRecordBatchStream, + record_batch_stream::{ + self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SstStreamContext, + }, RecordBatchWithKeyIterator, }, space::SpaceId, @@ -74,7 +79,7 @@ pub struct ChainConfig<'a> { pub predicate: PredicateRef, pub num_streams_to_prefetch: usize, - pub sst_read_options: SstReadOptions, + pub sst_read_options_builder: SstReadOptionsBuilder, /// Sst factory pub sst_factory: &'a SstFactoryRef, /// Store picker for persisting sst. @@ -119,6 +124,29 @@ impl<'a> Builder<'a> { impl<'a> Builder<'a> { pub async fn build(self) -> Result { + let fetching_schema = self.config.projected_schema.to_record_schema(); + let table_schema = self.config.projected_schema.table_schema(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema.clone(), table_schema.clone(), None); + let sst_read_options = self + .config + .sst_read_options_builder + .build(record_fetching_ctx_builder.clone()); + + let memtable_stream_ctx = MemtableStreamContext { + record_fetching_ctx_builder, + fetching_schema: fetching_schema.clone(), + predicate: self.config.predicate, + need_dedup: false, + reverse: false, + deadline: self.config.deadline, + }; + + let sst_stream_ctx = SstStreamContext { + sst_read_options, + fetching_schema, + }; + let total_sst_streams: usize = self.ssts.iter().map(|v| v.len()).sum(); let mut total_streams = self.memtables.len() + total_sst_streams; if self.sampling_mem.is_some() { @@ -128,12 +156,8 @@ impl<'a> Builder<'a> { if let Some(v) = &self.sampling_mem { let stream = record_batch_stream::filtered_stream_from_memtable( - self.config.projected_schema.clone(), - false, &v.mem, - false, - self.config.predicate.as_ref(), - self.config.deadline, + &memtable_stream_ctx, self.config.metrics_collector.clone(), ) .context(BuildStreamFromMemtable)?; @@ -142,14 +166,10 @@ impl<'a> Builder<'a> { for memtable in &self.memtables { let stream = record_batch_stream::filtered_stream_from_memtable( - self.config.projected_schema.clone(), - false, // chain iterator only handle the case reading in no order so just read in asc // order by default. 
&memtable.mem, - false, - self.config.predicate.as_ref(), - self.config.deadline, + &memtable_stream_ctx, self.config.metrics_collector.clone(), ) .context(BuildStreamFromMemtable)?; @@ -163,8 +183,8 @@ impl<'a> Builder<'a> { self.config.table_id, sst, self.config.sst_factory, - &self.config.sst_read_options, self.config.store_picker, + &sst_stream_ctx, self.config.metrics_collector.clone(), ) .await @@ -307,7 +327,7 @@ impl ChainIterator { } } - async fn next_batch_internal(&mut self) -> Result> { + async fn next_batch_internal(&mut self) -> Result> { self.init_if_necessary(); self.maybe_prefetch().await; @@ -364,7 +384,7 @@ impl RecordBatchWithKeyIterator for ChainIterator { &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { let timer = Instant::now(); let res = self.next_batch_internal().await; self.metrics.scan_duration += timer.elapsed(); diff --git a/analytic_engine/src/row_iter/dedup.rs b/analytic_engine/src/row_iter/dedup.rs index 64e3beacae..89b9bbbfff 100644 --- a/analytic_engine/src/row_iter/dedup.rs +++ b/analytic_engine/src/row_iter/dedup.rs @@ -16,7 +16,7 @@ use std::cmp::Ordering; use async_trait::async_trait; use common_types::{ - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, request_id::RequestId, row::{Row, RowViewOnBatch, RowWithMeta}, schema::RecordSchemaWithKey, @@ -54,7 +54,7 @@ define_result!(Error); pub struct DedupIterator { request_id: RequestId, schema: RecordSchemaWithKey, - record_batch_builder: RecordBatchWithKeyBuilder, + record_batch_builder: FetchingRecordBatchBuilder, iter: I, /// Previous row returned. prev_row: Option, @@ -69,13 +69,17 @@ pub struct DedupIterator { impl DedupIterator { pub fn new(request_id: RequestId, iter: I, iter_options: IterOptions) -> Self { - let schema = iter.schema(); - - let record_batch_builder = - RecordBatchWithKeyBuilder::with_capacity(schema.clone(), iter_options.batch_size); + let schema_with_key = iter.schema(); + let primary_key_indexes = schema_with_key.primary_key_idx().to_vec(); + let fetching_schema = schema_with_key.to_record_schema(); + let record_batch_builder = FetchingRecordBatchBuilder::with_capacity( + fetching_schema, + Some(primary_key_indexes), + iter_options.batch_size, + ); Self { request_id, - schema: schema.clone(), + schema: schema_with_key.clone(), record_batch_builder, iter, prev_row: None, @@ -85,7 +89,7 @@ impl DedupIterator { } } - fn dedup_batch(&mut self, record_batch: RecordBatchWithKey) -> Result { + fn dedup_batch(&mut self, record_batch: FetchingRecordBatch) -> Result { self.selected_rows.clear(); // Ignore all rows by default. self.selected_rows.resize(record_batch.num_rows(), false); @@ -141,9 +145,9 @@ impl DedupIterator { /// Filter batch by `selected_rows`. 
fn filter_batch( &mut self, - record_batch: RecordBatchWithKey, + record_batch: FetchingRecordBatch, selected_num: usize, - ) -> Result { + ) -> Result { self.total_selected_rows += selected_num; self.total_duplications += record_batch.num_rows() - selected_num; @@ -176,7 +180,7 @@ impl RecordBatchWithKeyIterator for DedupIterator &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { match self .iter .next_batch() diff --git a/analytic_engine/src/row_iter/merge.rs b/analytic_engine/src/row_iter/merge.rs index 840c1b7fb2..83dcb49b25 100644 --- a/analytic_engine/src/row_iter/merge.rs +++ b/analytic_engine/src/row_iter/merge.rs @@ -23,8 +23,8 @@ use std::{ use async_trait::async_trait; use common_types::{ - projected_schema::ProjectedSchema, - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, request_id::RequestId, row::RowViewOnBatch, schema::RecordSchemaWithKey, @@ -39,9 +39,12 @@ use table_engine::{predicate::PredicateRef, table::TableId}; use trace_metric::{MetricsCollector, TraceMetricWhenDrop}; use crate::{ + instance::SstReadOptionsBuilder, row_iter::{ - record_batch_stream, - record_batch_stream::{BoxedPrefetchableRecordBatchStream, SequencedRecordBatch}, + record_batch_stream::{ + self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SequencedRecordBatch, + SstStreamContext, + }, IterOptions, RecordBatchWithKeyIterator, }, space::SpaceId, @@ -108,7 +111,7 @@ pub struct MergeConfig<'a> { /// The predicate of the query. pub predicate: PredicateRef, - pub sst_read_options: SstReadOptions, + pub sst_read_options_builder: SstReadOptionsBuilder, /// Sst factory pub sst_factory: &'a SstFactoryRef, /// Store picker for persisting sst. @@ -129,8 +132,10 @@ pub struct MergeBuilder<'a> { /// Sampling memtable to read. sampling_mem: Option, + /// MemTables to read. memtables: MemTableVec, + /// Ssts to read of each level. 
ssts: Vec>, } @@ -170,6 +175,34 @@ impl<'a> MergeBuilder<'a> { } pub async fn build(self) -> Result { + let fetching_schema = self.config.projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); + let fetching_schema = fetching_schema.into_record_schema(); + let table_schema = self.config.projected_schema.table_schema(); + let record_fetching_ctx_builder = RecordFetchingContextBuilder::new( + fetching_schema.clone(), + table_schema.clone(), + Some(primary_key_indexes), + ); + let sst_read_options = self + .config + .sst_read_options_builder + .build(record_fetching_ctx_builder.clone()); + + let memtable_stream_ctx = MemtableStreamContext { + record_fetching_ctx_builder, + fetching_schema: fetching_schema.clone(), + predicate: self.config.predicate, + need_dedup: self.config.need_dedup, + reverse: self.config.reverse, + deadline: self.config.deadline, + }; + + let sst_stream_ctx = SstStreamContext { + sst_read_options, + fetching_schema, + }; + let sst_streams_num: usize = self .ssts .iter() @@ -192,12 +225,8 @@ impl<'a> MergeBuilder<'a> { if let Some(v) = &self.sampling_mem { let stream = record_batch_stream::filtered_stream_from_memtable( - self.config.projected_schema.clone(), - self.config.need_dedup, &v.mem, - self.config.reverse, - self.config.predicate.as_ref(), - self.config.deadline, + &memtable_stream_ctx, self.config.metrics_collector.clone(), ) .context(BuildStreamFromMemtable)?; @@ -206,12 +235,8 @@ impl<'a> MergeBuilder<'a> { for memtable in &self.memtables { let stream = record_batch_stream::filtered_stream_from_memtable( - self.config.projected_schema.clone(), - self.config.need_dedup, &memtable.mem, - self.config.reverse, - self.config.predicate.as_ref(), - self.config.deadline, + &memtable_stream_ctx, self.config.metrics_collector.clone(), ) .context(BuildStreamFromMemtable)?; @@ -226,8 +251,8 @@ impl<'a> MergeBuilder<'a> { self.config.table_id, f, self.config.sst_factory, - &self.config.sst_read_options, self.config.store_picker, + &sst_stream_ctx, self.config.metrics_collector.clone(), ) .await @@ -324,7 +349,7 @@ impl BufferedStreamState { /// Returns number of rows added. fn append_rows_to( &mut self, - builder: &mut RecordBatchWithKeyBuilder, + builder: &mut FetchingRecordBatchBuilder, len: usize, ) -> Result { let added = builder @@ -336,7 +361,7 @@ impl BufferedStreamState { /// Take record batch slice with at most `len` rows from cursor and advance /// the cursor. - fn take_record_batch_slice(&mut self, len: usize) -> RecordBatchWithKey { + fn take_record_batch_slice(&mut self, len: usize) -> FetchingRecordBatch { let len_to_fetch = cmp::min( self.buffered_record_batch.record_batch.num_rows() - self.cursor, len, @@ -403,14 +428,14 @@ impl BufferedStream { /// REQUIRE: the buffer is not exhausted. fn append_rows_to( &mut self, - builder: &mut RecordBatchWithKeyBuilder, + builder: &mut FetchingRecordBatchBuilder, len: usize, ) -> Result { self.state.as_mut().unwrap().append_rows_to(builder, len) } /// REQUIRE: the buffer is not exhausted. 
- fn take_record_batch_slice(&mut self, len: usize) -> RecordBatchWithKey { + fn take_record_batch_slice(&mut self, len: usize) -> FetchingRecordBatch { self.state.as_mut().unwrap().take_record_batch_slice(len) } @@ -634,7 +659,7 @@ pub struct MergeIterator { request_id: RequestId, inited: bool, schema: RecordSchemaWithKey, - record_batch_builder: RecordBatchWithKeyBuilder, + record_batch_builder: FetchingRecordBatchBuilder, origin_streams: Vec, /// ssts are kept here to avoid them from being purged. #[allow(dead_code)] @@ -661,8 +686,14 @@ impl MergeIterator { metrics: Metrics, ) -> Self { let heap_cap = streams.len(); - let record_batch_builder = - RecordBatchWithKeyBuilder::with_capacity(schema.clone(), iter_options.batch_size); + let primary_key_indexes = schema.primary_key_idx().to_vec(); + let fetching_schema = schema.to_record_schema(); + let record_batch_builder = FetchingRecordBatchBuilder::with_capacity( + fetching_schema, + Some(primary_key_indexes), + iter_options.batch_size, + ); + Self { table_id, request_id, @@ -790,7 +821,7 @@ impl MergeIterator { async fn fetch_rows_from_one_stream( &mut self, num_rows_to_fetch: usize, - ) -> Result> { + ) -> Result> { assert_eq!(self.hot.len(), 1); self.metrics.times_fetch_rows_from_one += 1; @@ -834,7 +865,7 @@ impl MergeIterator { /// Fetch the next batch from the streams. /// /// `init_if_necessary` should be finished before this method. - async fn fetch_next_batch(&mut self) -> Result> { + async fn fetch_next_batch(&mut self) -> Result> { self.init_if_necessary().await?; self.record_batch_builder.clear(); @@ -876,7 +907,7 @@ impl RecordBatchWithKeyIterator for MergeIterator { &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { let record_batch = self.fetch_next_batch().await?; trace!("MergeIterator send next record batch:{:?}", record_batch); diff --git a/analytic_engine/src/row_iter/mod.rs b/analytic_engine/src/row_iter/mod.rs index d049796633..e6451506fd 100644 --- a/analytic_engine/src/row_iter/mod.rs +++ b/analytic_engine/src/row_iter/mod.rs @@ -16,7 +16,7 @@ use async_stream::try_stream; use async_trait::async_trait; -use common_types::{record_batch::RecordBatchWithKey, schema::RecordSchemaWithKey}; +use common_types::{record_batch::FetchingRecordBatch, schema::RecordSchemaWithKey}; use generic_error::BoxError; use crate::sst::writer::RecordBatchStream; @@ -43,7 +43,8 @@ pub trait RecordBatchWithKeyIterator: Send { fn schema(&self) -> &RecordSchemaWithKey; - async fn next_batch(&mut self) -> std::result::Result, Self::Error>; + async fn next_batch(&mut self) + -> std::result::Result, Self::Error>; } pub fn record_batch_with_key_iter_to_stream( diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index d64e2a50bb..d49f177c7c 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -23,7 +23,10 @@ use arrow::{ datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}, }; use common_types::{ - projected_schema::ProjectedSchema, record_batch::RecordBatchWithKey, SequenceNumber, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + record_batch::FetchingRecordBatch, + schema::RecordSchema, + SequenceNumber, }; use datafusion::{ common::ToDFSchema, @@ -36,7 +39,10 @@ use futures::stream::{self, StreamExt}; use generic_error::{BoxError, GenericResult}; use macros::define_result; use snafu::{Backtrace, OptionExt, ResultExt, 
Snafu}; -use table_engine::{predicate::Predicate, table::TableId}; +use table_engine::{ + predicate::{Predicate, PredicateRef}, + table::TableId, +}; use trace_metric::MetricsCollector; use crate::{ @@ -129,7 +135,7 @@ define_result!(Error); // struct? But what is the sequence after merge? #[derive(Debug)] pub struct SequencedRecordBatch { - pub record_batch: RecordBatchWithKey, + pub record_batch: FetchingRecordBatch, pub sequence: SequenceNumber, } @@ -212,44 +218,27 @@ pub fn filter_stream( /// Build filtered (by `predicate`) [SequencedRecordBatchStream] from a /// memtable. pub fn filtered_stream_from_memtable( - projected_schema: ProjectedSchema, - need_dedup: bool, memtable: &MemTableRef, - reverse: bool, - predicate: &Predicate, - deadline: Option, + ctx: &MemtableStreamContext, metrics_collector: Option, ) -> Result { - stream_from_memtable( - projected_schema.clone(), - need_dedup, - memtable, - reverse, - deadline, - metrics_collector, - ) - .and_then(|origin_stream| { + stream_from_memtable(memtable, ctx, metrics_collector).and_then(|origin_stream| { filter_stream( origin_stream, - projected_schema - .as_record_schema_with_key() - .to_arrow_schema_ref(), - predicate, + ctx.fetching_schema.to_arrow_schema_ref(), + &ctx.predicate, ) }) } /// Build [SequencedRecordBatchStream] from a memtable. pub fn stream_from_memtable( - projected_schema: ProjectedSchema, - need_dedup: bool, memtable: &MemTableRef, - reverse: bool, - deadline: Option, + ctx: &MemtableStreamContext, metrics_collector: Option, ) -> Result { let scan_ctx = ScanContext { - deadline, + deadline: ctx.deadline, ..Default::default() }; let max_seq = memtable.last_sequence(); @@ -259,9 +248,9 @@ pub fn stream_from_memtable( start_user_key: Bound::Unbounded, end_user_key: Bound::Unbounded, sequence: max_seq, - projected_schema, - need_dedup, - reverse, + record_fetching_ctx_builder: ctx.record_fetching_ctx_builder.clone(), + need_dedup: ctx.need_dedup, + reverse: ctx.reverse, metrics_collector, }; @@ -277,6 +266,15 @@ pub fn stream_from_memtable( Ok(Box::new(NoopPrefetcher(Box::new(stream)))) } +pub struct MemtableStreamContext { + pub record_fetching_ctx_builder: RecordFetchingContextBuilder, + pub fetching_schema: RecordSchema, + pub predicate: PredicateRef, + pub need_dedup: bool, + pub reverse: bool, + pub deadline: Option, +} + /// Build the filtered by `sst_read_options.predicate` /// [SequencedRecordBatchStream] from a sst. 
pub async fn filtered_stream_from_sst_file( @@ -284,8 +282,8 @@ pub async fn filtered_stream_from_sst_file( table_id: TableId, sst_file: &FileHandle, sst_factory: &SstFactoryRef, - sst_read_options: &SstReadOptions, store_picker: &ObjectStorePickerRef, + ctx: &SstStreamContext, metrics_collector: Option, ) -> Result { stream_from_sst_file( @@ -293,19 +291,16 @@ pub async fn filtered_stream_from_sst_file( table_id, sst_file, sst_factory, - sst_read_options, store_picker, + ctx, metrics_collector, ) .await .and_then(|origin_stream| { filter_stream( origin_stream, - sst_read_options - .projected_schema - .as_record_schema_with_key() - .to_arrow_schema_ref(), - sst_read_options.predicate.as_ref(), + ctx.fetching_schema.to_arrow_schema_ref(), + &ctx.sst_read_options.predicate, ) }) } @@ -316,8 +311,8 @@ pub async fn stream_from_sst_file( table_id: TableId, sst_file: &FileHandle, sst_factory: &SstFactoryRef, - sst_read_options: &SstReadOptions, store_picker: &ObjectStorePickerRef, + ctx: &SstStreamContext, metrics_collector: Option, ) -> Result { sst_file.read_meter().mark(); @@ -332,7 +327,7 @@ pub async fn stream_from_sst_file( let mut sst_reader = sst_factory .create_reader( &path, - sst_read_options, + &ctx.sst_read_options, read_hint, store_picker, metrics_collector, @@ -353,6 +348,11 @@ pub async fn stream_from_sst_file( Ok(Box::new(stream)) } +pub struct SstStreamContext { + pub sst_read_options: SstReadOptions, + pub fetching_schema: RecordSchema, +} + #[cfg(test)] pub mod tests { use common_types::{row::Row, schema::Schema}; diff --git a/analytic_engine/src/row_iter/tests.rs b/analytic_engine/src/row_iter/tests.rs index 97b83c0b43..be45cb1a73 100644 --- a/analytic_engine/src/row_iter/tests.rs +++ b/analytic_engine/src/row_iter/tests.rs @@ -14,8 +14,8 @@ use async_trait::async_trait; use common_types::{ - projected_schema::ProjectedSchema, - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + projected_schema::{ProjectedSchema, RecordFetchingContext}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::{ contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, Row, @@ -34,12 +34,12 @@ define_result!(Error); pub struct VectorIterator { schema: RecordSchemaWithKey, - items: Vec>, + items: Vec>, idx: usize, } impl VectorIterator { - pub fn new(schema: RecordSchemaWithKey, items: Vec) -> Self { + pub fn new(schema: RecordSchemaWithKey, items: Vec) -> Self { Self { schema, items: items.into_iter().map(Some).collect(), @@ -56,7 +56,7 @@ impl RecordBatchWithKeyIterator for VectorIterator { &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { if self.idx == self.items.len() { return Ok(None); } @@ -68,13 +68,15 @@ impl RecordBatchWithKeyIterator for VectorIterator { } } -pub fn build_record_batch_with_key(schema: Schema, rows: Vec) -> RecordBatchWithKey { +pub fn build_record_batch_with_key(schema: Schema, rows: Vec) -> FetchingRecordBatch { assert!(schema.num_columns() > 1); let projection: Vec = (0..schema.num_columns()).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - let row_projected_schema = projected_schema.try_project_with_key(&schema).unwrap(); - let mut builder = - RecordBatchWithKeyBuilder::with_capacity(projected_schema.to_record_schema_with_key(), 2); + let fetching_schema = projected_schema.to_record_schema(); + let table_schema = projected_schema.table_schema(); + let record_fetching_ctx = + 
RecordFetchingContext::new(&fetching_schema, None, &table_schema, &table_schema).unwrap(); + let mut builder = FetchingRecordBatchBuilder::with_capacity(fetching_schema, None, 2); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); let mut buf = Vec::new(); @@ -84,7 +86,7 @@ pub fn build_record_batch_with_key(schema: Schema, rows: Vec) -> RecordBatc writer.write_row(&row).unwrap(); let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &row_projected_schema); + let projected_row = ProjectedContiguousRow::new(source_row, &record_fetching_ctx); builder .append_projected_contiguous_row(&projected_row) .unwrap(); diff --git a/analytic_engine/src/sst/factory.rs b/analytic_engine/src/sst/factory.rs index 747a653cc7..4867d5d197 100644 --- a/analytic_engine/src/sst/factory.rs +++ b/analytic_engine/src/sst/factory.rs @@ -17,7 +17,9 @@ use std::{fmt::Debug, sync::Arc}; use async_trait::async_trait; -use common_types::projected_schema::ProjectedSchema; +use common_types::projected_schema::{ + ProjectedSchema, RecordFetchingContext, RecordFetchingContextBuilder, +}; use macros::define_result; use object_store::{ObjectStoreRef, Path}; use runtime::Runtime; @@ -136,7 +138,7 @@ pub struct SstReadOptions { pub frequency: ReadFrequency, pub num_rows_per_row_group: usize, - pub projected_schema: ProjectedSchema, + pub record_fetching_ctx_builder: RecordFetchingContextBuilder, pub predicate: PredicateRef, pub meta_cache: Option, pub scan_options: ScanOptions, diff --git a/analytic_engine/src/sst/parquet/async_reader.rs b/analytic_engine/src/sst/parquet/async_reader.rs index c8fbb479b5..47fb82dd93 100644 --- a/analytic_engine/src/sst/parquet/async_reader.rs +++ b/analytic_engine/src/sst/parquet/async_reader.rs @@ -26,8 +26,8 @@ use arrow::{datatypes::SchemaRef, record_batch::RecordBatch as ArrowRecordBatch} use async_trait::async_trait; use bytes_ext::Bytes; use common_types::{ - projected_schema::{ProjectedSchema, RowProjector}, - record_batch::{ArrowRecordBatchProjector, RecordBatchWithKey}, + projected_schema::{RecordFetchingContext, RecordFetchingContextBuilder}, + record_batch::FetchingRecordBatch, }; use datafusion::{ common::ToDFSchema, @@ -73,7 +73,7 @@ use crate::{ const PRUNE_ROW_GROUPS_METRICS_COLLECTOR_NAME: &str = "prune_row_groups"; type SendableRecordBatchStream = Pin> + Send>>; -type RecordBatchWithKeyStream = Box> + Send + Unpin>; +type RecordBatchWithKeyStream = Box> + Send + Unpin>; pub struct Reader<'a> { /// The path where the data is persisted. @@ -83,14 +83,15 @@ pub struct Reader<'a> { /// The hint for the sst file size. file_size_hint: Option, num_rows_per_row_group: usize, - projected_schema: ProjectedSchema, meta_cache: Option, predicate: PredicateRef, /// Current frequency decides the cache policy. 
frequency: ReadFrequency, /// Init those fields in `init_if_necessary` meta_data: Option, - row_projector: Option, + + record_fetching_ctx_builder: RecordFetchingContextBuilder, + record_fetching_ctx: Option, /// Options for `read_parallelly` metrics: Metrics, @@ -131,12 +132,12 @@ impl<'a> Reader<'a> { store, file_size_hint, num_rows_per_row_group: options.num_rows_per_row_group, - projected_schema: options.projected_schema.clone(), meta_cache: options.meta_cache.clone(), predicate: options.predicate.clone(), frequency: options.frequency, meta_data: None, - row_projector: None, + record_fetching_ctx_builder: options.record_fetching_ctx_builder.clone(), + record_fetching_ctx: None, metrics, df_plan_metrics, table_level_sst_metrics: options.maybe_table_level_metrics.clone(), @@ -155,11 +156,7 @@ impl<'a> Reader<'a> { return Ok(Vec::new()); } - let row_projector = { - let row_projector = self.row_projector.take().unwrap(); - ArrowRecordBatchProjector::from(row_projector) - }; - + let row_projector = self.record_fetching_ctx.take().unwrap(); let streams: Vec<_> = streams .into_iter() .map(|stream| { @@ -243,7 +240,7 @@ impl<'a> Reader<'a> { assert!(self.meta_data.is_some()); let meta_data = self.meta_data.as_ref().unwrap(); - let row_projector = self.row_projector.as_ref().unwrap(); + let row_projector = self.record_fetching_ctx.as_ref().unwrap(); let arrow_schema = meta_data.custom().schema.to_arrow_schema_ref(); // Get target row groups. let target_row_groups = { @@ -355,13 +352,15 @@ impl<'a> Reader<'a> { meta_data }; - let row_projector = self - .projected_schema - .try_project_with_key(&meta_data.custom().schema) + let record_fetching_ctx = self + .record_fetching_ctx_builder + .build(&meta_data.custom().schema) .box_err() .context(Projection)?; + self.meta_data = Some(meta_data); - self.row_projector = Some(row_projector); + self.record_fetching_ctx = Some(record_fetching_ctx); + Ok(()) } @@ -483,7 +482,7 @@ pub(crate) struct ProjectorMetrics { struct RecordBatchProjector { stream: SendableRecordBatchStream, - row_projector: ArrowRecordBatchProjector, + record_fetching_ctx: RecordFetchingContext, metrics: ProjectorMetrics, start_time: Instant, @@ -492,7 +491,7 @@ struct RecordBatchProjector { impl RecordBatchProjector { fn new( stream: SendableRecordBatchStream, - row_projector: ArrowRecordBatchProjector, + record_fetching_ctx: RecordFetchingContext, metrics_collector: Option, ) -> Self { let metrics = ProjectorMetrics { @@ -502,7 +501,7 @@ impl RecordBatchProjector { Self { stream, - row_projector, + record_fetching_ctx, metrics, start_time: Instant::now(), } @@ -510,7 +509,7 @@ impl RecordBatchProjector { } impl Stream for RecordBatchProjector { - type Item = Result; + type Item = Result; fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let projector = self.get_mut(); @@ -531,11 +530,12 @@ impl Stream for RecordBatchProjector { } projector.metrics.row_num += record_batch.num_rows(); - let projected_batch = projector - .row_projector - .project_to_record_batch_with_key(record_batch) - .box_err() - .context(DecodeRecordBatch {}); + let projected_batch = FetchingRecordBatch::try_new( + &projector.record_fetching_ctx, + record_batch, + ) + .box_err() + .context(DecodeRecordBatch {}); Poll::Ready(Some(projected_batch)) } @@ -566,7 +566,7 @@ impl<'a> SstReader for Reader<'a> { async fn read( &mut self, - ) -> Result>>> { + ) -> Result>>> { let mut streams = self.maybe_read_parallelly(1).await?; assert_eq!(streams.len(), 1); let stream = streams.pop().expect("impossible to 
fetch no stream"); @@ -577,7 +577,7 @@ impl<'a> SstReader for Reader<'a> { struct RecordBatchReceiver { bg_prefetch_tx: Option>, - rx_group: Vec>>, + rx_group: Vec>>, cur_rx_idx: usize, #[allow(dead_code)] drop_helper: AbortOnDropMany<()>, @@ -585,7 +585,7 @@ struct RecordBatchReceiver { #[async_trait] impl PrefetchableStream for RecordBatchReceiver { - type Item = Result; + type Item = Result; async fn start_prefetch(&mut self) { // Start the prefetch work in background when first poll is called. @@ -602,7 +602,7 @@ impl PrefetchableStream for RecordBatchReceiver { } impl Stream for RecordBatchReceiver { - type Item = Result; + type Item = Result; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { if self.rx_group.is_empty() { @@ -682,8 +682,8 @@ impl<'a> ThreadedReader<'a> { fn read_record_batches_from_sub_reader( &mut self, - mut reader: Box> + Send + Unpin>, - tx: Sender>, + mut reader: Box> + Send + Unpin>, + tx: Sender>, mut rx: watch::Receiver<()>, ) -> JoinHandle<()> { self.runtime.spawn(async move { @@ -710,7 +710,7 @@ impl<'a> SstReader for ThreadedReader<'a> { async fn read( &mut self, - ) -> Result>>> { + ) -> Result>>> { // Get underlying sst readers and channels. let sub_readers = self .inner @@ -733,7 +733,7 @@ impl<'a> SstReader for ThreadedReader<'a> { let channel_cap_per_sub_reader = self.channel_cap / sub_readers.len(); let (tx_group, rx_group): (Vec<_>, Vec<_>) = (0..read_parallelism) - .map(|_| mpsc::channel::>(channel_cap_per_sub_reader)) + .map(|_| mpsc::channel::>(channel_cap_per_sub_reader)) .unzip(); let (bg_prefetch_tx, bg_prefetch_rx) = watch::channel(()); diff --git a/analytic_engine/src/sst/parquet/writer.rs b/analytic_engine/src/sst/parquet/writer.rs index d2a161220d..71af8e7a53 100644 --- a/analytic_engine/src/sst/parquet/writer.rs +++ b/analytic_engine/src/sst/parquet/writer.rs @@ -18,7 +18,7 @@ use std::collections::HashSet; use async_trait::async_trait; use common_types::{ - datum::DatumKind, record_batch::RecordBatchWithKey, request_id::RequestId, time::TimeRange, + datum::DatumKind, record_batch::FetchingRecordBatch, request_id::RequestId, time::TimeRange, }; use datafusion::parquet::basic::Compression; use futures::StreamExt; @@ -39,8 +39,9 @@ use crate::{ meta_data::{ParquetFilter, ParquetMetaData, RowGroupFilterBuilder}, }, writer::{ - self, BuildParquetFilter, EncodePbData, EncodeRecordBatch, ExpectTimestampColumn, Io, - MetaData, PollRecordBatch, RecordBatchStream, Result, SstInfo, SstWriter, Storage, + self, BuildParquetFilter, BuildParquetFilterNoCause, EncodePbData, EncodeRecordBatch, + ExpectTimestampColumn, Io, MetaData, PollRecordBatch, RecordBatchStream, Result, + SstInfo, SstWriter, Storage, }, }, table::sst_util, @@ -154,8 +155,8 @@ impl RecordBatchGroupWriter { /// the left rows. async fn fetch_next_row_group( &mut self, - prev_record_batch: &mut Option, - ) -> Result> { + prev_record_batch: &mut Option, + ) -> Result> { let mut curr_row_group = vec![]; // Used to record the number of remaining rows to fill `curr_row_group`. let mut remaining = self.num_rows_per_row_group; @@ -212,9 +213,15 @@ impl RecordBatchGroupWriter { /// Build the parquet filter for the given `row_group`. 
fn build_row_group_filter( &self, - row_group_batch: &[RecordBatchWithKey], + row_group_batch: &[FetchingRecordBatch], ) -> Result { - let mut builder = RowGroupFilterBuilder::new(row_group_batch[0].schema_with_key()); + let schema_with_key = + row_group_batch[0] + .schema_with_key() + .with_context(|| BuildParquetFilterNoCause { + msg: "primary key indexes not exist", + })?; + let mut builder = RowGroupFilterBuilder::new(&schema_with_key); for partial_batch in row_group_batch { for (col_idx, column) in partial_batch.columns().iter().enumerate() { @@ -236,7 +243,7 @@ impl RecordBatchGroupWriter { fn update_column_values( column_values: &mut [Option], - record_batch: &RecordBatchWithKey, + record_batch: &FetchingRecordBatch, ) { for (col_idx, col_values) in column_values.iter_mut().enumerate() { let mut too_many_values = false; @@ -303,7 +310,7 @@ impl RecordBatchGroupWriter { sink: W, meta_path: &Path, ) -> Result<(usize, ParquetMetaData)> { - let mut prev_record_batch: Option = None; + let mut prev_record_batch: Option = None; let mut arrow_row_group = Vec::new(); let mut total_num_rows = 0; @@ -518,7 +525,7 @@ mod tests { use bytes_ext::Bytes; use common_types::{ - projected_schema::ProjectedSchema, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, tests::{build_row, build_row_for_dictionary, build_schema, build_schema_with_dictionary}, time::{TimeRange, Timestamp}, }; @@ -652,15 +659,20 @@ mod tests { let scan_options = ScanOptions::default(); // read sst back to test + let record_fetching_ctx_builder = RecordFetchingContextBuilder::new( + reader_projected_schema.to_record_schema(), + reader_projected_schema.table_schema().clone(), + None, + ); let sst_read_options = SstReadOptions { maybe_table_level_metrics: Arc::new(MaybeTableLevelMetrics::new("test")), frequency: ReadFrequency::Frequent, num_rows_per_row_group: 5, - projected_schema: reader_projected_schema, predicate: Arc::new(Predicate::empty()), meta_cache: None, scan_options, runtime: runtime.clone(), + record_fetching_ctx_builder, }; let mut reader: Box = { diff --git a/analytic_engine/src/sst/reader.rs b/analytic_engine/src/sst/reader.rs index 42d3147779..cac0658e95 100644 --- a/analytic_engine/src/sst/reader.rs +++ b/analytic_engine/src/sst/reader.rs @@ -15,7 +15,7 @@ //! Sst reader trait definition. 
use async_trait::async_trait; -use common_types::record_batch::RecordBatchWithKey; +use common_types::record_batch::FetchingRecordBatch; use crate::{prefetchable_stream::PrefetchableStream, sst::meta_data::SstMetaData}; @@ -105,7 +105,7 @@ pub trait SstReader { async fn read( &mut self, - ) -> Result>>>; + ) -> Result>>>; } #[cfg(test)] @@ -117,7 +117,7 @@ pub mod tests { pub async fn check_stream(stream: &mut S, expected_rows: Vec) where - S: PrefetchableStream> + Unpin, + S: PrefetchableStream> + Unpin, { let mut visited_rows = 0; while let Some(batch) = stream.fetch_next().await { diff --git a/analytic_engine/src/sst/writer.rs b/analytic_engine/src/sst/writer.rs index ed0329b0b2..84298ec53f 100644 --- a/analytic_engine/src/sst/writer.rs +++ b/analytic_engine/src/sst/writer.rs @@ -19,7 +19,7 @@ use std::cmp; use async_trait::async_trait; use bytes_ext::Bytes; use common_types::{ - record_batch::RecordBatchWithKey, request_id::RequestId, schema::Schema, time::TimeRange, + record_batch::FetchingRecordBatch, request_id::RequestId, schema::Schema, time::TimeRange, SequenceNumber, }; use futures::Stream; @@ -82,6 +82,9 @@ pub mod error { #[snafu(display("Failed to build parquet filter, err:{}", source))] BuildParquetFilter { source: GenericError }, + #[snafu(display("Failed to build parquet filter msg:{msg}.\nBacktrace:\n{backtrace}"))] + BuildParquetFilterNoCause { msg: String, backtrace: Backtrace }, + #[snafu(display("Failed to poll record batch, err:{}", source))] PollRecordBatch { source: GenericError }, @@ -97,7 +100,7 @@ pub mod error { pub use error::*; -pub type RecordBatchStreamItem = std::result::Result; +pub type RecordBatchStreamItem = std::result::Result; // TODO(yingwen): SstReader also has a RecordBatchStream, can we use same type? pub type RecordBatchStream = Box + Send + Unpin>; diff --git a/common_types/src/projected_schema.rs b/common_types/src/projected_schema.rs index 405febd710..b0d6289853 100644 --- a/common_types/src/projected_schema.rs +++ b/common_types/src/projected_schema.rs @@ -61,20 +61,141 @@ pub enum Error { pub type Result = std::result::Result; #[derive(Debug, Clone)] -pub struct RowProjector { - schema_with_key: RecordSchemaWithKey, +pub struct RecordFetchingContext { + /// The schema for data fetching + /// It is derived from table schema and some columns may not exist in data + /// source. + fetching_schema: RecordSchema, + + /// + primary_key_indexes: Option>, + + /// Schema in data source + /// It is possible to be different with the table + /// schema caused by table schema altering. source_schema: Schema, + /// The Vec stores the column index in source, and `None` means this column /// is not in source but required by reader, and need to filled by null. /// The length of Vec is the same as the number of columns reader intended /// to read. - source_projection: Vec>, + fetching_source_column_indexes: Vec>, + + /// Similar as `fetching_source_column_indexes`, but storing the projected + /// source column index + /// + /// For example: + /// source column indexes: 0,1,2,3,4 + /// data fetching indexes in source: 2,1,3 + /// + /// We can see, only columns:[1,2,3] in source is needed, + /// and their indexes in pulled projected record bath are: [0,1,2]. + /// + /// So the stored data fetching indexes in projected source are: [1,0,2]. 
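The remapping described in the example above can be reproduced with a small standalone sketch. `projected_positions` below is a hypothetical helper over plain `Vec<Option<usize>>` (not part of this patch); it only mirrors the logic that `RecordFetchingContext::new` applies to `fetching_source_column_indexes`:

    // Standalone sketch of the index remapping described above, using plain
    // std types instead of the crate's schema types.
    fn projected_positions(fetching_source_column_indexes: &[Option<usize>]) -> Vec<Option<usize>> {
        // Collect the source indexes that are actually fetched, then sort them
        // to get their order inside the pulled (projected) record batch.
        let mut projected_source_indexes: Vec<usize> = fetching_source_column_indexes
            .iter()
            .filter_map(|idx| *idx)
            .collect();
        projected_source_indexes.sort_unstable();

        // Re-express each fetched source index as its position in that sorted list.
        fetching_source_column_indexes
            .iter()
            .map(|idx| {
                idx.map(|src| {
                    projected_source_indexes
                        .iter()
                        .position(|p| *p == src)
                        .unwrap()
                })
            })
            .collect()
    }

    fn main() {
        // Data fetching indexes in source: 2, 1, 3 -> positions [1, 0, 2], as above.
        assert_eq!(
            projected_positions(&[Some(2), Some(1), Some(3)]),
            vec![Some(1), Some(0), Some(2)]
        );
        // A column missing in the source stays `None` and is filled by null later.
        assert_eq!(
            projected_positions(&[Some(3), None, Some(1)]),
            vec![Some(1), None, Some(0)]
        );
    }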
+ fetching_projected_source_column_indexes: Vec>, } -impl RowProjector { +impl RecordFetchingContext { + pub fn new( + fetching_schema: &RecordSchema, + primary_key_indexes: Option>, + table_schema: &Schema, + source_schema: &Schema, + ) -> Result { + // Get `fetching_source_column_indexes`. + let mut fetching_source_column_indexes = Vec::with_capacity(fetching_schema.num_columns()); + let mut projected_source_indexes = Vec::with_capacity(fetching_schema.num_columns()); + for column_schema in fetching_schema.columns() { + Self::try_project_column( + column_schema, + table_schema, + source_schema, + &mut fetching_source_column_indexes, + &mut projected_source_indexes, + )?; + } + + // Get `fetching_projected_source_column_indexes` from + // `fetching_source_column_indexes`. + projected_source_indexes.sort_unstable(); + let fetching_projected_source_column_indexes = fetching_source_column_indexes + .iter() + .map(|source_idx_opt| { + source_idx_opt.map(|src_idx| { + // Safe to unwrap, index exists in `fetching_source_column_indexes` is ensured + // to exist in `projected_source_indexes`. + projected_source_indexes + .iter() + .position(|proj_idx| src_idx == *proj_idx) + .unwrap() + }) + }) + .collect(); + + Ok(RecordFetchingContext { + fetching_schema: fetching_schema.clone(), + primary_key_indexes, + source_schema: source_schema.clone(), + fetching_source_column_indexes, + fetching_projected_source_column_indexes, + }) + } + + fn try_project_column( + column: &ColumnSchema, + table_schema: &Schema, + source_schema: &Schema, + fetching_source_column_indexes: &mut Vec>, + projected_source_indexes: &mut Vec, + ) -> Result<()> { + match source_schema.index_of(&column.name) { + Some(source_idx) => { + // Column is in source + if table_schema.version() == source_schema.version() { + // Same version, just use that column in source + fetching_source_column_indexes.push(Some(source_idx)); + projected_source_indexes.push(source_idx); + } else { + // Different version, need to check column schema + let source_column = source_schema.column(source_idx); + // TODO(yingwen): Data type is not checked here because we do not support alter + // data type now. + match column + .compatible_for_read(source_column) + .context(IncompatReadColumn)? + { + ReadOp::Exact => { + fetching_source_column_indexes.push(Some(source_idx)); + projected_source_indexes.push(source_idx); + } + ReadOp::FillNull => { + fetching_source_column_indexes.push(None); + } + } + } + } + None => { + // Column is not in source + ensure!(column.is_nullable, MissingReadColumn { name: &column.name }); + // Column is nullable, fill this column by null + fetching_source_column_indexes.push(None); + } + } + + Ok(()) + } + + pub fn source_schema(&self) -> &Schema { + &self.source_schema + } + + pub fn fetching_schema(&self) -> &RecordSchema { + &self.fetching_schema + } + /// The projected indexes of existed columns in the source schema. pub fn existed_source_projection(&self) -> Vec { - self.source_projection + self.fetching_source_column_indexes .iter() .filter_map(|index| *index) .collect() @@ -82,12 +203,18 @@ impl RowProjector { /// The projected indexes of all columns(existed and not exist) in the /// source schema. 
- pub fn source_projection(&self) -> &[Option] { - &self.source_projection + pub fn fetching_source_column_indexes(&self) -> &[Option] { + &self.fetching_source_column_indexes } - pub fn schema_with_key(&self) -> &RecordSchemaWithKey { - &self.schema_with_key + /// The projected indexes of all columns(existed and not exist) in the + /// projected source schema. + pub fn fetching_projected_source_column_indexes(&self) -> &[Option] { + &self.fetching_projected_source_column_indexes + } + + pub fn primary_key_indexes(&self) -> Option<&[usize]> { + self.primary_key_indexes.as_deref() } /// Project the row. @@ -96,9 +223,9 @@ impl RowProjector { pub fn project_row(&self, row: &Row, mut datums_buffer: Vec) -> Row { assert_eq!(self.source_schema.num_columns(), row.num_columns()); - datums_buffer.reserve(self.schema_with_key.num_columns()); + datums_buffer.reserve(self.fetching_schema.num_columns()); - for p in &self.source_projection { + for p in &self.fetching_source_column_indexes { let datum = match p { Some(index_in_source) => row[*index_in_source].clone(), None => Datum::Null, @@ -119,13 +246,43 @@ impl RowProjector { } } +#[derive(Debug, Clone)] +pub struct RecordFetchingContextBuilder { + fetching_schema: RecordSchema, + table_schema: Schema, + primary_key_indexes: Option>, +} + +impl RecordFetchingContextBuilder { + pub fn new( + fetching_schema: RecordSchema, + table_schema: Schema, + primary_key_indexes: Option>, + ) -> Self { + Self { + fetching_schema, + table_schema, + primary_key_indexes, + } + } + + pub fn build(&self, source_schema: &Schema) -> Result { + RecordFetchingContext::new( + &self.fetching_schema, + self.primary_key_indexes.clone(), + &self.table_schema, + &source_schema, + ) + } +} + #[derive(Clone)] pub struct ProjectedSchema(Arc); impl fmt::Debug for ProjectedSchema { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ProjectedSchema") - .field("original_schema", &self.0.original_schema) + .field("original_schema", &self.0.table_schema) .field("projection", &self.0.projection) .finish() } @@ -137,8 +294,8 @@ impl ProjectedSchema { Self(Arc::new(inner)) } - pub fn new(schema: Schema, projection: Option>) -> Result { - let inner = ProjectedSchemaInner::new(schema, projection)?; + pub fn new(table_schema: Schema, projection: Option>) -> Result { + let inner = ProjectedSchemaInner::new(table_schema, projection)?; Ok(Self(Arc::new(inner))) } @@ -150,42 +307,33 @@ impl ProjectedSchema { self.0.projection() } - /// Returns the [RowProjector] to project the rows with source schema to - /// rows with [RecordSchemaWithKey]. - /// - /// REQUIRE: The key columns are the same as this schema. - #[inline] - pub fn try_project_with_key(&self, source_schema: &Schema) -> Result { - self.0.try_project_with_key(source_schema) - } - // Returns the record schema after projection with key. pub fn to_record_schema_with_key(&self) -> RecordSchemaWithKey { - self.0.schema_with_key.clone() + self.0.record_schema_with_key.clone() } pub fn as_record_schema_with_key(&self) -> &RecordSchemaWithKey { - &self.0.schema_with_key + &self.0.record_schema_with_key } // Returns the record schema after projection. pub fn to_record_schema(&self) -> RecordSchema { - self.0.record_schema.clone() + self.0.target_record_schema.clone() } /// Returns the arrow schema after projection. 
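For the row projection itself, a `Some(idx)` entry in `fetching_source_column_indexes` copies the datum from the source row, while a `None` entry is filled with null. A minimal standalone sketch of that behaviour, with `Option<i64>` standing in for `Datum` so it compiles without the crate:

    // Sketch of the null-filling projection performed by `project_row`;
    // `Option<i64>` is a stand-in for `Datum` (hypothetical types).
    fn project_row(
        fetching_source_column_indexes: &[Option<usize>],
        source_row: &[i64],
    ) -> Vec<Option<i64>> {
        fetching_source_column_indexes
            .iter()
            .map(|p| match p {
                // Column exists in the source row: copy it through.
                Some(index_in_source) => Some(source_row[*index_in_source]),
                // Column missing in the source (e.g. added by a later schema
                // version): fill with null.
                None => None,
            })
            .collect()
    }

    fn main() {
        let indexes = [Some(2), None, Some(0)];
        let row = [10, 20, 30];
        assert_eq!(project_row(&indexes, &row), vec![Some(30), None, Some(10)]);
    }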
pub fn to_projected_arrow_schema(&self) -> ArrowSchemaRef { - self.0.record_schema.to_arrow_schema_ref() + self.0.target_record_schema.to_arrow_schema_ref() } - pub fn original_schema(&self) -> &Schema { - &self.0.original_schema + pub fn table_schema(&self) -> &Schema { + &self.0.table_schema } } impl From for ceresdbproto::schema::ProjectedSchema { fn from(request: ProjectedSchema) -> Self { - let table_schema_pb = (&request.0.original_schema).into(); + let table_schema_pb = (&request.0.table_schema).into(); let projection_pb = request.0.projection.as_ref().map(|project| { let project = project .iter() @@ -223,55 +371,56 @@ impl TryFrom for ProjectedSchema { /// Schema with projection informations struct ProjectedSchemaInner { - /// The schema before projection that the reader intended to read, may - /// differ from current schema of the table. - original_schema: Schema, + /// The table schema used to generate plan, possible to differ from recorded + /// schema in ssts. + table_schema: Schema, /// Index of the projected columns in `self.schema`, `None` if /// all columns are needed. projection: Option>, - /// The record schema from `self.schema` with key columns after projection. - schema_with_key: RecordSchemaWithKey, - /// The record schema from `self.schema` after projection. - record_schema: RecordSchema, + /// The fetching record schema from `self.schema` with key columns after + /// projection. + record_schema_with_key: RecordSchemaWithKey, + /// The fetching record schema from `self.schema` after projection. + target_record_schema: RecordSchema, } impl ProjectedSchemaInner { - fn no_projection(schema: Schema) -> Self { - let schema_with_key = schema.to_record_schema_with_key(); - let record_schema = schema.to_record_schema(); + fn no_projection(table_schema: Schema) -> Self { + let record_schema_with_key = table_schema.to_record_schema_with_key(); + let target_record_schema = table_schema.to_record_schema(); Self { - original_schema: schema, + table_schema, projection: None, - schema_with_key, - record_schema, + record_schema_with_key, + target_record_schema, } } - fn new(schema: Schema, projection: Option>) -> Result { + fn new(table_schema: Schema, projection: Option>) -> Result { if let Some(p) = &projection { // Projection is provided, validate the projection is valid. This is necessary // to avoid panic when creating RecordSchema and // RecordSchemaWithKey. if let Some(max_idx) = p.iter().max() { ensure!( - *max_idx < schema.num_columns(), + *max_idx < table_schema.num_columns(), InvalidProjectionIndex { index: *max_idx } ); } - let schema_with_key = schema.project_record_schema_with_key(p); - let record_schema = schema.project_record_schema(p); + let record_schema_with_key = table_schema.project_record_schema_with_key(p); + let target_record_schema = table_schema.project_record_schema(p); Ok(Self { - original_schema: schema, + table_schema, projection, - schema_with_key, - record_schema, + record_schema_with_key, + target_record_schema, }) } else { - Ok(Self::no_projection(schema)) + Ok(Self::no_projection(table_schema)) } } @@ -283,75 +432,6 @@ impl ProjectedSchemaInner { fn projection(&self) -> Option> { self.projection.clone() } - - // TODO(yingwen): We can fill missing not null column with default value instead - // of returning error. - fn try_project_with_key(&self, source_schema: &Schema) -> Result { - // When do primary key sample, this will assert will fail. - // TODO: maybe we can add a flag to only skip this assert when sampling. 
- // - // debug_assert_eq!( - // self.schema_with_key.key_columns(), - // source_schema.key_columns() - // ); - // We consider the two schema is equal if they have same version. - // if self.original_schema.version() == source_schema.version() { - // debug_assert_eq!(self.original_schema, *source_schema); - // } - - let mut source_projection = Vec::with_capacity(self.schema_with_key.num_columns()); - // For each column in `schema_with_key` - for column_schema in self.schema_with_key.columns() { - self.try_project_column(column_schema, source_schema, &mut source_projection)?; - } - - Ok(RowProjector { - schema_with_key: self.schema_with_key.clone(), - source_schema: source_schema.clone(), - source_projection, - }) - } - - fn try_project_column( - &self, - column: &ColumnSchema, - source_schema: &Schema, - source_projection: &mut Vec>, - ) -> Result<()> { - match source_schema.index_of(&column.name) { - Some(source_idx) => { - // Column is in source - if self.original_schema.version() == source_schema.version() { - // Same version, just use that column in source - source_projection.push(Some(source_idx)); - } else { - // Different version, need to check column schema - let source_column = source_schema.column(source_idx); - // TODO(yingwen): Data type is not checked here because we do not support alter - // data type now. - match column - .compatible_for_read(source_column) - .context(IncompatReadColumn)? - { - ReadOp::Exact => { - source_projection.push(Some(source_idx)); - } - ReadOp::FillNull => { - source_projection.push(None); - } - } - } - } - None => { - // Column is not in source - ensure!(column.is_nullable, MissingReadColumn { name: &column.name }); - // Column is nullable, fill this column by null - source_projection.push(None); - } - } - - Ok(()) - } } #[cfg(test)] @@ -365,7 +445,7 @@ mod tests { let projection: Vec = (0..schema.num_columns() - 1).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); assert_eq!( - projected_schema.0.schema_with_key.num_columns(), + projected_schema.0.record_schema_with_key.num_columns(), schema.num_columns() - 1 ); assert!(!projected_schema.is_all_projection()); diff --git a/common_types/src/record_batch.rs b/common_types/src/record_batch.rs index 5a9a008996..1a69bfd135 100644 --- a/common_types/src/record_batch.rs +++ b/common_types/src/record_batch.rs @@ -29,7 +29,7 @@ use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; use crate::{ column_block::{cast_nanosecond_to_mills, ColumnBlock, ColumnBlockBuilder}, datum::DatumKind, - projected_schema::{ProjectedSchema, RowProjector}, + projected_schema::{ProjectedSchema, RecordFetchingContext}, row::{ contiguous::{ContiguousRow, ProjectedContiguousRow}, Row, RowViewOnBatch, @@ -362,15 +362,73 @@ fn cast_arrow_record_batch(source: ArrowRecordBatch) -> Result } #[derive(Debug)] -pub struct RecordBatchWithKey { - schema_with_key: RecordSchemaWithKey, +pub struct FetchingRecordBatch { + schema: RecordSchema, + // TODO: remove it later, `FetchingRecordBatch` is unnecessary to know anything about primary + // keys. 
+ primary_key_indexes: Option>, data: RecordBatchData, } -impl RecordBatchWithKey { - pub fn new(schema_with_key: RecordSchemaWithKey, data: RecordBatchData) -> Self { +impl FetchingRecordBatch { + pub fn try_new( + ctx: &RecordFetchingContext, + arrow_record_batch: ArrowRecordBatch, + ) -> Result { + let column_indexes = ctx.fetching_projected_source_column_indexes(); + let schema = ctx.fetching_schema().clone(); + let mut column_blocks = Vec::with_capacity(schema.num_columns()); + + let num_rows = arrow_record_batch.num_rows(); + let num_columns = arrow_record_batch.num_columns(); + for (col_idx_opt, col_schema) in column_indexes.iter().zip(schema.columns()) { + match col_idx_opt { + Some(col_idx) => { + ensure!( + *col_idx < num_columns, + OutOfIndexProjection { + source_projection: column_indexes, + arrow_schema: arrow_record_batch.schema() + } + ); + + let array = arrow_record_batch.column(*col_idx); + let column_block = + ColumnBlock::try_from_arrow_array_ref(&col_schema.data_type, array) + .context(CreateColumnBlock)?; + + column_blocks.push(column_block); + } + None => { + // Need to push row with specific type. + let null_block = ColumnBlock::new_null_with_type( + &col_schema.data_type, + num_rows, + col_schema.is_dictionary, + ) + .context(CreateColumnBlock)?; + column_blocks.push(null_block); + } + } + } + + let data = RecordBatchData::new(schema.to_arrow_schema_ref(), column_blocks)?; + + Ok(FetchingRecordBatch { + schema, + primary_key_indexes: ctx.primary_key_indexes().map(|idxs| idxs.to_vec()), + data, + }) + } + + pub fn new_from_parts( + schema: RecordSchema, + primary_key_indexes: Option>, + data: RecordBatchData, + ) -> Self { Self { - schema_with_key, + schema, + primary_key_indexes, data, } } @@ -404,21 +462,23 @@ impl RecordBatchWithKey { /// REQUIRE: The schema_with_key of the [RecordBatchWithKey] is the same as /// the schema_with_key of [ProjectedSchema]. pub fn try_project(mut self, projected_schema: &ProjectedSchema) -> Result { - debug_assert_eq!( - &self.schema_with_key, - projected_schema.as_record_schema_with_key() - ); + // FIXME + // debug_assert_eq!( + // &self.schema, + // projected_schema.as_record_schema_with_key() + // ); // Get the schema after projection. let record_schema = projected_schema.to_record_schema(); let mut column_blocks = Vec::with_capacity(record_schema.num_columns()); for column_schema in record_schema.columns() { - let column_index = self.schema_with_key.index_of(&column_schema.name).context( - ColumnNotInSchemaWithKey { - name: &column_schema.name, - }, - )?; + let column_index = + self.schema + .index_of(&column_schema.name) + .context(ColumnNotInSchemaWithKey { + name: &column_schema.name, + })?; // Take the column block out. 
let column_block = self.data.take_column_block(column_index); @@ -435,7 +495,7 @@ impl RecordBatchWithKey { pub fn into_record_batch(self) -> RecordBatch { RecordBatch { - schema: self.schema_with_key.into_record_schema(), + schema: self.schema, data: self.data, } } @@ -448,9 +508,20 @@ impl RecordBatchWithKey { self.data.arrow_record_batch } + pub fn schema_with_key(&self) -> Option { + self.primary_key_indexes + .clone() + .map(|idxs| RecordSchemaWithKey::new(self.schema.clone(), idxs)) + } + + #[inline] + pub fn schema(&self) -> &RecordSchema { + &self.schema + } + #[inline] - pub fn schema_with_key(&self) -> &RecordSchemaWithKey { - &self.schema_with_key + pub fn primary_key_indexes(&self) -> Option<&[usize]> { + self.primary_key_indexes.as_deref() } #[inline] @@ -485,7 +556,8 @@ impl RecordBatchWithKey { #[must_use] pub fn slice(&self, offset: usize, length: usize) -> Self { Self { - schema_with_key: self.schema_with_key.clone(), + schema: self.schema.clone(), + primary_key_indexes: self.primary_key_indexes.clone(), data: self.data.slice(offset, length), } } @@ -506,14 +578,15 @@ impl RecordBatchWithKey { } } -pub struct RecordBatchWithKeyBuilder { - schema_with_key: RecordSchemaWithKey, +pub struct FetchingRecordBatchBuilder { + fetching_schema: RecordSchema, + primary_key_indexes: Option>, builders: Vec, } -impl RecordBatchWithKeyBuilder { - pub fn new(schema_with_key: RecordSchemaWithKey) -> Self { - let builders = schema_with_key +impl FetchingRecordBatchBuilder { + pub fn new(fetching_schema: RecordSchema, primary_key_indexes: Option>) -> Self { + let builders = fetching_schema .columns() .iter() .map(|column_schema| { @@ -525,13 +598,18 @@ impl RecordBatchWithKeyBuilder { }) .collect(); Self { - schema_with_key, + fetching_schema, + primary_key_indexes, builders, } } - pub fn with_capacity(schema_with_key: RecordSchemaWithKey, capacity: usize) -> Self { - let builders = schema_with_key + pub fn with_capacity( + record_schema: RecordSchema, + primary_key_indexes: Option>, + capacity: usize, + ) -> Self { + let builders = record_schema .columns() .iter() .map(|column_schema| { @@ -543,7 +621,8 @@ impl RecordBatchWithKeyBuilder { }) .collect(); Self { - schema_with_key, + fetching_schema: record_schema, + primary_key_indexes, builders, } } @@ -598,7 +677,7 @@ impl RecordBatchWithKeyBuilder { /// - The `record_batch` and the builder must have the same schema. pub fn append_batch_range( &mut self, - record_batch: &RecordBatchWithKey, + record_batch: &FetchingRecordBatch, start: usize, len: usize, ) -> Result { @@ -639,114 +718,40 @@ impl RecordBatchWithKeyBuilder { } /// Build [RecordBatchWithKey] and reset the builder. 
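Condensed from the tests further down in this file, a sketch of how the renamed builder is driven now that primary-key indexes are optional. It leans on the crate's test helpers (`build_projected_schema`, `build_rows`, `build_fetching_record_batch_by_rows`), which may only be available under the test cfg:

    use common_types::{
        record_batch::FetchingRecordBatchBuilder,
        row::RowViewOnBatch,
        tests::{build_fetching_record_batch_by_rows, build_projected_schema, build_rows},
    };

    fn rebuild_one_row() {
        let projected_schema = build_projected_schema();
        let source = build_fetching_record_batch_by_rows(build_rows());

        // Primary-key indexes are now optional; `None` is enough on the append path.
        let mut builder =
            FetchingRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2);

        // Copy a single row across via a row view, then materialize the batch.
        let view = RowViewOnBatch {
            record_batch: &source,
            row_idx: 1,
        };
        builder.append_row_view(&view).unwrap();
        let batch = builder.build().unwrap();
        assert_eq!(batch.num_rows(), 1);
    }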
- pub fn build(&mut self) -> Result { + pub fn build(&mut self) -> Result { let column_blocks: Vec<_> = self .builders .iter_mut() .map(|builder| builder.build()) .collect(); - let arrow_schema = self.schema_with_key.to_arrow_schema_ref(); + let arrow_schema = self.fetching_schema.to_arrow_schema_ref(); - Ok(RecordBatchWithKey { - schema_with_key: self.schema_with_key.clone(), + Ok(FetchingRecordBatch { + schema: self.fetching_schema.clone(), + primary_key_indexes: self.primary_key_indexes.clone(), data: RecordBatchData::new(arrow_schema, column_blocks)?, }) } } -#[derive(Debug, Clone)] -pub struct ArrowRecordBatchProjector { - row_projector: RowProjector, -} - -impl From for ArrowRecordBatchProjector { - fn from(row_projector: RowProjector) -> Self { - Self { row_projector } - } -} - -impl ArrowRecordBatchProjector { - /// Project the [arrow::RecordBatch] to [RecordBatchWithKey] and these - /// things are to be done: - /// - Insert the null column if the projected column does not appear in the - /// source schema. - /// - Convert the [arrow::RecordBatch] to [RecordBatchWithKey]. - /// - /// REQUIRE: Schema of the `arrow_record_batch` is the same as the - /// projection of existing column in the source schema. - pub fn project_to_record_batch_with_key( - &self, - arrow_record_batch: ArrowRecordBatch, - ) -> Result { - let schema_with_key = self.row_projector.schema_with_key().clone(); - let source_projection = self.row_projector.source_projection(); - let mut column_blocks = Vec::with_capacity(schema_with_key.num_columns()); - - let num_rows = arrow_record_batch.num_rows(); - // ensure next_arrow_column_idx < num_columns - let mut next_arrow_column_idx = 0; - let num_columns = arrow_record_batch.num_columns(); - - for (source_idx, column_schema) in source_projection.iter().zip(schema_with_key.columns()) { - match source_idx { - Some(_) => { - ensure!( - next_arrow_column_idx < num_columns, - OutOfIndexProjection { - source_projection, - arrow_schema: arrow_record_batch.schema() - } - ); - - let array = arrow_record_batch.column(next_arrow_column_idx); - next_arrow_column_idx += 1; - - let column_block = - ColumnBlock::try_from_arrow_array_ref(&column_schema.data_type, array) - .context(CreateColumnBlock)?; - - column_blocks.push(column_block); - } - None => { - // Need to push row with specific type. 
- let null_block = ColumnBlock::new_null_with_type( - &column_schema.data_type, - num_rows, - column_schema.is_dictionary, - ) - .context(CreateColumnBlock)?; - column_blocks.push(null_block); - } - } - } - - let data = RecordBatchData::new(schema_with_key.to_arrow_schema_ref(), column_blocks)?; - - Ok(RecordBatchWithKey { - schema_with_key, - data, - }) - } -} - #[cfg(test)] mod tests { use crate::{ - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::RowViewOnBatch, tests::{ - build_projected_schema, build_record_batch_with_key_by_rows, build_rows, + build_fetching_record_batch_by_rows, build_projected_schema, build_rows, check_record_batch_with_key_with_rows, }, }; - fn build_record_batch_with_key() -> RecordBatchWithKey { + fn build_fetching_record_batch() -> FetchingRecordBatch { let rows = build_rows(); - build_record_batch_with_key_by_rows(rows) + build_fetching_record_batch_by_rows(rows) } fn check_record_batch_with_key( - record_batch_with_key: RecordBatchWithKey, + record_batch_with_key: FetchingRecordBatch, row_num: usize, column_num: usize, ) -> bool { @@ -756,7 +761,7 @@ mod tests { #[test] fn test_append_projected_contiguous_row() { - let record_batch_with_key = build_record_batch_with_key(); + let record_batch_with_key = build_fetching_record_batch(); assert_eq!(record_batch_with_key.num_rows(), 5); assert_eq!(record_batch_with_key.num_columns(), 5); @@ -766,15 +771,11 @@ mod tests { #[test] fn test_append_row_view() { let projected_schema = build_projected_schema(); - - let record_batch_with_key = build_record_batch_with_key(); - - let mut builder = RecordBatchWithKeyBuilder::with_capacity( - projected_schema.to_record_schema_with_key(), - 2, - ); + let fetching_record_batch = build_fetching_record_batch(); + let mut builder = + FetchingRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); let view = RowViewOnBatch { - record_batch: &record_batch_with_key, + record_batch: &fetching_record_batch, row_idx: 1, }; builder.append_row_view(&view).unwrap(); @@ -788,13 +789,10 @@ mod tests { #[test] fn test_append_batch_range() { let projected_schema = build_projected_schema(); + let record_batch_with_key = build_fetching_record_batch(); - let record_batch_with_key = build_record_batch_with_key(); - - let mut builder = RecordBatchWithKeyBuilder::with_capacity( - projected_schema.to_record_schema_with_key(), - 2, - ); + let mut builder = + FetchingRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); builder .append_batch_range(&record_batch_with_key, 0, 2) .unwrap(); diff --git a/common_types/src/row/contiguous.rs b/common_types/src/row/contiguous.rs index 158641e69e..fdb66d28e5 100644 --- a/common_types/src/row/contiguous.rs +++ b/common_types/src/row/contiguous.rs @@ -26,7 +26,7 @@ use snafu::{ensure, Backtrace, Snafu}; use crate::{ datum::{Datum, DatumKind, DatumView}, - projected_schema::RowProjector, + projected_schema::RecordFetchingContext, row::{ bitset::{BitSet, RoBitSet}, Row, @@ -248,27 +248,24 @@ fn datum_view_at<'a>( /// schema of source row. 
pub struct ProjectedContiguousRow<'a, T> { source_row: T, - projector: &'a RowProjector, + ctx: &'a RecordFetchingContext, } impl<'a, T: ContiguousRow> ProjectedContiguousRow<'a, T> { - pub fn new(source_row: T, projector: &'a RowProjector) -> Self { - Self { - source_row, - projector, - } + pub fn new(source_row: T, ctx: &'a RecordFetchingContext) -> Self { + Self { source_row, ctx } } pub fn num_datum_views(&self) -> usize { - self.projector.source_projection().len() + self.ctx.fetching_source_column_indexes().len() } pub fn datum_view_at(&self, index: usize) -> DatumView { - let p = self.projector.source_projection()[index]; + let p = self.ctx.fetching_source_column_indexes()[index]; match p { Some(index_in_source) => { - let datum_kind = self.projector.datum_kind(index_in_source); + let datum_kind = self.ctx.datum_kind(index_in_source); self.source_row.datum_view_at(index_in_source, datum_kind) } None => DatumView::Null, @@ -801,7 +798,13 @@ mod tests { let projection: Vec = (0..schema.num_columns() - 1).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection.clone())).unwrap(); - let row_projected_schema = projected_schema.try_project_with_key(&schema).unwrap(); + let ctx = RecordFetchingContext::new( + &projected_schema.to_record_schema(), + None, + &projected_schema.table_schema(), + &schema, + ) + .unwrap(); let rows = build_rows(); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); @@ -812,7 +815,7 @@ mod tests { writer.write_row(&row).unwrap(); let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &row_projected_schema); + let projected_row = ProjectedContiguousRow::new(source_row, &ctx); let range = projection.clone(); for i in range { diff --git a/common_types/src/row/mod.rs b/common_types/src/row/mod.rs index 790170803c..efb1c9b4ea 100644 --- a/common_types/src/row/mod.rs +++ b/common_types/src/row/mod.rs @@ -24,7 +24,7 @@ use snafu::{ensure, Backtrace, OptionExt, Snafu}; use crate::{ column_schema::{ColumnId, ColumnSchema}, datum::{Datum, DatumKind, DatumView}, - record_batch::RecordBatchWithKey, + record_batch::FetchingRecordBatch, schema::{RecordSchemaWithKey, Schema}, time::Timestamp, }; @@ -566,7 +566,7 @@ pub trait RowView { /// `row_idx < record_batch.num_rows()` is ensured. 
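A small sketch of consuming a `ProjectedContiguousRow` through the new context, assuming a row built exactly as in the test below; columns whose mapping is `None` surface as `DatumView::Null`:

    use common_types::row::contiguous::{ContiguousRow, ProjectedContiguousRow};

    // Iterate every projected datum view and render it for inspection
    // (DatumView is assumed to implement Debug).
    fn debug_row<T: ContiguousRow>(row: &ProjectedContiguousRow<'_, T>) -> Vec<String> {
        (0..row.num_datum_views())
            .map(|i| format!("{:?}", row.datum_view_at(i)))
            .collect()
    }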
#[derive(Debug)] pub struct RowViewOnBatch<'a> { - pub record_batch: &'a RecordBatchWithKey, + pub record_batch: &'a FetchingRecordBatch, pub row_idx: usize, } @@ -583,18 +583,18 @@ impl<'a> RowViewOnBatch<'a> { pub struct RowViewOnBatchColumnIter<'a> { next_column_idx: usize, row_idx: usize, - record_batch: &'a RecordBatchWithKey, + record_batch: &'a FetchingRecordBatch, } impl<'a> RowView for RowViewOnBatch<'a> { fn try_get_column_by_name(&self, column_name: &str) -> Result> { - let column_idx = self - .record_batch - .schema_with_key() - .index_of(column_name) - .context(ColumnNameNotFound { - column: column_name, - })?; + let column_idx = + self.record_batch + .schema() + .index_of(column_name) + .context(ColumnNameNotFound { + column: column_name, + })?; Ok(Some(self.column_by_idx(column_idx))) } diff --git a/common_types/src/schema.rs b/common_types/src/schema.rs index e114c1e4d6..76330d261e 100644 --- a/common_types/src/schema.rs +++ b/common_types/src/schema.rs @@ -539,6 +539,13 @@ pub struct RecordSchemaWithKey { } impl RecordSchemaWithKey { + pub fn new(record_schema: RecordSchema, primary_key_indexes: Vec) -> Self { + Self { + record_schema, + primary_key_indexes, + } + } + pub fn num_columns(&self) -> usize { self.record_schema.num_columns() } @@ -578,7 +585,11 @@ impl RecordSchemaWithKey { .collect::>() } - pub(crate) fn into_record_schema(self) -> RecordSchema { + pub fn to_record_schema(&self) -> RecordSchema { + self.record_schema.clone() + } + + pub fn into_record_schema(self) -> RecordSchema { self.record_schema } diff --git a/common_types/src/tests.rs b/common_types/src/tests.rs index 29a442fdb9..5bf998e452 100644 --- a/common_types/src/tests.rs +++ b/common_types/src/tests.rs @@ -18,8 +18,8 @@ use sqlparser::ast::{BinaryOperator, Expr, Value}; use crate::{ column_schema, datum::{Datum, DatumKind}, - projected_schema::ProjectedSchema, - record_batch::{RecordBatchWithKey, RecordBatchWithKeyBuilder}, + projected_schema::{ProjectedSchema, RecordFetchingContext}, + record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::{ contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, Row, @@ -357,15 +357,20 @@ pub fn build_rows() -> Vec { ] } -pub fn build_record_batch_with_key_by_rows(rows: Vec) -> RecordBatchWithKey { +pub fn build_fetching_record_batch_by_rows(rows: Vec) -> FetchingRecordBatch { let schema = build_schema(); assert!(schema.num_columns() > 1); let projection: Vec = (0..schema.num_columns() - 1).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - let row_projected_schema = projected_schema.try_project_with_key(&schema).unwrap(); + let record_fetching_ctx = + RecordFetchingContext::new(&projected_schema.to_record_schema(), None, &schema, &schema) + .unwrap(); - let mut builder = - RecordBatchWithKeyBuilder::with_capacity(projected_schema.to_record_schema_with_key(), 2); + let mut builder = FetchingRecordBatchBuilder::with_capacity( + record_fetching_ctx.fetching_schema().clone(), + None, + 2, + ); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); let mut buf = Vec::new(); @@ -375,7 +380,7 @@ pub fn build_record_batch_with_key_by_rows(rows: Vec) -> RecordBatchWithKey writer.write_row(&row).unwrap(); let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &row_projected_schema); + let projected_row = ProjectedContiguousRow::new(source_row, &record_fetching_ctx); builder 
.append_projected_contiguous_row(&projected_row) .unwrap(); @@ -384,7 +389,7 @@ pub fn build_record_batch_with_key_by_rows(rows: Vec) -> RecordBatchWithKey } pub fn check_record_batch_with_key_with_rows( - record_batch_with_key: &RecordBatchWithKey, + record_batch_with_key: &FetchingRecordBatch, row_num: usize, column_num: usize, rows: Vec, diff --git a/partition_table_engine/src/scan_builder.rs b/partition_table_engine/src/scan_builder.rs index ac79b0b58d..a282849552 100644 --- a/partition_table_engine/src/scan_builder.rs +++ b/partition_table_engine/src/scan_builder.rs @@ -79,7 +79,7 @@ impl PartitionedTableScanBuilder { impl TableScanBuilder for PartitionedTableScanBuilder { async fn build(&self, request: ReadRequest) -> Result> { // Build partition rule. - let table_schema_snapshot = request.projected_schema.original_schema(); + let table_schema_snapshot = request.projected_schema.table_schema(); let df_partition_rule = DfPartitionRuleAdapter::new(self.partition_info.clone(), table_schema_snapshot) .map_err(|e| { diff --git a/system_catalog/src/tables.rs b/system_catalog/src/tables.rs index 179051399d..ac8ec9c0d8 100644 --- a/system_catalog/src/tables.rs +++ b/system_catalog/src/tables.rs @@ -21,7 +21,8 @@ use catalog::{manager::ManagerRef, schema::SchemaRef, CatalogRef}; use common_types::{ column_schema, datum::{Datum, DatumKind}, - record_batch::RecordBatchWithKeyBuilder, + projected_schema::RecordFetchingContext, + record_batch::FetchingRecordBatchBuilder, row::Row, schema, schema::Schema, @@ -153,13 +154,22 @@ impl SystemTable for Tables { .all_catalogs() .box_err() .context(table_engine::table::Scan { table: self.name() })?; - let projected_record_schema = request.projected_schema.to_record_schema_with_key(); - let mut builder = RecordBatchWithKeyBuilder::new(projected_record_schema); + let fetching_schema = request.projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); + let fetching_schema = fetching_schema.to_record_schema(); + let mut builder = FetchingRecordBatchBuilder::new( + fetching_schema.clone(), + Some(primary_key_indexes.clone()), + ); - let projector = request - .projected_schema - .try_project_with_key(&self.schema) - .expect("Should succeed to try_project_key of sys_tables"); + let table_schema = request.projected_schema.table_schema(); + let record_fetching_ctx = RecordFetchingContext::new( + &fetching_schema, + Some(primary_key_indexes), + table_schema, + &self.schema, + ) + .expect("Should succeed to try_project_key of sys_tables"); for catalog in &catalogs { for schema in &catalog .all_schemas() @@ -172,7 +182,7 @@ impl SystemTable for Tables { .context(table_engine::table::Scan { table: self.name() })? 
{ let row = self.from_table(catalog.clone(), schema.clone(), table.clone()); - let projected_row = projector.project_row(&row, Vec::new()); + let projected_row = record_fetching_ctx.project_row(&row, Vec::new()); builder .append_row(projected_row) .box_err() diff --git a/table_engine/src/provider.rs b/table_engine/src/provider.rs index 1685eea77f..24b7e4897f 100644 --- a/table_engine/src/provider.rs +++ b/table_engine/src/provider.rs @@ -147,10 +147,11 @@ impl TableScanBuilder for NormalTableScanBuilder { #[derive(Debug)] pub struct TableProviderAdapter { table: TableRef, + /// The schema of the table when this adapter is created, used as schema /// snapshot for read to avoid the reader sees different schema during /// query - read_schema: Schema, + current_table_schema: Schema, /// Table scan builder builder: B, @@ -159,11 +160,11 @@ pub struct TableProviderAdapter { impl TableProviderAdapter { pub fn new(table: TableRef, builder: B) -> Self { // Take a snapshot of the schema - let read_schema = table.schema(); + let current_table_schema = table.schema(); Self { table, - read_schema, + current_table_schema, builder, } } @@ -199,12 +200,14 @@ impl TableProviderAdapter { ); let predicate = self.check_and_build_predicate_from_filters(filters); - let projected_schema = ProjectedSchema::new(self.read_schema.clone(), projection.cloned()) - .map_err(|e| { - DataFusionError::Internal(format!( - "Invalid projection, plan:{self:?}, projection:{projection:?}, err:{e:?}" - )) - })?; + let projected_schema = + ProjectedSchema::new(self.current_table_schema.clone(), projection.cloned()).map_err( + |e| { + DataFusionError::Internal(format!( + "Invalid projection, plan:{self:?}, projection:{projection:?}, err:{e:?}" + )) + }, + )?; let opts = ReadOptions { deadline, @@ -230,7 +233,9 @@ impl TableProviderAdapter { .filter_map(|filter| { let filter_cols = visitor::find_columns_by_expr(filter); - let support_pushdown = self.table.support_pushdown(&self.read_schema, &filter_cols); + let support_pushdown = self + .table + .support_pushdown(&self.current_table_schema, &filter_cols); if support_pushdown { Some(filter.clone()) } else { @@ -241,7 +246,7 @@ impl TableProviderAdapter { PredicateBuilder::default() .add_pushdown_exprs(&pushdown_filters) - .extract_time_range(&self.read_schema, filters) + .extract_time_range(&self.current_table_schema, filters) .build() } @@ -251,7 +256,9 @@ impl TableProviderAdapter { .map(|filter| { let filter_cols = visitor::find_columns_by_expr(filter); - let support_pushdown = self.table.support_pushdown(&self.read_schema, &filter_cols); + let support_pushdown = self + .table + .support_pushdown(&self.current_table_schema, &filter_cols); if support_pushdown { TableProviderFilterPushDown::Exact } else { @@ -270,7 +277,7 @@ impl TableProvider for TableProviderAdapter { fn schema(&self) -> SchemaRef { // We use the `read_schema` as the schema of this `TableProvider` - self.read_schema.clone().into_arrow_schema_ref() + self.current_table_schema.clone().into_arrow_schema_ref() } async fn scan( @@ -303,7 +310,7 @@ impl TableSource for TableProviderAdapter { /// Get a reference to the schema for this table fn schema(&self) -> SchemaRef { - self.read_schema.clone().into_arrow_schema_ref() + self.current_table_schema.clone().into_arrow_schema_ref() } /// Get the type of this table for metadata/catalog purposes. From b87f99fb72923e238e6e5bcc6ebec675ffe8ce61 Mon Sep 17 00:00:00 2001 From: kamille Date: Thu, 16 Nov 2023 16:07:16 +0800 Subject: [PATCH 02/13] fix tools and benches. 
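The common pattern applied across the benches and tools below: the projected schema no longer rides inside `SstReadOptions`; call sites derive a `RecordFetchingContextBuilder` from it and hand that to `SstReadOptionsBuilder::build`. A condensed sketch of that call-site migration, assuming the re-exports introduced in this series:

    use analytic_engine::{sst::factory::SstReadOptions, SstReadOptionsBuilder};
    use common_types::projected_schema::{ProjectedSchema, RecordFetchingContextBuilder};

    // Sketch only: `opts_builder` is prepared elsewhere with scan type, metrics,
    // row-group size, predicate, meta cache and runtime, as in the hunks below.
    fn to_read_options(
        projected_schema: &ProjectedSchema,
        opts_builder: SstReadOptionsBuilder,
    ) -> SstReadOptions {
        let fetching_schema = projected_schema.to_record_schema();
        let table_schema = projected_schema.table_schema().clone();
        // No primary-key indexes are needed on the plain query path.
        let ctx_builder = RecordFetchingContextBuilder::new(fetching_schema, table_schema, None);
        opts_builder.build(ctx_builder)
    }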
--- analytic_engine/src/lib.rs | 6 ++- benchmarks/src/merge_memtable_bench.rs | 35 ++++++++------- benchmarks/src/merge_sst_bench.rs | 47 ++++++++++++-------- benchmarks/src/scan_memtable_bench.rs | 8 +++- benchmarks/src/sst_bench.rs | 59 +++++++++++++++++--------- benchmarks/src/sst_tools.rs | 42 ++++++++++++------ benchmarks/src/util.rs | 10 ++++- tools/src/bin/sst-convert.rs | 13 +++++- 8 files changed, 149 insertions(+), 71 deletions(-) diff --git a/analytic_engine/src/lib.rs b/analytic_engine/src/lib.rs index 9e8cb1dee2..423553bb85 100644 --- a/analytic_engine/src/lib.rs +++ b/analytic_engine/src/lib.rs @@ -43,7 +43,11 @@ use size_ext::ReadableSize; use time_ext::ReadableDuration; use wal::config::StorageConfig; -pub use crate::{compaction::scheduler::SchedulerConfig, table_options::TableOptions}; +pub use crate::{ + compaction::scheduler::SchedulerConfig, + instance::{ScanType, SstReadOptionsBuilder}, + table_options::TableOptions, +}; /// Config of analytic engine #[derive(Debug, Clone, Deserialize, Serialize)] diff --git a/benchmarks/src/merge_memtable_bench.rs b/benchmarks/src/merge_memtable_bench.rs index c843010b15..0eccb910d7 100644 --- a/benchmarks/src/merge_memtable_bench.rs +++ b/benchmarks/src/merge_memtable_bench.rs @@ -39,6 +39,7 @@ use analytic_engine::{ sst_util, version::{MemTableState, MemTableVec}, }, + ScanType, SstReadOptionsBuilder, }; use arena::NoopCollector; use common_types::{ @@ -61,7 +62,8 @@ pub struct MergeMemTableBench { space_id: SpaceId, table_id: TableId, dedup: bool, - sst_read_options: SstReadOptions, + sst_read_options_builder: SstReadOptionsBuilder, + num_rows_per_row_group: usize, } impl MergeMemTableBench { @@ -113,7 +115,8 @@ impl MergeMemTableBench { id: *id, }); } - let sst_read_options = mock_sst_read_options(projected_schema.clone(), runtime.clone()); + let sst_read_options_builder = + mock_sst_read_options_builder(projected_schema.clone(), runtime.clone()); MergeMemTableBench { store, @@ -125,7 +128,8 @@ impl MergeMemTableBench { space_id, table_id, dedup: true, - sst_read_options, + sst_read_options_builder, + num_rows_per_row_group: 500, } } @@ -149,7 +153,7 @@ impl MergeMemTableBench { let projected_schema = self.projected_schema.clone(); let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); let iter_options = IterOptions { - batch_size: self.sst_read_options.num_rows_per_row_group, + batch_size: self.num_rows_per_row_group, }; let request_id = RequestId::next_id(); @@ -164,7 +168,7 @@ impl MergeMemTableBench { projected_schema, predicate: Arc::new(Predicate::empty()), sst_factory: &sst_factory, - sst_read_options: self.sst_read_options.clone(), + sst_read_options_builder: self.sst_read_options_builder.clone(), store_picker: &store_picker, merge_iter_options: iter_options.clone(), need_dedup: true, @@ -205,23 +209,24 @@ impl MergeMemTableBench { } } -fn mock_sst_read_options( +fn mock_sst_read_options_builder( projected_schema: ProjectedSchema, runtime: Arc, -) -> SstReadOptions { +) -> SstReadOptionsBuilder { let scan_options = ScanOptions { background_read_parallelism: 1, max_record_batches_in_flight: 1024, num_streams_to_prefetch: 0, }; - SstReadOptions { - maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), - frequency: ReadFrequency::Frequent, - num_rows_per_row_group: 500, - projected_schema, - predicate: Arc::new(Predicate::empty()), - meta_cache: None, + let maybe_table_level_metrics = Arc::new(SstMaybeTableLevelMetrics::new("bench")); + + SstReadOptionsBuilder::new( + ScanType::Query, scan_options, + 
maybe_table_level_metrics, + 500, + Arc::new(Predicate::empty()), + None, runtime, - } + ) } diff --git a/benchmarks/src/merge_sst_bench.rs b/benchmarks/src/merge_sst_bench.rs index 2b301cc8c6..141d9364a7 100644 --- a/benchmarks/src/merge_sst_bench.rs +++ b/benchmarks/src/merge_sst_bench.rs @@ -35,8 +35,13 @@ use analytic_engine::{ metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, }, table::sst_util, + ScanType, SstReadOptionsBuilder, +}; +use common_types::{ + projected_schema::{ProjectedSchema, RecordFetchingContext, RecordFetchingContextBuilder}, + request_id::RequestId, + schema::Schema, }; -use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema}; use logger::info; use object_store::{LocalFileSystem, ObjectStoreRef}; use runtime::Runtime; @@ -49,7 +54,9 @@ pub struct MergeSstBench { store: ObjectStoreRef, max_projections: usize, schema: Schema, - sst_read_options: SstReadOptions, + projected_schema: Option, + sst_read_options_builder: SstReadOptionsBuilder, + num_rows_per_row_group: usize, runtime: Arc, space_id: SpaceId, table_id: TableId, @@ -79,16 +86,18 @@ impl MergeSstBench { max_record_batches_in_flight: 1024, num_streams_to_prefetch: 0, }; - let sst_read_options = SstReadOptions { - maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), - frequency: ReadFrequency::Frequent, - num_rows_per_row_group: config.num_rows_per_row_group, - projected_schema, - predicate, - meta_cache: meta_cache.clone(), + + let maybe_table_level_metrics = Arc::new(SstMaybeTableLevelMetrics::new("bench")); + let scan_type = ScanType::Query; + let sst_read_options_builder = SstReadOptionsBuilder::new( + scan_type, scan_options, - runtime: runtime.clone(), - }; + maybe_table_level_metrics, + config.num_rows_per_row_group, + predicate, + meta_cache.clone(), + runtime.clone(), + ); let max_projections = cmp::min(config.max_projections, schema.num_columns()); let (tx, rx) = mpsc::unbounded_channel(); @@ -107,7 +116,9 @@ impl MergeSstBench { store, max_projections, schema, - sst_read_options, + sst_read_options_builder, + num_rows_per_row_group: config.num_rows_per_row_group, + projected_schema: None, runtime, space_id, table_id, @@ -126,7 +137,7 @@ impl MergeSstBench { let projected_schema = util::projected_schema_by_number(&self.schema, i, self.max_projections); - self.sst_read_options.projected_schema = projected_schema; + self.projected_schema = Some(projected_schema); self.dedup = dedup; } @@ -134,10 +145,10 @@ impl MergeSstBench { let space_id = self.space_id; let table_id = self.table_id; let sequence = u64::MAX; - let projected_schema = self.sst_read_options.projected_schema.clone(); + let projected_schema = self.projected_schema.clone().unwrap(); let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); let iter_options = IterOptions { - batch_size: self.sst_read_options.num_rows_per_row_group, + batch_size: self.num_rows_per_row_group, }; let request_id = RequestId::next_id(); @@ -152,7 +163,7 @@ impl MergeSstBench { projected_schema, predicate: Arc::new(Predicate::empty()), sst_factory: &sst_factory, - sst_read_options: self.sst_read_options.clone(), + sst_read_options_builder: self.sst_read_options_builder.clone(), store_picker: &store_picker, merge_iter_options: iter_options.clone(), need_dedup: true, @@ -190,7 +201,7 @@ impl MergeSstBench { fn run_no_dedup_bench(&self) { let space_id = self.space_id; let table_id = self.table_id; - let projected_schema = self.sst_read_options.projected_schema.clone(); + let projected_schema = 
self.projected_schema.clone().unwrap(); let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); let request_id = RequestId::next_id(); @@ -204,7 +215,7 @@ impl MergeSstBench { projected_schema, predicate: Arc::new(Predicate::empty()), sst_factory: &sst_factory, - sst_read_options: self.sst_read_options.clone(), + sst_read_options_builder: self.sst_read_options_builder.clone(), store_picker: &store_picker, num_streams_to_prefetch: 0, }) diff --git a/benchmarks/src/scan_memtable_bench.rs b/benchmarks/src/scan_memtable_bench.rs index e9dde7efff..b5cfc36000 100644 --- a/benchmarks/src/scan_memtable_bench.rs +++ b/benchmarks/src/scan_memtable_bench.rs @@ -25,7 +25,7 @@ use analytic_engine::{ sst::meta_data::cache::MetaCacheRef, }; use arena::NoopCollector; -use common_types::projected_schema::ProjectedSchema; +use common_types::projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}; use logger::info; use object_store::{LocalFileSystem, Path}; @@ -91,14 +91,18 @@ impl ScanMemTableBench { pub fn run_bench(&self) { let scan_ctx = ScanContext::default(); + let fetching_schema = self.projected_schema.to_record_schema(); + let table_schema = self.projected_schema.table_schema(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema.clone(), None); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, end_user_key: Bound::Unbounded, sequence: common_types::MAX_SEQUENCE_NUMBER, - projected_schema: self.projected_schema.clone(), need_dedup: true, reverse: false, metrics_collector: None, + record_fetching_ctx_builder, }; let iter = self.memtable.scan(scan_ctx, scan_req).unwrap(); diff --git a/benchmarks/src/sst_bench.rs b/benchmarks/src/sst_bench.rs index 3d178a994d..cc01a828a2 100644 --- a/benchmarks/src/sst_bench.rs +++ b/benchmarks/src/sst_bench.rs @@ -16,15 +16,21 @@ use std::{cmp, sync::Arc, time::Instant}; -use analytic_engine::sst::{ - factory::{ - Factory, FactoryImpl, ObjectStorePickerRef, ReadFrequency, ScanOptions, SstReadHint, - SstReadOptions, +use analytic_engine::{ + sst::{ + factory::{ + Factory, FactoryImpl, ObjectStorePickerRef, ReadFrequency, ScanOptions, SstReadHint, + SstReadOptions, + }, + meta_data::cache::{MetaCache, MetaCacheRef}, + metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, }, - meta_data::cache::{MetaCache, MetaCacheRef}, - metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, + ScanType, SstReadOptionsBuilder, +}; +use common_types::{ + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + schema::Schema, }; -use common_types::{projected_schema::ProjectedSchema, schema::Schema}; use logger::info; use object_store::{LocalFileSystem, ObjectStoreRef, Path}; use runtime::Runtime; @@ -36,7 +42,8 @@ pub struct SstBench { pub sst_file_name: String, max_projections: usize, schema: Schema, - sst_read_options: SstReadOptions, + projected_schema: Option, + sst_read_options_builder: SstReadOptionsBuilder, runtime: Arc, } @@ -57,16 +64,16 @@ impl SstBench { max_record_batches_in_flight: 1024, num_streams_to_prefetch: 0, }; - let sst_read_options = SstReadOptions { - maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), - frequency: ReadFrequency::Frequent, - num_rows_per_row_group: config.num_rows_per_row_group, - projected_schema, + let maybe_table_level_metrics = Arc::new(SstMaybeTableLevelMetrics::new("bench")); + let sst_read_options_builder = SstReadOptionsBuilder::new( + ScanType::Query, + scan_options, + maybe_table_level_metrics, + 
config.num_rows_per_row_group, predicate, meta_cache, - scan_options, - runtime: runtime.clone(), - }; + runtime.clone(), + ); let max_projections = cmp::min(config.max_projections, schema.num_columns()); SstBench { @@ -74,7 +81,8 @@ impl SstBench { sst_file_name: config.sst_file_name, max_projections, schema, - sst_read_options, + projected_schema: Some(projected_schema), + sst_read_options_builder: sst_read_options_builder.clone(), runtime, } } @@ -88,7 +96,7 @@ impl SstBench { let projected_schema = util::projected_schema_by_number(&self.schema, i, self.max_projections); - self.sst_read_options.projected_schema = projected_schema; + self.projected_schema = Some(projected_schema); } pub fn run_bench(&self) { @@ -97,11 +105,24 @@ impl SstBench { let sst_factory = FactoryImpl; let store_picker: ObjectStorePickerRef = Arc::new(self.store.clone()); + let fetching_schema = self.projected_schema.as_ref().unwrap().to_record_schema(); + let table_schema = self + .projected_schema + .as_ref() + .unwrap() + .table_schema() + .clone(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); + let sst_read_options = self + .sst_read_options_builder + .clone() + .build(record_fetching_ctx_builder); self.runtime.block_on(async { let mut sst_reader = sst_factory .create_reader( &sst_path, - &self.sst_read_options, + &sst_read_options, SstReadHint::default(), &store_picker, None, diff --git a/benchmarks/src/sst_tools.rs b/benchmarks/src/sst_tools.rs index 85fba1841a..89dd301b5b 100644 --- a/benchmarks/src/sst_tools.rs +++ b/benchmarks/src/sst_tools.rs @@ -38,8 +38,12 @@ use analytic_engine::{ }, table::sst_util, table_options::{Compression, StorageFormatHint}, + ScanType, SstReadOptionsBuilder, +}; +use common_types::{ + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + request_id::RequestId, }; -use common_types::{projected_schema::ProjectedSchema, request_id::RequestId}; use generic_error::BoxError; use logger::info; use object_store::{LocalFileSystem, ObjectStoreRef, Path}; @@ -120,15 +124,20 @@ pub async fn rebuild_sst(config: RebuildSstConfig, runtime: Arc) { max_record_batches_in_flight: 1024, num_streams_to_prefetch: 2, }; + + let fetching_schema = projected_schema.to_record_schema(); + let table_schema = projected_schema.table_schema().clone(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); let sst_read_options = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), frequency: ReadFrequency::Once, num_rows_per_row_group: config.num_rows_per_row_group, - projected_schema, predicate: config.predicate.into_predicate(), meta_cache: None, scan_options, runtime, + record_fetching_ctx_builder, }; let record_batch_stream = @@ -223,6 +232,7 @@ pub async fn merge_sst(config: MergeSstConfig, runtime: Arc) { let iter_options = IterOptions { batch_size: config.num_rows_per_row_group, }; + let scan_options = ScanOptions { background_read_parallelism: 1, max_record_batches_in_flight: 1024, @@ -233,16 +243,23 @@ pub async fn merge_sst(config: MergeSstConfig, runtime: Arc) { let sst_factory: SstFactoryRef = Arc::new(FactoryImpl); let store_picker: ObjectStorePickerRef = Arc::new(store); let projected_schema = ProjectedSchema::no_projection(schema.clone()); - let sst_read_options = SstReadOptions { - maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), - frequency: ReadFrequency::Once, - num_rows_per_row_group: 
config.num_rows_per_row_group, - projected_schema: projected_schema.clone(), - predicate: config.predicate.into_predicate(), - meta_cache: None, + let maybe_table_level_metrics = Arc::new(SstMaybeTableLevelMetrics::new("bench")); + let sst_read_options_builder = SstReadOptionsBuilder::new( + ScanType::Query, scan_options, - runtime: runtime.clone(), - }; + maybe_table_level_metrics, + config.num_rows_per_row_group, + config.predicate.into_predicate(), + None, + runtime.clone(), + ); + let fetching_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); + let fetching_schema = fetching_schema.into_record_schema(); + let table_schema = projected_schema.table_schema().clone(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema, Some(primary_key_indexes)); + let iter = { let space_id = config.space_id; let table_id = config.table_id; @@ -258,11 +275,11 @@ pub async fn merge_sst(config: MergeSstConfig, runtime: Arc) { projected_schema, predicate: Arc::new(Predicate::empty()), sst_factory: &sst_factory, - sst_read_options: sst_read_options.clone(), store_picker: &store_picker, merge_iter_options: iter_options.clone(), need_dedup: true, reverse: false, + sst_read_options_builder: sst_read_options_builder.clone(), }); builder .mut_ssts_of_level(Level::MIN) @@ -278,6 +295,7 @@ pub async fn merge_sst(config: MergeSstConfig, runtime: Arc) { row_iter::record_batch_with_key_iter_to_stream(iter) }; + let sst_read_options = sst_read_options_builder.build(record_fetching_ctx_builder); let sst_meta = { let meta_reader = SstMetaReader { space_id, diff --git a/benchmarks/src/util.rs b/benchmarks/src/util.rs index 0b66cc06ec..0857f58efb 100644 --- a/benchmarks/src/util.rs +++ b/benchmarks/src/util.rs @@ -35,7 +35,7 @@ use analytic_engine::{ }; use bytes_ext::{BufMut, SafeBufMut}; use common_types::{ - projected_schema::ProjectedSchema, + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, schema::{IndexInWriterSchema, Schema}, }; use macros::define_result; @@ -123,15 +123,21 @@ pub async fn load_sst_to_memtable( max_record_batches_in_flight: 1024, num_streams_to_prefetch: 0, }; + let projected_schema = ProjectedSchema::no_projection(schema.clone()); + + let fetching_schema = projected_schema.to_record_schema(); + let table_schema = projected_schema.table_schema().clone(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); let sst_read_options = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), frequency: ReadFrequency::Frequent, num_rows_per_row_group: 8192, - projected_schema: ProjectedSchema::no_projection(schema.clone()), predicate: Arc::new(Predicate::empty()), meta_cache: None, scan_options, runtime, + record_fetching_ctx_builder, }; let sst_factory = FactoryImpl; let store_picker: ObjectStorePickerRef = Arc::new(store.clone()); diff --git a/tools/src/bin/sst-convert.rs b/tools/src/bin/sst-convert.rs index fa6778140c..cd8202df6b 100644 --- a/tools/src/bin/sst-convert.rs +++ b/tools/src/bin/sst-convert.rs @@ -30,7 +30,10 @@ use analytic_engine::{ }; use anyhow::{Context, Result}; use clap::Parser; -use common_types::{projected_schema::ProjectedSchema, request_id::RequestId}; +use common_types::{ + projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + request_id::RequestId, +}; use generic_error::BoxError; use object_store::{LocalFileSystem, Path}; use 
runtime::Runtime; @@ -92,15 +95,21 @@ async fn run(args: Args, runtime: Arc) -> Result<()> { let sst_meta = sst_util::meta_from_sst(&store, &input_path).await; let factory = FactoryImpl; let scan_options = ScanOptions::default(); + let projected_schema = ProjectedSchema::no_projection(sst_meta.schema.clone()); + + let fetching_schema = projected_schema.to_record_schema(); + let table_schema = projected_schema.table_schema().clone(); + let record_fetching_ctx_builder = + RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); let reader_opts = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("tool")), frequency: ReadFrequency::Once, num_rows_per_row_group: 8192, - projected_schema: ProjectedSchema::no_projection(sst_meta.schema.clone()), predicate: Arc::new(Predicate::empty()), meta_cache: None, scan_options, runtime, + record_fetching_ctx_builder, }; let store_picker: ObjectStorePickerRef = Arc::new(store); let mut reader = factory From b59d54c83b33f969bd2eaf8a39f4a214ebe8905a Mon Sep 17 00:00:00 2001 From: kamille Date: Thu, 16 Nov 2023 16:11:37 +0800 Subject: [PATCH 03/13] fix clippy. --- analytic_engine/src/instance/mod.rs | 2 +- analytic_engine/src/memtable/columnar/iter.rs | 2 +- analytic_engine/src/memtable/mod.rs | 2 +- analytic_engine/src/memtable/skiplist/iter.rs | 2 +- analytic_engine/src/row_iter/chain.rs | 2 +- analytic_engine/src/row_iter/merge.rs | 2 +- analytic_engine/src/row_iter/record_batch_stream.rs | 2 +- analytic_engine/src/row_iter/tests.rs | 2 +- analytic_engine/src/sst/factory.rs | 2 +- benchmarks/src/merge_memtable_bench.rs | 6 +++--- benchmarks/src/merge_sst_bench.rs | 8 ++++---- benchmarks/src/sst_bench.rs | 3 +-- common_types/src/projected_schema.rs | 2 +- common_types/src/row/contiguous.rs | 2 +- 14 files changed, 19 insertions(+), 20 deletions(-) diff --git a/analytic_engine/src/instance/mod.rs b/analytic_engine/src/instance/mod.rs index 8a67d0dce5..c846759872 100644 --- a/analytic_engine/src/instance/mod.rs +++ b/analytic_engine/src/instance/mod.rs @@ -34,7 +34,7 @@ pub(crate) mod write; use std::sync::Arc; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{RecordFetchingContextBuilder}, table::TableId, }; use generic_error::{BoxError, GenericError}; diff --git a/analytic_engine/src/memtable/columnar/iter.rs b/analytic_engine/src/memtable/columnar/iter.rs index d38888f221..ace878977d 100644 --- a/analytic_engine/src/memtable/columnar/iter.rs +++ b/analytic_engine/src/memtable/columnar/iter.rs @@ -27,7 +27,7 @@ use common_types::{ column::Column, column_schema::ColumnId, datum::Datum, - projected_schema::{ProjectedSchema, RecordFetchingContext}, + projected_schema::{RecordFetchingContext}, record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::Row, schema::Schema, diff --git a/analytic_engine/src/memtable/mod.rs b/analytic_engine/src/memtable/mod.rs index a83bd65145..3c718edc83 100644 --- a/analytic_engine/src/memtable/mod.rs +++ b/analytic_engine/src/memtable/mod.rs @@ -24,7 +24,7 @@ use std::{ops::Bound, sync::Arc, time::Instant}; use bytes_ext::{ByteVec, Bytes}; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{RecordFetchingContextBuilder}, record_batch::FetchingRecordBatch, row::Row, schema::{IndexInWriterSchema, Schema}, diff --git a/analytic_engine/src/memtable/skiplist/iter.rs b/analytic_engine/src/memtable/skiplist/iter.rs index b78ff65c32..781d49675f 100644 --- 
a/analytic_engine/src/memtable/skiplist/iter.rs +++ b/analytic_engine/src/memtable/skiplist/iter.rs @@ -20,7 +20,7 @@ use arena::{Arena, BasicStats}; use bytes_ext::{Bytes, BytesMut}; use codec::row; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContext}, + projected_schema::{RecordFetchingContext}, record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::contiguous::{ContiguousRowReader, ProjectedContiguousRow}, schema::Schema, diff --git a/analytic_engine/src/row_iter/chain.rs b/analytic_engine/src/row_iter/chain.rs index 2d645ed8a2..ede23fa152 100644 --- a/analytic_engine/src/row_iter/chain.rs +++ b/analytic_engine/src/row_iter/chain.rs @@ -41,7 +41,7 @@ use crate::{ }, space::SpaceId, sst::{ - factory::{FactoryRef as SstFactoryRef, ObjectStorePickerRef, SstReadOptions}, + factory::{FactoryRef as SstFactoryRef, ObjectStorePickerRef}, file::FileHandle, }, table::version::{MemTableVec, SamplingMemTable}, diff --git a/analytic_engine/src/row_iter/merge.rs b/analytic_engine/src/row_iter/merge.rs index 83dcb49b25..24d9e8f6f6 100644 --- a/analytic_engine/src/row_iter/merge.rs +++ b/analytic_engine/src/row_iter/merge.rs @@ -49,7 +49,7 @@ use crate::{ }, space::SpaceId, sst::{ - factory::{FactoryRef as SstFactoryRef, ObjectStorePickerRef, SstReadOptions}, + factory::{FactoryRef as SstFactoryRef, ObjectStorePickerRef}, file::{FileHandle, Level, SST_LEVEL_NUM}, }, table::version::{MemTableVec, SamplingMemTable}, diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index d49f177c7c..017bc7b37d 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -23,7 +23,7 @@ use arrow::{ datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}, }; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{RecordFetchingContextBuilder}, record_batch::FetchingRecordBatch, schema::RecordSchema, SequenceNumber, diff --git a/analytic_engine/src/row_iter/tests.rs b/analytic_engine/src/row_iter/tests.rs index be45cb1a73..d9b96ebbf7 100644 --- a/analytic_engine/src/row_iter/tests.rs +++ b/analytic_engine/src/row_iter/tests.rs @@ -75,7 +75,7 @@ pub fn build_record_batch_with_key(schema: Schema, rows: Vec) -> FetchingRe let fetching_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema(); let record_fetching_ctx = - RecordFetchingContext::new(&fetching_schema, None, &table_schema, &table_schema).unwrap(); + RecordFetchingContext::new(&fetching_schema, None, table_schema, table_schema).unwrap(); let mut builder = FetchingRecordBatchBuilder::with_capacity(fetching_schema, None, 2); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); diff --git a/analytic_engine/src/sst/factory.rs b/analytic_engine/src/sst/factory.rs index 4867d5d197..e074061311 100644 --- a/analytic_engine/src/sst/factory.rs +++ b/analytic_engine/src/sst/factory.rs @@ -18,7 +18,7 @@ use std::{fmt::Debug, sync::Arc}; use async_trait::async_trait; use common_types::projected_schema::{ - ProjectedSchema, RecordFetchingContext, RecordFetchingContextBuilder, + RecordFetchingContextBuilder, }; use macros::define_result; use object_store::{ObjectStoreRef, Path}; diff --git a/benchmarks/src/merge_memtable_bench.rs b/benchmarks/src/merge_memtable_bench.rs index 0eccb910d7..9d88e7c889 100644 --- a/benchmarks/src/merge_memtable_bench.rs +++ 
b/benchmarks/src/merge_memtable_bench.rs @@ -29,8 +29,8 @@ use analytic_engine::{ space::SpaceId, sst::{ factory::{ - FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, ReadFrequency, - ScanOptions, SstReadOptions, + FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, + ScanOptions, }, meta_data::cache::MetaCacheRef, metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, @@ -210,7 +210,7 @@ impl MergeMemTableBench { } fn mock_sst_read_options_builder( - projected_schema: ProjectedSchema, + _projected_schema: ProjectedSchema, runtime: Arc, ) -> SstReadOptionsBuilder { let scan_options = ScanOptions { diff --git a/benchmarks/src/merge_sst_bench.rs b/benchmarks/src/merge_sst_bench.rs index 141d9364a7..85e9098726 100644 --- a/benchmarks/src/merge_sst_bench.rs +++ b/benchmarks/src/merge_sst_bench.rs @@ -27,8 +27,8 @@ use analytic_engine::{ space::SpaceId, sst::{ factory::{ - FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, ReadFrequency, - ScanOptions, SstReadOptions, + FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, + ScanOptions, }, file::{FileHandle, FilePurgeQueue, Level, Request}, meta_data::cache::MetaCacheRef, @@ -38,7 +38,7 @@ use analytic_engine::{ ScanType, SstReadOptionsBuilder, }; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContext, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema}, request_id::RequestId, schema::Schema, }; @@ -80,7 +80,7 @@ impl MergeSstBench { let schema = runtime.block_on(util::schema_from_sst(&store, &sst_path, &meta_cache)); let predicate = config.predicate.into_predicate(); - let projected_schema = ProjectedSchema::no_projection(schema.clone()); + let _projected_schema = ProjectedSchema::no_projection(schema.clone()); let scan_options = ScanOptions { background_read_parallelism: 1, max_record_batches_in_flight: 1024, diff --git a/benchmarks/src/sst_bench.rs b/benchmarks/src/sst_bench.rs index cc01a828a2..2e49132b76 100644 --- a/benchmarks/src/sst_bench.rs +++ b/benchmarks/src/sst_bench.rs @@ -19,8 +19,7 @@ use std::{cmp, sync::Arc, time::Instant}; use analytic_engine::{ sst::{ factory::{ - Factory, FactoryImpl, ObjectStorePickerRef, ReadFrequency, ScanOptions, SstReadHint, - SstReadOptions, + Factory, FactoryImpl, ObjectStorePickerRef, ScanOptions, SstReadHint, }, meta_data::cache::{MetaCache, MetaCacheRef}, metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, diff --git a/common_types/src/projected_schema.rs b/common_types/src/projected_schema.rs index b0d6289853..09e07dcb5c 100644 --- a/common_types/src/projected_schema.rs +++ b/common_types/src/projected_schema.rs @@ -271,7 +271,7 @@ impl RecordFetchingContextBuilder { &self.fetching_schema, self.primary_key_indexes.clone(), &self.table_schema, - &source_schema, + source_schema, ) } } diff --git a/common_types/src/row/contiguous.rs b/common_types/src/row/contiguous.rs index fdb66d28e5..93b8c4bd17 100644 --- a/common_types/src/row/contiguous.rs +++ b/common_types/src/row/contiguous.rs @@ -801,7 +801,7 @@ mod tests { let ctx = RecordFetchingContext::new( &projected_schema.to_record_schema(), None, - &projected_schema.table_schema(), + projected_schema.table_schema(), &schema, ) .unwrap(); From 236fa5324d4bab3d077e54fdbfc1e48e70539b81 Mon Sep 17 00:00:00 2001 From: kamille Date: Thu, 16 Nov 2023 18:21:20 +0800 Subject: [PATCH 04/13] fix test. 
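The row_iter test helper is renamed to `build_fetching_record_batch_with_key` and now threads the
primary key indexes through `RecordFetchingContext`, since the fetched batch no longer carries a
schema-with-key of its own. A minimal sketch of the new construction, abridged from the helper in
this patch (`schema` is the usual test schema whose leading columns form the primary key):

    // Project all columns, then derive the fetching schema and the primary key indexes.
    let projection: Vec<usize> = (0..schema.num_columns()).collect();
    let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap();
    let fetching_schema = projected_schema.to_record_schema_with_key();
    let primary_key_indexes = fetching_schema.primary_key_idx().to_vec();
    let fetching_schema = fetching_schema.to_record_schema();
    let table_schema = projected_schema.table_schema();
    let ctx = RecordFetchingContext::new(
        &fetching_schema,
        Some(primary_key_indexes),
        table_schema,
        table_schema,
    )
    .unwrap();
    // The builder is told which of the fetched columns form the primary key.
    let primary_key_indexes = ctx.primary_key_indexes().map(|idxs| idxs.to_vec());
    let mut builder =
        FetchingRecordBatchBuilder::with_capacity(fetching_schema, primary_key_indexes, 2);

The dedup iterator tests and the parquet writer tests switch to this helper accordingly.
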
--- analytic_engine/src/instance/mod.rs | 5 +---- analytic_engine/src/memtable/columnar/iter.rs | 2 +- analytic_engine/src/memtable/mod.rs | 2 +- analytic_engine/src/memtable/skiplist/iter.rs | 2 +- analytic_engine/src/row_iter/dedup.rs | 8 ++++--- .../src/row_iter/record_batch_stream.rs | 8 +++---- analytic_engine/src/row_iter/tests.rs | 21 ++++++++++++++----- analytic_engine/src/sst/factory.rs | 4 +--- analytic_engine/src/sst/parquet/writer.rs | 6 +++--- benchmarks/src/merge_memtable_bench.rs | 5 +---- benchmarks/src/merge_sst_bench.rs | 11 ++-------- benchmarks/src/sst_bench.rs | 4 +--- 12 files changed, 36 insertions(+), 42 deletions(-) diff --git a/analytic_engine/src/instance/mod.rs b/analytic_engine/src/instance/mod.rs index c846759872..2ba236d09a 100644 --- a/analytic_engine/src/instance/mod.rs +++ b/analytic_engine/src/instance/mod.rs @@ -33,10 +33,7 @@ pub(crate) mod write; use std::sync::Arc; -use common_types::{ - projected_schema::{RecordFetchingContextBuilder}, - table::TableId, -}; +use common_types::{projected_schema::RecordFetchingContextBuilder, table::TableId}; use generic_error::{BoxError, GenericError}; use logger::{error, info}; use macros::define_result; diff --git a/analytic_engine/src/memtable/columnar/iter.rs b/analytic_engine/src/memtable/columnar/iter.rs index ace878977d..01dca69523 100644 --- a/analytic_engine/src/memtable/columnar/iter.rs +++ b/analytic_engine/src/memtable/columnar/iter.rs @@ -27,7 +27,7 @@ use common_types::{ column::Column, column_schema::ColumnId, datum::Datum, - projected_schema::{RecordFetchingContext}, + projected_schema::RecordFetchingContext, record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::Row, schema::Schema, diff --git a/analytic_engine/src/memtable/mod.rs b/analytic_engine/src/memtable/mod.rs index 3c718edc83..382a9d654d 100644 --- a/analytic_engine/src/memtable/mod.rs +++ b/analytic_engine/src/memtable/mod.rs @@ -24,7 +24,7 @@ use std::{ops::Bound, sync::Arc, time::Instant}; use bytes_ext::{ByteVec, Bytes}; use common_types::{ - projected_schema::{RecordFetchingContextBuilder}, + projected_schema::RecordFetchingContextBuilder, record_batch::FetchingRecordBatch, row::Row, schema::{IndexInWriterSchema, Schema}, diff --git a/analytic_engine/src/memtable/skiplist/iter.rs b/analytic_engine/src/memtable/skiplist/iter.rs index 781d49675f..92168db9c8 100644 --- a/analytic_engine/src/memtable/skiplist/iter.rs +++ b/analytic_engine/src/memtable/skiplist/iter.rs @@ -20,7 +20,7 @@ use arena::{Arena, BasicStats}; use bytes_ext::{Bytes, BytesMut}; use codec::row; use common_types::{ - projected_schema::{RecordFetchingContext}, + projected_schema::RecordFetchingContext, record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, row::contiguous::{ContiguousRowReader, ProjectedContiguousRow}, schema::Schema, diff --git a/analytic_engine/src/row_iter/dedup.rs b/analytic_engine/src/row_iter/dedup.rs index 89b9bbbfff..22b0bdf3c9 100644 --- a/analytic_engine/src/row_iter/dedup.rs +++ b/analytic_engine/src/row_iter/dedup.rs @@ -214,7 +214,9 @@ mod tests { use common_types::tests::{build_row, build_schema}; use super::*; - use crate::row_iter::tests::{build_record_batch_with_key, check_iterator, VectorIterator}; + use crate::row_iter::tests::{ + build_fetching_record_batch_with_key, check_iterator, VectorIterator, + }; #[tokio::test] async fn test_dedup_iterator() { @@ -223,7 +225,7 @@ mod tests { let iter = VectorIterator::new( schema.to_record_schema_with_key(), vec![ - build_record_batch_with_key( + 
build_fetching_record_batch_with_key( schema.clone(), vec![ build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), @@ -231,7 +233,7 @@ mod tests { build_row(b"a", 2, 10.0, "v2", 2000, 2_000_000), ], ), - build_record_batch_with_key( + build_fetching_record_batch_with_key( schema, vec![ build_row(b"a", 2, 10.0, "v", 2000, 2_000_000), diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 017bc7b37d..7ce8a6777a 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -23,10 +23,8 @@ use arrow::{ datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}, }; use common_types::{ - projected_schema::{RecordFetchingContextBuilder}, - record_batch::FetchingRecordBatch, - schema::RecordSchema, - SequenceNumber, + projected_schema::RecordFetchingContextBuilder, record_batch::FetchingRecordBatch, + schema::RecordSchema, SequenceNumber, }; use datafusion::{ common::ToDFSchema, @@ -369,7 +367,7 @@ pub mod tests { .into_iter() .map(|(seq, rows)| { let batch = SequencedRecordBatch { - record_batch: row_iter::tests::build_record_batch_with_key( + record_batch: row_iter::tests::build_fetching_record_batch_with_key( schema.clone(), rows, ), diff --git a/analytic_engine/src/row_iter/tests.rs b/analytic_engine/src/row_iter/tests.rs index d9b96ebbf7..198e952a6d 100644 --- a/analytic_engine/src/row_iter/tests.rs +++ b/analytic_engine/src/row_iter/tests.rs @@ -68,15 +68,26 @@ impl RecordBatchWithKeyIterator for VectorIterator { } } -pub fn build_record_batch_with_key(schema: Schema, rows: Vec) -> FetchingRecordBatch { +pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> FetchingRecordBatch { assert!(schema.num_columns() > 1); let projection: Vec = (0..schema.num_columns()).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - let fetching_schema = projected_schema.to_record_schema(); + let fetching_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); + let fetching_schema = fetching_schema.to_record_schema(); let table_schema = projected_schema.table_schema(); - let record_fetching_ctx = - RecordFetchingContext::new(&fetching_schema, None, table_schema, table_schema).unwrap(); - let mut builder = FetchingRecordBatchBuilder::with_capacity(fetching_schema, None, 2); + let record_fetching_ctx = RecordFetchingContext::new( + &fetching_schema, + Some(primary_key_indexes), + table_schema, + table_schema, + ) + .unwrap(); + let primary_key_indexes = record_fetching_ctx + .primary_key_indexes() + .map(|idxs| idxs.to_vec()); + let mut builder = + FetchingRecordBatchBuilder::with_capacity(fetching_schema, primary_key_indexes, 2); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); let mut buf = Vec::new(); diff --git a/analytic_engine/src/sst/factory.rs b/analytic_engine/src/sst/factory.rs index e074061311..39bf921a09 100644 --- a/analytic_engine/src/sst/factory.rs +++ b/analytic_engine/src/sst/factory.rs @@ -17,9 +17,7 @@ use std::{fmt::Debug, sync::Arc}; use async_trait::async_trait; -use common_types::projected_schema::{ - RecordFetchingContextBuilder, -}; +use common_types::projected_schema::RecordFetchingContextBuilder; use macros::define_result; use object_store::{ObjectStoreRef, Path}; use runtime::Runtime; diff --git a/analytic_engine/src/sst/parquet/writer.rs b/analytic_engine/src/sst/parquet/writer.rs 
index 71af8e7a53..701d69dae9 100644 --- a/analytic_engine/src/sst/parquet/writer.rs +++ b/analytic_engine/src/sst/parquet/writer.rs @@ -537,7 +537,7 @@ mod tests { use super::*; use crate::{ - row_iter::tests::build_record_batch_with_key, + row_iter::tests::build_fetching_record_batch_with_key, sst::{ factory::{ Factory, FactoryImpl, ReadFrequency, ScanOptions, SstReadOptions, SstWriteOptions, @@ -633,7 +633,7 @@ mod tests { "tagv2", ), ]; - let batch = build_record_batch_with_key(schema.clone(), rows); + let batch = build_fetching_record_batch_with_key(schema.clone(), rows); Poll::Ready(Some(Ok(batch))) })); @@ -805,7 +805,7 @@ mod tests { .map(|_| build_row(b"a", 100, 10.0, "v4", 1000, 1_000_000)) .collect::>(); - let batch = build_record_batch_with_key(schema_clone.clone(), rows); + let batch = build_fetching_record_batch_with_key(schema_clone.clone(), rows); poll_cnt += 1; Poll::Ready(Some(Ok(batch))) diff --git a/benchmarks/src/merge_memtable_bench.rs b/benchmarks/src/merge_memtable_bench.rs index 9d88e7c889..4f87c3e6c0 100644 --- a/benchmarks/src/merge_memtable_bench.rs +++ b/benchmarks/src/merge_memtable_bench.rs @@ -28,10 +28,7 @@ use analytic_engine::{ }, space::SpaceId, sst::{ - factory::{ - FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, - ScanOptions, - }, + factory::{FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, ScanOptions}, meta_data::cache::MetaCacheRef, metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, }, diff --git a/benchmarks/src/merge_sst_bench.rs b/benchmarks/src/merge_sst_bench.rs index 85e9098726..b40518c36f 100644 --- a/benchmarks/src/merge_sst_bench.rs +++ b/benchmarks/src/merge_sst_bench.rs @@ -26,10 +26,7 @@ use analytic_engine::{ }, space::SpaceId, sst::{ - factory::{ - FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, - ScanOptions, - }, + factory::{FactoryImpl, FactoryRef as SstFactoryRef, ObjectStorePickerRef, ScanOptions}, file::{FileHandle, FilePurgeQueue, Level, Request}, meta_data::cache::MetaCacheRef, metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, @@ -37,11 +34,7 @@ use analytic_engine::{ table::sst_util, ScanType, SstReadOptionsBuilder, }; -use common_types::{ - projected_schema::{ProjectedSchema}, - request_id::RequestId, - schema::Schema, -}; +use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema}; use logger::info; use object_store::{LocalFileSystem, ObjectStoreRef}; use runtime::Runtime; diff --git a/benchmarks/src/sst_bench.rs b/benchmarks/src/sst_bench.rs index 2e49132b76..00d1412300 100644 --- a/benchmarks/src/sst_bench.rs +++ b/benchmarks/src/sst_bench.rs @@ -18,9 +18,7 @@ use std::{cmp, sync::Arc, time::Instant}; use analytic_engine::{ sst::{ - factory::{ - Factory, FactoryImpl, ObjectStorePickerRef, ScanOptions, SstReadHint, - }, + factory::{Factory, FactoryImpl, ObjectStorePickerRef, ScanOptions, SstReadHint}, meta_data::cache::{MetaCache, MetaCacheRef}, metrics::MaybeTableLevelMetrics as SstMaybeTableLevelMetrics, }, From eba48ba8aea0a73d81c28b2fec5d364c015138b0 Mon Sep 17 00:00:00 2001 From: kamille Date: Sun, 19 Nov 2023 22:44:15 +0800 Subject: [PATCH 05/13] add integration tests. 
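In addition to the new sqlness cases, the memtable and SST scan spans now include the fetched
column names, so the plan output can assert exactly which columns each scan pulls. The span
description is built roughly as follows (abridged from record_batch_stream.rs in this patch;
`format_with` comes from itertools):

    let fetching_cols = ctx
        .fetching_schema
        .columns()
        .iter()
        .format_with(",", |col, f| f(&format_args!("{}", col.name)));
    let scan_memtable_desc =
        format!("scan_memtable_{max_seq}, fetching_columns:[{fetching_cols}]");
    let metrics_collector = metrics_collector.map(|v| v.span(scan_memtable_desc));

For the append-mode table the expected plans contain spans such as
`scan_memtable_1, fetching_columns:[t,name]`, i.e. only the columns needed by the query, while the
overwrite-mode table still fetches the primary key columns
(`scan_memtable_1, fetching_columns:[tsid,t]`) because its rows go through merge sorting and dedup.
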
--- .../src/row_iter/record_batch_stream.rs | 20 ++++++- .../cases/env/local/ddl/query-plan.result | 59 ++++++++++++++++++- .../cases/env/local/ddl/query-plan.sql | 37 ++++++++++++ 3 files changed, 112 insertions(+), 4 deletions(-) diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 7ce8a6777a..966571fb1d 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -35,6 +35,7 @@ use datafusion::{ }; use futures::stream::{self, StreamExt}; use generic_error::{BoxError, GenericResult}; +use itertools::Itertools; use macros::define_result; use snafu::{Backtrace, OptionExt, ResultExt, Snafu}; use table_engine::{ @@ -240,7 +241,14 @@ pub fn stream_from_memtable( ..Default::default() }; let max_seq = memtable.last_sequence(); - let scan_memtable_desc = format!("scan_memtable_{max_seq}"); + let fetching_cols = ctx + .fetching_schema + .columns() + .iter() + .format_with(",", |col, f| f(&format_args!("{}", col.name))); + let scan_memtable_desc = format!( + "scan_memtable_{max_seq}, fetching_columns:[{fetching_cols}]", + ); let metrics_collector = metrics_collector.map(|v| v.span(scan_memtable_desc)); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, @@ -320,7 +328,15 @@ pub async fn stream_from_sst_file( file_size: Some(sst_file.size() as usize), file_format: Some(sst_file.storage_format()), }; - let scan_sst_desc = format!("scan_sst_{}", sst_file.id()); + let fetching_cols = ctx + .fetching_schema + .columns() + .iter() + .format_with(",", |col, f| f(&format_args!("{}", col.name))); + let scan_sst_desc = format!( + "scan_sst_{}, fetching_columns:[{fetching_cols}]", + sst_file.id() + ); let metrics_collector = metrics_collector.map(|v| v.span(scan_sst_desc)); let mut sst_reader = sst_factory .create_reader( diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result index 9fe35c86a2..6db29a68e9 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.result +++ b/integration_tests/cases/env/local/ddl/query-plan.result @@ -2,6 +2,10 @@ DROP TABLE IF EXISTS `03_dml_select_real_time_range`; affected_rows: 0 +DROP TABLE IF EXISTS `03_append_mode_table`; + +affected_rows: 0 + CREATE TABLE `03_dml_select_real_time_range` ( name string TAG, value double NOT NULL, @@ -27,7 +31,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1:\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetching_columns:[tsid,t]:\n\n\nPredicate { exprs:[t > 
TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), -- This query should not include memtable @@ -47,7 +51,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetching_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), -- This query should not include SST @@ -58,7 +62,58 @@ plan_type,plan, String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=0\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +-- Table with an 'append' update mode +CREATE TABLE `03_append_mode_table` ( + name string TAG, + value double NOT NULL, + t timestamp NOT NULL, + timestamp KEY (t)) ENGINE = Analytic WITH ( + enable_ttl = 'false', + segment_duration = '2h', + update_mode = 'append' +); + +affected_rows: 0 + +INSERT INTO `03_append_mode_table` (t, name, value) + VALUES + (1695348000000, "ceresdb", 100), + (1695348001000, "ceresdb", 200), + (1695348002000, "ceresdb", 300); + +affected_rows: 3 + +-- Should just fetch projected columns from memtable +-- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx +-- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx +-- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx +explain analyze select t from `03_append_mode_table` +where t >= 1695348001000 and name = 'ceresdb'; + +plan_type,plan, +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, 
metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetching_columns:[t,name]:\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), + + +-- Should just fetch projected columns from SST +-- SQLNESS ARG pre_cmd=flush +-- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx +-- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx +-- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx +-- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx +explain analyze select t from `03_append_mode_table` +where t >= 1695348001000 and name = 'ceresdb'; + +plan_type,plan, +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetching_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), + + DROP TABLE `03_dml_select_real_time_range`; affected_rows: 0 +DROP TABLE `03_append_mode_table`; + +affected_rows: 0 + diff --git a/integration_tests/cases/env/local/ddl/query-plan.sql b/integration_tests/cases/env/local/ddl/query-plan.sql index 00fb19e05c..a0baff5b81 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.sql +++ b/integration_tests/cases/env/local/ddl/query-plan.sql @@ -1,4 +1,5 @@ DROP TABLE IF EXISTS `03_dml_select_real_time_range`; +DROP TABLE IF EXISTS `03_append_mode_table`; CREATE TABLE `03_dml_select_real_time_range` ( name string TAG, @@ -36,4 +37,40 @@ where t > 1695348001000; explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; +-- Table with an 'append' update mode +CREATE TABLE `03_append_mode_table` ( + name string TAG, + value double NOT NULL, + t timestamp NOT NULL, + timestamp KEY (t)) ENGINE = Analytic WITH ( + enable_ttl = 'false', + segment_duration = '2h', + update_mode = 'append' +); + +INSERT INTO `03_append_mode_table` (t, name, value) + VALUES + (1695348000000, "ceresdb", 100), + (1695348001000, "ceresdb", 200), + (1695348002000, "ceresdb", 300); + +-- Should just fetch projected columns from memtable +-- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx +-- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx +-- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx +explain analyze select t from `03_append_mode_table` +where t >= 1695348001000 and name = 'ceresdb'; + +-- Should just fetch projected columns from SST +-- SQLNESS ARG pre_cmd=flush +-- 
SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx +-- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx +-- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx +-- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx +explain analyze select t from `03_append_mode_table` +where t >= 1695348001000 and name = 'ceresdb'; + DROP TABLE `03_dml_select_real_time_range`; +DROP TABLE `03_append_mode_table`; From 8e33b8dc0f0175bca0d178f88cd4e85d84378845 Mon Sep 17 00:00:00 2001 From: kamille Date: Mon, 20 Nov 2023 15:27:25 +0800 Subject: [PATCH 06/13] fix style check. --- analytic_engine/src/row_iter/record_batch_stream.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 966571fb1d..859f840f1b 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -246,9 +246,8 @@ pub fn stream_from_memtable( .columns() .iter() .format_with(",", |col, f| f(&format_args!("{}", col.name))); - let scan_memtable_desc = format!( - "scan_memtable_{max_seq}, fetching_columns:[{fetching_cols}]", - ); + let scan_memtable_desc = + format!("scan_memtable_{max_seq}, fetching_columns:[{fetching_cols}]",); let metrics_collector = metrics_collector.map(|v| v.span(scan_memtable_desc)); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, From 45a8788972732ee555bcea0854cb9924d1512f78 Mon Sep 17 00:00:00 2001 From: kamille Date: Wed, 29 Nov 2023 16:57:20 +0800 Subject: [PATCH 07/13] fix comments and namings. --- analytic_engine/src/instance/read.rs | 8 ++++---- analytic_engine/src/instance/reorder_memtable.rs | 4 ++-- analytic_engine/src/row_iter/chain.rs | 4 ++-- analytic_engine/src/row_iter/dedup.rs | 6 +++--- analytic_engine/src/row_iter/merge.rs | 4 ++-- analytic_engine/src/row_iter/mod.rs | 4 ++-- .../src/row_iter/record_batch_stream.rs | 2 +- analytic_engine/src/row_iter/tests.rs | 6 +++--- analytic_engine/src/sst/parquet/async_reader.rs | 15 +++++++++------ benchmarks/src/merge_memtable_bench.rs | 2 +- benchmarks/src/merge_sst_bench.rs | 2 +- common_types/src/record_batch.rs | 15 ++++----------- common_types/src/row/mod.rs | 4 ++-- 13 files changed, 36 insertions(+), 40 deletions(-) diff --git a/analytic_engine/src/instance/read.rs b/analytic_engine/src/instance/read.rs index 748ef143ad..3e01c6fc61 100644 --- a/analytic_engine/src/instance/read.rs +++ b/analytic_engine/src/instance/read.rs @@ -48,7 +48,7 @@ use crate::{ chain::{ChainConfig, ChainIterator}, dedup::DedupIterator, merge::{MergeBuilder, MergeConfig, MergeIterator}, - IterOptions, RecordBatchWithKeyIterator, + FetchingRecordBatchIterator, IterOptions, }, table::{ data::TableData, @@ -168,7 +168,7 @@ impl Instance { fn build_partitioned_streams( &self, request: &ReadRequest, - partitioned_iters: Vec, + partitioned_iters: Vec, ) -> Result { let read_parallelism = request.opts.read_parallelism; @@ -365,7 +365,7 @@ struct StreamStateOnMultiIters { projected_schema: ProjectedSchema, } -impl StreamStateOnMultiIters { +impl StreamStateOnMultiIters { fn is_exhausted(&self) -> bool { self.curr_iter_idx >= self.iters.len() } @@ -397,7 +397,7 @@ impl StreamStateOnMultiIters { } fn iters_to_stream( - iters: Vec, + iters: Vec, projected_schema: ProjectedSchema, ) -> SendableRecordBatchStream { let mut state = StreamStateOnMultiIters { diff --git 
a/analytic_engine/src/instance/reorder_memtable.rs b/analytic_engine/src/instance/reorder_memtable.rs index 01a1120dcf..e09f14849e 100644 --- a/analytic_engine/src/instance/reorder_memtable.rs +++ b/analytic_engine/src/instance/reorder_memtable.rs @@ -70,7 +70,7 @@ pub enum Error { define_result!(Error); pub type DfResult = std::result::Result; -type SendableRecordBatchWithkeyStream = +type SendableFetchingRecordBatchStream = Pin> + Send>>; impl From for Error { @@ -253,7 +253,7 @@ impl Reorder { // TODO: In theory we can construct a physical plan directly, here we choose // logical because it has a convenient builder API for use. - pub async fn into_stream(self) -> Result { + pub async fn into_stream(self) -> Result { // 1. Init datafusion context let runtime = Arc::new(RuntimeEnv::default()); let state = SessionState::with_config_rt(SessionConfig::new(), runtime); diff --git a/analytic_engine/src/row_iter/chain.rs b/analytic_engine/src/row_iter/chain.rs index ede23fa152..a39c4459e8 100644 --- a/analytic_engine/src/row_iter/chain.rs +++ b/analytic_engine/src/row_iter/chain.rs @@ -37,7 +37,7 @@ use crate::{ record_batch_stream::{ self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SstStreamContext, }, - RecordBatchWithKeyIterator, + FetchingRecordBatchIterator, }, space::SpaceId, sst::{ @@ -377,7 +377,7 @@ impl Drop for ChainIterator { } #[async_trait] -impl RecordBatchWithKeyIterator for ChainIterator { +impl FetchingRecordBatchIterator for ChainIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { diff --git a/analytic_engine/src/row_iter/dedup.rs b/analytic_engine/src/row_iter/dedup.rs index 22b0bdf3c9..f7dbe9dc5e 100644 --- a/analytic_engine/src/row_iter/dedup.rs +++ b/analytic_engine/src/row_iter/dedup.rs @@ -26,7 +26,7 @@ use logger::{info, trace}; use macros::define_result; use snafu::{ResultExt, Snafu}; -use crate::row_iter::{IterOptions, RecordBatchWithKeyIterator}; +use crate::row_iter::{FetchingRecordBatchIterator, IterOptions}; #[derive(Debug, Snafu)] pub enum Error { @@ -67,7 +67,7 @@ pub struct DedupIterator { total_selected_rows: usize, } -impl DedupIterator { +impl DedupIterator { pub fn new(request_id: RequestId, iter: I, iter_options: IterOptions) -> Self { let schema_with_key = iter.schema(); let primary_key_indexes = schema_with_key.primary_key_idx().to_vec(); @@ -173,7 +173,7 @@ impl DedupIterator { } #[async_trait] -impl RecordBatchWithKeyIterator for DedupIterator { +impl FetchingRecordBatchIterator for DedupIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { diff --git a/analytic_engine/src/row_iter/merge.rs b/analytic_engine/src/row_iter/merge.rs index 24d9e8f6f6..2dfe6794ae 100644 --- a/analytic_engine/src/row_iter/merge.rs +++ b/analytic_engine/src/row_iter/merge.rs @@ -45,7 +45,7 @@ use crate::{ self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SequencedRecordBatch, SstStreamContext, }, - IterOptions, RecordBatchWithKeyIterator, + FetchingRecordBatchIterator, IterOptions, }, space::SpaceId, sst::{ @@ -900,7 +900,7 @@ impl MergeIterator { } #[async_trait] -impl RecordBatchWithKeyIterator for MergeIterator { +impl FetchingRecordBatchIterator for MergeIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { diff --git a/analytic_engine/src/row_iter/mod.rs b/analytic_engine/src/row_iter/mod.rs index e6451506fd..a8406a16bc 100644 --- a/analytic_engine/src/row_iter/mod.rs +++ b/analytic_engine/src/row_iter/mod.rs @@ -38,7 +38,7 @@ pub struct IterOptions { /// The `schema()` should 
be the same as the RecordBatch from `read()`. /// The reader is exhausted if the `read()` returns the `Ok(None)`. #[async_trait] -pub trait RecordBatchWithKeyIterator: Send { +pub trait FetchingRecordBatchIterator: Send { type Error: std::error::Error + Send + Sync + 'static; fn schema(&self) -> &RecordSchemaWithKey; @@ -47,7 +47,7 @@ pub trait RecordBatchWithKeyIterator: Send { -> std::result::Result, Self::Error>; } -pub fn record_batch_with_key_iter_to_stream( +pub fn record_batch_with_key_iter_to_stream( mut iter: I, ) -> RecordBatchStream { let record_batch_stream = try_stream! { diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 859f840f1b..969f355e34 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -130,7 +130,7 @@ pub enum Error { define_result!(Error); -// TODO(yingwen): Can we move sequence to RecordBatchWithKey and remove this +// TODO(yingwen): Can we move sequence to FetchingRecordBatch and remove this // struct? But what is the sequence after merge? #[derive(Debug)] pub struct SequencedRecordBatch { diff --git a/analytic_engine/src/row_iter/tests.rs b/analytic_engine/src/row_iter/tests.rs index 198e952a6d..51f1f6eaee 100644 --- a/analytic_engine/src/row_iter/tests.rs +++ b/analytic_engine/src/row_iter/tests.rs @@ -25,7 +25,7 @@ use common_types::{ use macros::define_result; use snafu::Snafu; -use crate::row_iter::RecordBatchWithKeyIterator; +use crate::row_iter::FetchingRecordBatchIterator; #[derive(Debug, Snafu)] pub enum Error {} @@ -49,7 +49,7 @@ impl VectorIterator { } #[async_trait] -impl RecordBatchWithKeyIterator for VectorIterator { +impl FetchingRecordBatchIterator for VectorIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { @@ -105,7 +105,7 @@ pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> F builder.build().unwrap() } -pub async fn check_iterator(iter: &mut T, expected_rows: Vec) { +pub async fn check_iterator(iter: &mut T, expected_rows: Vec) { let mut visited_rows = 0; while let Some(batch) = iter.next_batch().await.unwrap() { for row_idx in 0..batch.num_rows() { diff --git a/analytic_engine/src/sst/parquet/async_reader.rs b/analytic_engine/src/sst/parquet/async_reader.rs index 47fb82dd93..b6bccfd5f7 100644 --- a/analytic_engine/src/sst/parquet/async_reader.rs +++ b/analytic_engine/src/sst/parquet/async_reader.rs @@ -73,7 +73,7 @@ use crate::{ const PRUNE_ROW_GROUPS_METRICS_COLLECTOR_NAME: &str = "prune_row_groups"; type SendableRecordBatchStream = Pin> + Send>>; -type RecordBatchWithKeyStream = Box> + Send + Unpin>; +type FetchingRecordBatchStream = Box> + Send + Unpin>; pub struct Reader<'a> { /// The path where the data is persisted. 
@@ -147,7 +147,7 @@ impl<'a> Reader<'a> { async fn maybe_read_parallelly( &mut self, read_parallelism: usize, - ) -> Result> { + ) -> Result> { assert!(read_parallelism > 0); self.init_if_necessary().await?; @@ -156,13 +156,13 @@ impl<'a> Reader<'a> { return Ok(Vec::new()); } - let row_projector = self.record_fetching_ctx.take().unwrap(); + let record_fetching_ctx = self.record_fetching_ctx.take().unwrap(); let streams: Vec<_> = streams .into_iter() .map(|stream| { Box::new(RecordBatchProjector::new( stream, - row_projector.clone(), + record_fetching_ctx.clone(), self.metrics.metrics_collector.clone(), )) as _ }) @@ -240,7 +240,7 @@ impl<'a> Reader<'a> { assert!(self.meta_data.is_some()); let meta_data = self.meta_data.as_ref().unwrap(); - let row_projector = self.record_fetching_ctx.as_ref().unwrap(); + let record_fetching_ctx = self.record_fetching_ctx.as_ref().unwrap(); let arrow_schema = meta_data.custom().schema.to_arrow_schema_ref(); // Get target row groups. let target_row_groups = { @@ -296,7 +296,10 @@ impl<'a> Reader<'a> { let parquet_metadata = meta_data.parquet(); let proj_mask = ProjectionMask::leaves( meta_data.parquet().file_metadata().schema_descr(), - row_projector.existed_source_projection().iter().copied(), + record_fetching_ctx + .existed_source_projection() + .iter() + .copied(), ); debug!( "Reader fetch record batches, parallelism suggest:{}, real:{}, chunk_size:{}, project:{:?}", diff --git a/benchmarks/src/merge_memtable_bench.rs b/benchmarks/src/merge_memtable_bench.rs index 4f87c3e6c0..23ec8927ed 100644 --- a/benchmarks/src/merge_memtable_bench.rs +++ b/benchmarks/src/merge_memtable_bench.rs @@ -24,7 +24,7 @@ use analytic_engine::{ row_iter::{ dedup::DedupIterator, merge::{MergeBuilder, MergeConfig}, - IterOptions, RecordBatchWithKeyIterator, + FetchingRecordBatchIterator, IterOptions, }, space::SpaceId, sst::{ diff --git a/benchmarks/src/merge_sst_bench.rs b/benchmarks/src/merge_sst_bench.rs index b40518c36f..6552190ff3 100644 --- a/benchmarks/src/merge_sst_bench.rs +++ b/benchmarks/src/merge_sst_bench.rs @@ -22,7 +22,7 @@ use analytic_engine::{ chain::ChainConfig, dedup::DedupIterator, merge::{MergeBuilder, MergeConfig}, - IterOptions, RecordBatchWithKeyIterator, + FetchingRecordBatchIterator, IterOptions, }, space::SpaceId, sst::{ diff --git a/common_types/src/record_batch.rs b/common_types/src/record_batch.rs index 1a69bfd135..daa50ef3c6 100644 --- a/common_types/src/record_batch.rs +++ b/common_types/src/record_batch.rs @@ -456,18 +456,11 @@ impl FetchingRecordBatch { Row::from_datums(datums) } - /// Project the [RecordBatchWithKey] into a [RecordBatch] according to + /// Project the [FetchingRecordBatch] into a [RecordBatch] according to /// [ProjectedSchema]. - /// - /// REQUIRE: The schema_with_key of the [RecordBatchWithKey] is the same as - /// the schema_with_key of [ProjectedSchema]. + // TODO: how do we ensure `ProjectedSchema` passed here is same as the source + // `ProjectedSchema` of `RecordSchema` here? pub fn try_project(mut self, projected_schema: &ProjectedSchema) -> Result { - // FIXME - // debug_assert_eq!( - // &self.schema, - // projected_schema.as_record_schema_with_key() - // ); - // Get the schema after projection. let record_schema = projected_schema.to_record_schema(); let mut column_blocks = Vec::with_capacity(record_schema.num_columns()); @@ -717,7 +710,7 @@ impl FetchingRecordBatchBuilder { } } - /// Build [RecordBatchWithKey] and reset the builder. + /// Build [FetchingRecordBatch] and reset the builder. 
pub fn build(&mut self) -> Result { let column_blocks: Vec<_> = self .builders diff --git a/common_types/src/row/mod.rs b/common_types/src/row/mod.rs index efb1c9b4ea..d3dede6885 100644 --- a/common_types/src/row/mod.rs +++ b/common_types/src/row/mod.rs @@ -560,8 +560,8 @@ pub trait RowView { fn column_by_idx(&self, column_idx: usize) -> Datum; } -// TODO(yingwen): Add a method to get row view on RecordBatchWithKey. -/// A row view on the [RecordBatchWithKey]. +// TODO(yingwen): Add a method to get row view on FetchingRecordBatch. +/// A row view on the [FetchingRecordBatch]. /// /// `row_idx < record_batch.num_rows()` is ensured. #[derive(Debug)] From f838fff85dbb6c072af49c94fb5447a1a6ea5705 Mon Sep 17 00:00:00 2001 From: kamille Date: Thu, 21 Dec 2023 20:40:07 +0800 Subject: [PATCH 08/13] rename `FetchingRecordBatch` to `FetchedRecordBatch`. --- .../src/instance/flush_compaction.rs | 12 +++---- analytic_engine/src/instance/read.rs | 12 +++---- .../src/instance/reorder_memtable.rs | 6 ++-- analytic_engine/src/memtable/columnar/iter.rs | 8 ++--- analytic_engine/src/memtable/mod.rs | 4 +-- analytic_engine/src/memtable/reversed_iter.rs | 10 +++--- analytic_engine/src/memtable/skiplist/iter.rs | 8 ++--- analytic_engine/src/memtable/skiplist/mod.rs | 4 +-- analytic_engine/src/row_iter/chain.rs | 10 +++--- analytic_engine/src/row_iter/dedup.rs | 20 ++++++------ analytic_engine/src/row_iter/merge.rs | 24 +++++++------- analytic_engine/src/row_iter/mod.rs | 8 ++--- .../src/row_iter/record_batch_stream.rs | 6 ++-- analytic_engine/src/row_iter/tests.rs | 18 +++++------ .../src/sst/parquet/async_reader.rs | 26 +++++++-------- analytic_engine/src/sst/parquet/writer.rs | 12 +++---- analytic_engine/src/sst/reader.rs | 6 ++-- analytic_engine/src/sst/writer.rs | 4 +-- benchmarks/src/merge_memtable_bench.rs | 2 +- benchmarks/src/merge_sst_bench.rs | 2 +- common_types/src/projected_schema.rs | 4 +-- common_types/src/record_batch.rs | 32 +++++++++---------- common_types/src/row/mod.rs | 10 +++--- common_types/src/tests.rs | 8 ++--- system_catalog/src/tables.rs | 4 +-- 25 files changed, 130 insertions(+), 130 deletions(-) diff --git a/analytic_engine/src/instance/flush_compaction.rs b/analytic_engine/src/instance/flush_compaction.rs index 8fb060d31d..9e434baa42 100644 --- a/analytic_engine/src/instance/flush_compaction.rs +++ b/analytic_engine/src/instance/flush_compaction.rs @@ -18,7 +18,7 @@ use std::{cmp, collections::Bound, fmt, sync::Arc}; use common_types::{ projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, request_id::RequestId, row::RowViewOnBatch, time::TimeRange, @@ -542,7 +542,7 @@ impl FlushTask { for time_range in &time_ranges { let (batch_record_sender, batch_record_receiver) = - channel::>(DEFAULT_CHANNEL_SIZE); + channel::>(DEFAULT_CHANNEL_SIZE); let file_id = self .table_data .alloc_file_id(&self.space_store.manifest) @@ -1068,16 +1068,16 @@ impl SpaceStore { } fn split_record_batch_with_time_ranges( - record_batch: FetchingRecordBatch, + record_batch: FetchedRecordBatch, time_ranges: &[TimeRange], timestamp_idx: usize, -) -> Result> { +) -> Result> { let fetching_schema = record_batch.schema(); let primary_key_indexes = record_batch.primary_key_indexes(); - let mut builders: Vec = (0..time_ranges.len()) + let mut builders: Vec = (0..time_ranges.len()) .map(|_| { let primary_key_indexes = primary_key_indexes.map(|idxs| idxs.to_vec()); - 
FetchingRecordBatchBuilder::new(fetching_schema.clone(), primary_key_indexes) + FetchedRecordBatchBuilder::new(fetching_schema.clone(), primary_key_indexes) }) .collect(); diff --git a/analytic_engine/src/instance/read.rs b/analytic_engine/src/instance/read.rs index 3e01c6fc61..8fdb500746 100644 --- a/analytic_engine/src/instance/read.rs +++ b/analytic_engine/src/instance/read.rs @@ -23,7 +23,7 @@ use std::{ use async_stream::try_stream; use common_types::{ projected_schema::ProjectedSchema, - record_batch::{FetchingRecordBatch, RecordBatch}, + record_batch::{FetchedRecordBatch, RecordBatch}, schema::RecordSchema, time::TimeRange, }; @@ -48,7 +48,7 @@ use crate::{ chain::{ChainConfig, ChainIterator}, dedup::DedupIterator, merge::{MergeBuilder, MergeConfig, MergeIterator}, - FetchingRecordBatchIterator, IterOptions, + FetchedRecordBatchIterator, IterOptions, }, table::{ data::TableData, @@ -168,7 +168,7 @@ impl Instance { fn build_partitioned_streams( &self, request: &ReadRequest, - partitioned_iters: Vec, + partitioned_iters: Vec, ) -> Result { let read_parallelism = request.opts.read_parallelism; @@ -365,7 +365,7 @@ struct StreamStateOnMultiIters { projected_schema: ProjectedSchema, } -impl StreamStateOnMultiIters { +impl StreamStateOnMultiIters { fn is_exhausted(&self) -> bool { self.curr_iter_idx >= self.iters.len() } @@ -380,7 +380,7 @@ impl StreamStateOnMultiIters { async fn fetch_next_batch( &mut self, - ) -> Option> { + ) -> Option> { loop { if self.is_exhausted() { return None; @@ -397,7 +397,7 @@ impl StreamStateOnMultiIters { } fn iters_to_stream( - iters: Vec, + iters: Vec, projected_schema: ProjectedSchema, ) -> SendableRecordBatchStream { let mut state = StreamStateOnMultiIters { diff --git a/analytic_engine/src/instance/reorder_memtable.rs b/analytic_engine/src/instance/reorder_memtable.rs index e09f14849e..a678fbfea5 100644 --- a/analytic_engine/src/instance/reorder_memtable.rs +++ b/analytic_engine/src/instance/reorder_memtable.rs @@ -26,7 +26,7 @@ pub use arrow::{ }; use async_trait::async_trait; use common_types::{ - record_batch::{FetchingRecordBatch, RecordBatchData}, + record_batch::{FetchedRecordBatch, RecordBatchData}, schema::Schema, }; use datafusion::{ @@ -71,7 +71,7 @@ define_result!(Error); pub type DfResult = std::result::Result; type SendableFetchingRecordBatchStream = - Pin> + Send>>; + Pin> + Send>>; impl From for Error { fn from(df_err: DataFusionError) -> Self { @@ -280,7 +280,7 @@ impl Reorder { let batch = batch.context(FetchRecordBatch)?; let data = RecordBatchData::try_from(batch).context(ConvertRecordBatchData)?; - Ok(FetchingRecordBatch::new_from_parts( + Ok(FetchedRecordBatch::new_from_parts( record_schema.clone(), None, data, diff --git a/analytic_engine/src/memtable/columnar/iter.rs b/analytic_engine/src/memtable/columnar/iter.rs index 01dca69523..12e7168960 100644 --- a/analytic_engine/src/memtable/columnar/iter.rs +++ b/analytic_engine/src/memtable/columnar/iter.rs @@ -28,7 +28,7 @@ use common_types::{ column_schema::ColumnId, datum::Datum, projected_schema::RecordFetchingContext, - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::Row, schema::Schema, SequenceNumber, @@ -188,7 +188,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } /// Fetch next record batch - fn fetch_next_record_batch(&mut self) -> Result> { + fn fetch_next_record_batch(&mut self) -> Result> { debug_assert_eq!(State::Initialized, self.state); assert!(self.batch_size > 0); let rows = if 
!self.need_dedup { @@ -210,7 +210,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { .record_fetching_ctx .primary_key_indexes() .map(|idxs| idxs.to_vec()); - let mut builder = FetchingRecordBatchBuilder::with_capacity( + let mut builder = FetchedRecordBatchBuilder::with_capacity( fetching_schema, primary_key_indexes, self.batch_size, @@ -392,7 +392,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } impl Iterator for ColumnarIterImpl { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { if self.state != State::Initialized { diff --git a/analytic_engine/src/memtable/mod.rs b/analytic_engine/src/memtable/mod.rs index 382a9d654d..5243afcc2a 100644 --- a/analytic_engine/src/memtable/mod.rs +++ b/analytic_engine/src/memtable/mod.rs @@ -25,7 +25,7 @@ use std::{ops::Bound, sync::Arc, time::Instant}; use bytes_ext::{ByteVec, Bytes}; use common_types::{ projected_schema::RecordFetchingContextBuilder, - record_batch::FetchingRecordBatch, + record_batch::FetchedRecordBatch, row::Row, schema::{IndexInWriterSchema, Schema}, time::TimeRange, @@ -291,4 +291,4 @@ pub struct Metrics { pub type MemTableRef = Arc; /// A pointer to columnar iterator -pub type ColumnarIterPtr = Box> + Send + Sync>; +pub type ColumnarIterPtr = Box> + Send + Sync>; diff --git a/analytic_engine/src/memtable/reversed_iter.rs b/analytic_engine/src/memtable/reversed_iter.rs index 00e2fcaaa0..8d387ad2cc 100644 --- a/analytic_engine/src/memtable/reversed_iter.rs +++ b/analytic_engine/src/memtable/reversed_iter.rs @@ -14,7 +14,7 @@ use std::iter::Rev; -use common_types::record_batch::FetchingRecordBatch; +use common_types::record_batch::FetchedRecordBatch; use generic_error::BoxError; use snafu::ResultExt; @@ -26,13 +26,13 @@ use crate::memtable::{IterReverse, Result}; // reverse order naturally. 
pub struct ReversedColumnarIterator { iter: I, - reversed_iter: Option>>>, + reversed_iter: Option>>>, num_record_batch: usize, } impl ReversedColumnarIterator where - I: Iterator>, + I: Iterator>, { pub fn new(iter: I, num_rows: usize, batch_size: usize) -> Self { Self { @@ -57,9 +57,9 @@ where impl Iterator for ReversedColumnarIterator where - I: Iterator>, + I: Iterator>, { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { self.init_if_necessary(); diff --git a/analytic_engine/src/memtable/skiplist/iter.rs b/analytic_engine/src/memtable/skiplist/iter.rs index 92168db9c8..036fa92a23 100644 --- a/analytic_engine/src/memtable/skiplist/iter.rs +++ b/analytic_engine/src/memtable/skiplist/iter.rs @@ -21,7 +21,7 @@ use bytes_ext::{Bytes, BytesMut}; use codec::row; use common_types::{ projected_schema::RecordFetchingContext, - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::contiguous::{ContiguousRowReader, ProjectedContiguousRow}, schema::Schema, SequenceNumber, @@ -146,7 +146,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } /// Fetch next record batch - fn fetch_next_record_batch(&mut self) -> Result> { + fn fetch_next_record_batch(&mut self) -> Result> { debug_assert_eq!(State::Initialized, self.state); assert!(self.batch_size > 0); @@ -155,7 +155,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { .record_fetching_ctx .primary_key_indexes() .map(|idxs| idxs.to_vec()); - let mut builder = FetchingRecordBatchBuilder::with_capacity( + let mut builder = FetchedRecordBatchBuilder::with_capacity( record_schema, primary_key_indexes, self.batch_size, @@ -298,7 +298,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } impl + Clone + Sync + Send> Iterator for ColumnarIterImpl { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { if self.state != State::Initialized { diff --git a/analytic_engine/src/memtable/skiplist/mod.rs b/analytic_engine/src/memtable/skiplist/mod.rs index fd66a20cc7..970e8e7055 100644 --- a/analytic_engine/src/memtable/skiplist/mod.rs +++ b/analytic_engine/src/memtable/skiplist/mod.rs @@ -275,7 +275,7 @@ mod tests { use common_types::{ datum::Datum, projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, - record_batch::FetchingRecordBatch, + record_batch::FetchedRecordBatch, row::Row, schema::IndexInWriterSchema, tests::{build_row, build_schema}, @@ -463,7 +463,7 @@ mod tests { test_memtable_scan_for_projection(schema, memtable); } - fn check_iterator>>( + fn check_iterator>>( iter: T, expected_rows: Vec, ) { diff --git a/analytic_engine/src/row_iter/chain.rs b/analytic_engine/src/row_iter/chain.rs index a39c4459e8..60948719a0 100644 --- a/analytic_engine/src/row_iter/chain.rs +++ b/analytic_engine/src/row_iter/chain.rs @@ -20,7 +20,7 @@ use std::{ use async_trait::async_trait; use common_types::{ projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, - record_batch::FetchingRecordBatch, + record_batch::FetchedRecordBatch, request_id::RequestId, schema::RecordSchemaWithKey, }; @@ -37,7 +37,7 @@ use crate::{ record_batch_stream::{ self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SstStreamContext, }, - FetchingRecordBatchIterator, + FetchedRecordBatchIterator, }, space::SpaceId, sst::{ @@ -327,7 +327,7 @@ impl ChainIterator { } } - async fn next_batch_internal(&mut self) -> Result> { + async fn next_batch_internal(&mut self) -> Result> { self.init_if_necessary(); self.maybe_prefetch().await; @@ -377,14 
+377,14 @@ impl Drop for ChainIterator { } #[async_trait] -impl FetchingRecordBatchIterator for ChainIterator { +impl FetchedRecordBatchIterator for ChainIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { let timer = Instant::now(); let res = self.next_batch_internal().await; self.metrics.scan_duration += timer.elapsed(); diff --git a/analytic_engine/src/row_iter/dedup.rs b/analytic_engine/src/row_iter/dedup.rs index f7dbe9dc5e..4293e2b739 100644 --- a/analytic_engine/src/row_iter/dedup.rs +++ b/analytic_engine/src/row_iter/dedup.rs @@ -16,7 +16,7 @@ use std::cmp::Ordering; use async_trait::async_trait; use common_types::{ - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, request_id::RequestId, row::{Row, RowViewOnBatch, RowWithMeta}, schema::RecordSchemaWithKey, @@ -26,7 +26,7 @@ use logger::{info, trace}; use macros::define_result; use snafu::{ResultExt, Snafu}; -use crate::row_iter::{FetchingRecordBatchIterator, IterOptions}; +use crate::row_iter::{FetchedRecordBatchIterator, IterOptions}; #[derive(Debug, Snafu)] pub enum Error { @@ -54,7 +54,7 @@ define_result!(Error); pub struct DedupIterator { request_id: RequestId, schema: RecordSchemaWithKey, - record_batch_builder: FetchingRecordBatchBuilder, + record_batch_builder: FetchedRecordBatchBuilder, iter: I, /// Previous row returned. prev_row: Option, @@ -67,12 +67,12 @@ pub struct DedupIterator { total_selected_rows: usize, } -impl DedupIterator { +impl DedupIterator { pub fn new(request_id: RequestId, iter: I, iter_options: IterOptions) -> Self { let schema_with_key = iter.schema(); let primary_key_indexes = schema_with_key.primary_key_idx().to_vec(); let fetching_schema = schema_with_key.to_record_schema(); - let record_batch_builder = FetchingRecordBatchBuilder::with_capacity( + let record_batch_builder = FetchedRecordBatchBuilder::with_capacity( fetching_schema, Some(primary_key_indexes), iter_options.batch_size, @@ -89,7 +89,7 @@ impl DedupIterator { } } - fn dedup_batch(&mut self, record_batch: FetchingRecordBatch) -> Result { + fn dedup_batch(&mut self, record_batch: FetchedRecordBatch) -> Result { self.selected_rows.clear(); // Ignore all rows by default. self.selected_rows.resize(record_batch.num_rows(), false); @@ -145,9 +145,9 @@ impl DedupIterator { /// Filter batch by `selected_rows`. 
fn filter_batch( &mut self, - record_batch: FetchingRecordBatch, + record_batch: FetchedRecordBatch, selected_num: usize, - ) -> Result { + ) -> Result { self.total_selected_rows += selected_num; self.total_duplications += record_batch.num_rows() - selected_num; @@ -173,14 +173,14 @@ impl DedupIterator { } #[async_trait] -impl FetchingRecordBatchIterator for DedupIterator { +impl FetchedRecordBatchIterator for DedupIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { match self .iter .next_batch() diff --git a/analytic_engine/src/row_iter/merge.rs b/analytic_engine/src/row_iter/merge.rs index 2dfe6794ae..681b1c650e 100644 --- a/analytic_engine/src/row_iter/merge.rs +++ b/analytic_engine/src/row_iter/merge.rs @@ -24,7 +24,7 @@ use std::{ use async_trait::async_trait; use common_types::{ projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, request_id::RequestId, row::RowViewOnBatch, schema::RecordSchemaWithKey, @@ -45,7 +45,7 @@ use crate::{ self, BoxedPrefetchableRecordBatchStream, MemtableStreamContext, SequencedRecordBatch, SstStreamContext, }, - FetchingRecordBatchIterator, IterOptions, + FetchedRecordBatchIterator, IterOptions, }, space::SpaceId, sst::{ @@ -349,7 +349,7 @@ impl BufferedStreamState { /// Returns number of rows added. fn append_rows_to( &mut self, - builder: &mut FetchingRecordBatchBuilder, + builder: &mut FetchedRecordBatchBuilder, len: usize, ) -> Result { let added = builder @@ -361,7 +361,7 @@ impl BufferedStreamState { /// Take record batch slice with at most `len` rows from cursor and advance /// the cursor. - fn take_record_batch_slice(&mut self, len: usize) -> FetchingRecordBatch { + fn take_record_batch_slice(&mut self, len: usize) -> FetchedRecordBatch { let len_to_fetch = cmp::min( self.buffered_record_batch.record_batch.num_rows() - self.cursor, len, @@ -428,14 +428,14 @@ impl BufferedStream { /// REQUIRE: the buffer is not exhausted. fn append_rows_to( &mut self, - builder: &mut FetchingRecordBatchBuilder, + builder: &mut FetchedRecordBatchBuilder, len: usize, ) -> Result { self.state.as_mut().unwrap().append_rows_to(builder, len) } /// REQUIRE: the buffer is not exhausted. - fn take_record_batch_slice(&mut self, len: usize) -> FetchingRecordBatch { + fn take_record_batch_slice(&mut self, len: usize) -> FetchedRecordBatch { self.state.as_mut().unwrap().take_record_batch_slice(len) } @@ -659,7 +659,7 @@ pub struct MergeIterator { request_id: RequestId, inited: bool, schema: RecordSchemaWithKey, - record_batch_builder: FetchingRecordBatchBuilder, + record_batch_builder: FetchedRecordBatchBuilder, origin_streams: Vec, /// ssts are kept here to avoid them from being purged. 
#[allow(dead_code)] @@ -688,7 +688,7 @@ impl MergeIterator { let heap_cap = streams.len(); let primary_key_indexes = schema.primary_key_idx().to_vec(); let fetching_schema = schema.to_record_schema(); - let record_batch_builder = FetchingRecordBatchBuilder::with_capacity( + let record_batch_builder = FetchedRecordBatchBuilder::with_capacity( fetching_schema, Some(primary_key_indexes), iter_options.batch_size, @@ -821,7 +821,7 @@ impl MergeIterator { async fn fetch_rows_from_one_stream( &mut self, num_rows_to_fetch: usize, - ) -> Result> { + ) -> Result> { assert_eq!(self.hot.len(), 1); self.metrics.times_fetch_rows_from_one += 1; @@ -865,7 +865,7 @@ impl MergeIterator { /// Fetch the next batch from the streams. /// /// `init_if_necessary` should be finished before this method. - async fn fetch_next_batch(&mut self) -> Result> { + async fn fetch_next_batch(&mut self) -> Result> { self.init_if_necessary().await?; self.record_batch_builder.clear(); @@ -900,14 +900,14 @@ impl MergeIterator { } #[async_trait] -impl FetchingRecordBatchIterator for MergeIterator { +impl FetchedRecordBatchIterator for MergeIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { let record_batch = self.fetch_next_batch().await?; trace!("MergeIterator send next record batch:{:?}", record_batch); diff --git a/analytic_engine/src/row_iter/mod.rs b/analytic_engine/src/row_iter/mod.rs index a8406a16bc..e8f8f7be2d 100644 --- a/analytic_engine/src/row_iter/mod.rs +++ b/analytic_engine/src/row_iter/mod.rs @@ -16,7 +16,7 @@ use async_stream::try_stream; use async_trait::async_trait; -use common_types::{record_batch::FetchingRecordBatch, schema::RecordSchemaWithKey}; +use common_types::{record_batch::FetchedRecordBatch, schema::RecordSchemaWithKey}; use generic_error::BoxError; use crate::sst::writer::RecordBatchStream; @@ -38,16 +38,16 @@ pub struct IterOptions { /// The `schema()` should be the same as the RecordBatch from `read()`. /// The reader is exhausted if the `read()` returns the `Ok(None)`. #[async_trait] -pub trait FetchingRecordBatchIterator: Send { +pub trait FetchedRecordBatchIterator: Send { type Error: std::error::Error + Send + Sync + 'static; fn schema(&self) -> &RecordSchemaWithKey; async fn next_batch(&mut self) - -> std::result::Result, Self::Error>; + -> std::result::Result, Self::Error>; } -pub fn record_batch_with_key_iter_to_stream( +pub fn record_batch_with_key_iter_to_stream( mut iter: I, ) -> RecordBatchStream { let record_batch_stream = try_stream! { diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 969f355e34..963cda59ef 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -23,7 +23,7 @@ use arrow::{ datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}, }; use common_types::{ - projected_schema::RecordFetchingContextBuilder, record_batch::FetchingRecordBatch, + projected_schema::RecordFetchingContextBuilder, record_batch::FetchedRecordBatch, schema::RecordSchema, SequenceNumber, }; use datafusion::{ @@ -130,11 +130,11 @@ pub enum Error { define_result!(Error); -// TODO(yingwen): Can we move sequence to FetchingRecordBatch and remove this +// TODO(yingwen): Can we move sequence to FetchedRecordBatch and remove this // struct? But what is the sequence after merge? 
#[derive(Debug)] pub struct SequencedRecordBatch { - pub record_batch: FetchingRecordBatch, + pub record_batch: FetchedRecordBatch, pub sequence: SequenceNumber, } diff --git a/analytic_engine/src/row_iter/tests.rs b/analytic_engine/src/row_iter/tests.rs index 51f1f6eaee..b17bd75cd2 100644 --- a/analytic_engine/src/row_iter/tests.rs +++ b/analytic_engine/src/row_iter/tests.rs @@ -15,7 +15,7 @@ use async_trait::async_trait; use common_types::{ projected_schema::{ProjectedSchema, RecordFetchingContext}, - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::{ contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, Row, @@ -25,7 +25,7 @@ use common_types::{ use macros::define_result; use snafu::Snafu; -use crate::row_iter::FetchingRecordBatchIterator; +use crate::row_iter::FetchedRecordBatchIterator; #[derive(Debug, Snafu)] pub enum Error {} @@ -34,12 +34,12 @@ define_result!(Error); pub struct VectorIterator { schema: RecordSchemaWithKey, - items: Vec>, + items: Vec>, idx: usize, } impl VectorIterator { - pub fn new(schema: RecordSchemaWithKey, items: Vec) -> Self { + pub fn new(schema: RecordSchemaWithKey, items: Vec) -> Self { Self { schema, items: items.into_iter().map(Some).collect(), @@ -49,14 +49,14 @@ impl VectorIterator { } #[async_trait] -impl FetchingRecordBatchIterator for VectorIterator { +impl FetchedRecordBatchIterator for VectorIterator { type Error = Error; fn schema(&self) -> &RecordSchemaWithKey { &self.schema } - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { if self.idx == self.items.len() { return Ok(None); } @@ -68,7 +68,7 @@ impl FetchingRecordBatchIterator for VectorIterator { } } -pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> FetchingRecordBatch { +pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> FetchedRecordBatch { assert!(schema.num_columns() > 1); let projection: Vec = (0..schema.num_columns()).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); @@ -87,7 +87,7 @@ pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> F .primary_key_indexes() .map(|idxs| idxs.to_vec()); let mut builder = - FetchingRecordBatchBuilder::with_capacity(fetching_schema, primary_key_indexes, 2); + FetchedRecordBatchBuilder::with_capacity(fetching_schema, primary_key_indexes, 2); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); let mut buf = Vec::new(); @@ -105,7 +105,7 @@ pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> F builder.build().unwrap() } -pub async fn check_iterator(iter: &mut T, expected_rows: Vec) { +pub async fn check_iterator(iter: &mut T, expected_rows: Vec) { let mut visited_rows = 0; while let Some(batch) = iter.next_batch().await.unwrap() { for row_idx in 0..batch.num_rows() { diff --git a/analytic_engine/src/sst/parquet/async_reader.rs b/analytic_engine/src/sst/parquet/async_reader.rs index b6bccfd5f7..c89211fa84 100644 --- a/analytic_engine/src/sst/parquet/async_reader.rs +++ b/analytic_engine/src/sst/parquet/async_reader.rs @@ -27,7 +27,7 @@ use async_trait::async_trait; use bytes_ext::Bytes; use common_types::{ projected_schema::{RecordFetchingContext, RecordFetchingContextBuilder}, - record_batch::FetchingRecordBatch, + record_batch::FetchedRecordBatch, }; use datafusion::{ common::ToDFSchema, @@ -73,7 +73,7 @@ use crate::{ const 
PRUNE_ROW_GROUPS_METRICS_COLLECTOR_NAME: &str = "prune_row_groups"; type SendableRecordBatchStream = Pin> + Send>>; -type FetchingRecordBatchStream = Box> + Send + Unpin>; +type FetchedRecordBatchStream = Box> + Send + Unpin>; pub struct Reader<'a> { /// The path where the data is persisted. @@ -147,7 +147,7 @@ impl<'a> Reader<'a> { async fn maybe_read_parallelly( &mut self, read_parallelism: usize, - ) -> Result> { + ) -> Result> { assert!(read_parallelism > 0); self.init_if_necessary().await?; @@ -512,7 +512,7 @@ impl RecordBatchProjector { } impl Stream for RecordBatchProjector { - type Item = Result; + type Item = Result; fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let projector = self.get_mut(); @@ -533,7 +533,7 @@ impl Stream for RecordBatchProjector { } projector.metrics.row_num += record_batch.num_rows(); - let projected_batch = FetchingRecordBatch::try_new( + let projected_batch = FetchedRecordBatch::try_new( &projector.record_fetching_ctx, record_batch, ) @@ -569,7 +569,7 @@ impl<'a> SstReader for Reader<'a> { async fn read( &mut self, - ) -> Result>>> { + ) -> Result>>> { let mut streams = self.maybe_read_parallelly(1).await?; assert_eq!(streams.len(), 1); let stream = streams.pop().expect("impossible to fetch no stream"); @@ -580,7 +580,7 @@ impl<'a> SstReader for Reader<'a> { struct RecordBatchReceiver { bg_prefetch_tx: Option>, - rx_group: Vec>>, + rx_group: Vec>>, cur_rx_idx: usize, #[allow(dead_code)] drop_helper: AbortOnDropMany<()>, @@ -588,7 +588,7 @@ struct RecordBatchReceiver { #[async_trait] impl PrefetchableStream for RecordBatchReceiver { - type Item = Result; + type Item = Result; async fn start_prefetch(&mut self) { // Start the prefetch work in background when first poll is called. @@ -605,7 +605,7 @@ impl PrefetchableStream for RecordBatchReceiver { } impl Stream for RecordBatchReceiver { - type Item = Result; + type Item = Result; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { if self.rx_group.is_empty() { @@ -685,8 +685,8 @@ impl<'a> ThreadedReader<'a> { fn read_record_batches_from_sub_reader( &mut self, - mut reader: Box> + Send + Unpin>, - tx: Sender>, + mut reader: Box> + Send + Unpin>, + tx: Sender>, mut rx: watch::Receiver<()>, ) -> JoinHandle<()> { self.runtime.spawn(async move { @@ -713,7 +713,7 @@ impl<'a> SstReader for ThreadedReader<'a> { async fn read( &mut self, - ) -> Result>>> { + ) -> Result>>> { // Get underlying sst readers and channels. let sub_readers = self .inner @@ -736,7 +736,7 @@ impl<'a> SstReader for ThreadedReader<'a> { let channel_cap_per_sub_reader = self.channel_cap / sub_readers.len(); let (tx_group, rx_group): (Vec<_>, Vec<_>) = (0..read_parallelism) - .map(|_| mpsc::channel::>(channel_cap_per_sub_reader)) + .map(|_| mpsc::channel::>(channel_cap_per_sub_reader)) .unzip(); let (bg_prefetch_tx, bg_prefetch_rx) = watch::channel(()); diff --git a/analytic_engine/src/sst/parquet/writer.rs b/analytic_engine/src/sst/parquet/writer.rs index 701d69dae9..88c3762e6d 100644 --- a/analytic_engine/src/sst/parquet/writer.rs +++ b/analytic_engine/src/sst/parquet/writer.rs @@ -18,7 +18,7 @@ use std::collections::HashSet; use async_trait::async_trait; use common_types::{ - datum::DatumKind, record_batch::FetchingRecordBatch, request_id::RequestId, time::TimeRange, + datum::DatumKind, record_batch::FetchedRecordBatch, request_id::RequestId, time::TimeRange, }; use datafusion::parquet::basic::Compression; use futures::StreamExt; @@ -155,8 +155,8 @@ impl RecordBatchGroupWriter { /// the left rows. 
async fn fetch_next_row_group( &mut self, - prev_record_batch: &mut Option, - ) -> Result> { + prev_record_batch: &mut Option, + ) -> Result> { let mut curr_row_group = vec![]; // Used to record the number of remaining rows to fill `curr_row_group`. let mut remaining = self.num_rows_per_row_group; @@ -213,7 +213,7 @@ impl RecordBatchGroupWriter { /// Build the parquet filter for the given `row_group`. fn build_row_group_filter( &self, - row_group_batch: &[FetchingRecordBatch], + row_group_batch: &[FetchedRecordBatch], ) -> Result { let schema_with_key = row_group_batch[0] @@ -243,7 +243,7 @@ impl RecordBatchGroupWriter { fn update_column_values( column_values: &mut [Option], - record_batch: &FetchingRecordBatch, + record_batch: &FetchedRecordBatch, ) { for (col_idx, col_values) in column_values.iter_mut().enumerate() { let mut too_many_values = false; @@ -310,7 +310,7 @@ impl RecordBatchGroupWriter { sink: W, meta_path: &Path, ) -> Result<(usize, ParquetMetaData)> { - let mut prev_record_batch: Option = None; + let mut prev_record_batch: Option = None; let mut arrow_row_group = Vec::new(); let mut total_num_rows = 0; diff --git a/analytic_engine/src/sst/reader.rs b/analytic_engine/src/sst/reader.rs index cac0658e95..e9cdf2f3fd 100644 --- a/analytic_engine/src/sst/reader.rs +++ b/analytic_engine/src/sst/reader.rs @@ -15,7 +15,7 @@ //! Sst reader trait definition. use async_trait::async_trait; -use common_types::record_batch::FetchingRecordBatch; +use common_types::record_batch::FetchedRecordBatch; use crate::{prefetchable_stream::PrefetchableStream, sst::meta_data::SstMetaData}; @@ -105,7 +105,7 @@ pub trait SstReader { async fn read( &mut self, - ) -> Result>>>; + ) -> Result>>>; } #[cfg(test)] @@ -117,7 +117,7 @@ pub mod tests { pub async fn check_stream(stream: &mut S, expected_rows: Vec) where - S: PrefetchableStream> + Unpin, + S: PrefetchableStream> + Unpin, { let mut visited_rows = 0; while let Some(batch) = stream.fetch_next().await { diff --git a/analytic_engine/src/sst/writer.rs b/analytic_engine/src/sst/writer.rs index 84298ec53f..65fb0635a9 100644 --- a/analytic_engine/src/sst/writer.rs +++ b/analytic_engine/src/sst/writer.rs @@ -19,7 +19,7 @@ use std::cmp; use async_trait::async_trait; use bytes_ext::Bytes; use common_types::{ - record_batch::FetchingRecordBatch, request_id::RequestId, schema::Schema, time::TimeRange, + record_batch::FetchedRecordBatch, request_id::RequestId, schema::Schema, time::TimeRange, SequenceNumber, }; use futures::Stream; @@ -100,7 +100,7 @@ pub mod error { pub use error::*; -pub type RecordBatchStreamItem = std::result::Result; +pub type RecordBatchStreamItem = std::result::Result; // TODO(yingwen): SstReader also has a RecordBatchStream, can we use same type? 
pub type RecordBatchStream = Box + Send + Unpin>; diff --git a/benchmarks/src/merge_memtable_bench.rs b/benchmarks/src/merge_memtable_bench.rs index 23ec8927ed..ed49ad7774 100644 --- a/benchmarks/src/merge_memtable_bench.rs +++ b/benchmarks/src/merge_memtable_bench.rs @@ -24,7 +24,7 @@ use analytic_engine::{ row_iter::{ dedup::DedupIterator, merge::{MergeBuilder, MergeConfig}, - FetchingRecordBatchIterator, IterOptions, + FetchedRecordBatchIterator, IterOptions, }, space::SpaceId, sst::{ diff --git a/benchmarks/src/merge_sst_bench.rs b/benchmarks/src/merge_sst_bench.rs index 6552190ff3..340873ce17 100644 --- a/benchmarks/src/merge_sst_bench.rs +++ b/benchmarks/src/merge_sst_bench.rs @@ -22,7 +22,7 @@ use analytic_engine::{ chain::ChainConfig, dedup::DedupIterator, merge::{MergeBuilder, MergeConfig}, - FetchingRecordBatchIterator, IterOptions, + FetchedRecordBatchIterator, IterOptions, }, space::SpaceId, sst::{ diff --git a/common_types/src/projected_schema.rs b/common_types/src/projected_schema.rs index 09e07dcb5c..15bcc46801 100644 --- a/common_types/src/projected_schema.rs +++ b/common_types/src/projected_schema.rs @@ -371,8 +371,8 @@ impl TryFrom for ProjectedSchema { /// Schema with projection informations struct ProjectedSchemaInner { - /// The table schema used to generate plan, possible to differ from recorded - /// schema in ssts. + /// The table schema used to generate plan, possible to differ from + /// schema in ssts/memtable. table_schema: Schema, /// Index of the projected columns in `self.schema`, `None` if /// all columns are needed. diff --git a/common_types/src/record_batch.rs b/common_types/src/record_batch.rs index daa50ef3c6..55a72a2287 100644 --- a/common_types/src/record_batch.rs +++ b/common_types/src/record_batch.rs @@ -362,15 +362,15 @@ fn cast_arrow_record_batch(source: ArrowRecordBatch) -> Result } #[derive(Debug)] -pub struct FetchingRecordBatch { +pub struct FetchedRecordBatch { schema: RecordSchema, - // TODO: remove it later, `FetchingRecordBatch` is unnecessary to know anything about primary + // TODO: remove it later, `FetchedRecordBatch` is unnecessary to know anything about primary // keys. primary_key_indexes: Option>, data: RecordBatchData, } -impl FetchingRecordBatch { +impl FetchedRecordBatch { pub fn try_new( ctx: &RecordFetchingContext, arrow_record_batch: ArrowRecordBatch, @@ -414,7 +414,7 @@ impl FetchingRecordBatch { let data = RecordBatchData::new(schema.to_arrow_schema_ref(), column_blocks)?; - Ok(FetchingRecordBatch { + Ok(FetchedRecordBatch { schema, primary_key_indexes: ctx.primary_key_indexes().map(|idxs| idxs.to_vec()), data, @@ -456,7 +456,7 @@ impl FetchingRecordBatch { Row::from_datums(datums) } - /// Project the [FetchingRecordBatch] into a [RecordBatch] according to + /// Project the [FetchedRecordBatch] into a [RecordBatch] according to /// [ProjectedSchema]. // TODO: how do we ensure `ProjectedSchema` passed here is same as the source // `ProjectedSchema` of `RecordSchema` here? @@ -571,13 +571,13 @@ impl FetchingRecordBatch { } } -pub struct FetchingRecordBatchBuilder { +pub struct FetchedRecordBatchBuilder { fetching_schema: RecordSchema, primary_key_indexes: Option>, builders: Vec, } -impl FetchingRecordBatchBuilder { +impl FetchedRecordBatchBuilder { pub fn new(fetching_schema: RecordSchema, primary_key_indexes: Option>) -> Self { let builders = fetching_schema .columns() @@ -670,7 +670,7 @@ impl FetchingRecordBatchBuilder { /// - The `record_batch` and the builder must have the same schema. 
pub fn append_batch_range( &mut self, - record_batch: &FetchingRecordBatch, + record_batch: &FetchedRecordBatch, start: usize, len: usize, ) -> Result { @@ -710,8 +710,8 @@ impl FetchingRecordBatchBuilder { } } - /// Build [FetchingRecordBatch] and reset the builder. - pub fn build(&mut self) -> Result { + /// Build [FetchedRecordBatch] and reset the builder. + pub fn build(&mut self) -> Result { let column_blocks: Vec<_> = self .builders .iter_mut() @@ -719,7 +719,7 @@ impl FetchingRecordBatchBuilder { .collect(); let arrow_schema = self.fetching_schema.to_arrow_schema_ref(); - Ok(FetchingRecordBatch { + Ok(FetchedRecordBatch { schema: self.fetching_schema.clone(), primary_key_indexes: self.primary_key_indexes.clone(), data: RecordBatchData::new(arrow_schema, column_blocks)?, @@ -730,7 +730,7 @@ impl FetchingRecordBatchBuilder { #[cfg(test)] mod tests { use crate::{ - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::RowViewOnBatch, tests::{ build_fetching_record_batch_by_rows, build_projected_schema, build_rows, @@ -738,13 +738,13 @@ mod tests { }, }; - fn build_fetching_record_batch() -> FetchingRecordBatch { + fn build_fetching_record_batch() -> FetchedRecordBatch { let rows = build_rows(); build_fetching_record_batch_by_rows(rows) } fn check_record_batch_with_key( - record_batch_with_key: FetchingRecordBatch, + record_batch_with_key: FetchedRecordBatch, row_num: usize, column_num: usize, ) -> bool { @@ -766,7 +766,7 @@ mod tests { let projected_schema = build_projected_schema(); let fetching_record_batch = build_fetching_record_batch(); let mut builder = - FetchingRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); + FetchedRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); let view = RowViewOnBatch { record_batch: &fetching_record_batch, row_idx: 1, @@ -785,7 +785,7 @@ mod tests { let record_batch_with_key = build_fetching_record_batch(); let mut builder = - FetchingRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); + FetchedRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); builder .append_batch_range(&record_batch_with_key, 0, 2) .unwrap(); diff --git a/common_types/src/row/mod.rs b/common_types/src/row/mod.rs index d3dede6885..b91a0d8e6f 100644 --- a/common_types/src/row/mod.rs +++ b/common_types/src/row/mod.rs @@ -24,7 +24,7 @@ use snafu::{ensure, Backtrace, OptionExt, Snafu}; use crate::{ column_schema::{ColumnId, ColumnSchema}, datum::{Datum, DatumKind, DatumView}, - record_batch::FetchingRecordBatch, + record_batch::FetchedRecordBatch, schema::{RecordSchemaWithKey, Schema}, time::Timestamp, }; @@ -560,13 +560,13 @@ pub trait RowView { fn column_by_idx(&self, column_idx: usize) -> Datum; } -// TODO(yingwen): Add a method to get row view on FetchingRecordBatch. -/// A row view on the [FetchingRecordBatch]. +// TODO(yingwen): Add a method to get row view on FetchedRecordBatch. +/// A row view on the [FetchedRecordBatch]. /// /// `row_idx < record_batch.num_rows()` is ensured. 
#[derive(Debug)] pub struct RowViewOnBatch<'a> { - pub record_batch: &'a FetchingRecordBatch, + pub record_batch: &'a FetchedRecordBatch, pub row_idx: usize, } @@ -583,7 +583,7 @@ impl<'a> RowViewOnBatch<'a> { pub struct RowViewOnBatchColumnIter<'a> { next_column_idx: usize, row_idx: usize, - record_batch: &'a FetchingRecordBatch, + record_batch: &'a FetchedRecordBatch, } impl<'a> RowView for RowViewOnBatch<'a> { diff --git a/common_types/src/tests.rs b/common_types/src/tests.rs index 5bf998e452..5400e9efa8 100644 --- a/common_types/src/tests.rs +++ b/common_types/src/tests.rs @@ -19,7 +19,7 @@ use crate::{ column_schema, datum::{Datum, DatumKind}, projected_schema::{ProjectedSchema, RecordFetchingContext}, - record_batch::{FetchingRecordBatch, FetchingRecordBatchBuilder}, + record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::{ contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, Row, @@ -357,7 +357,7 @@ pub fn build_rows() -> Vec { ] } -pub fn build_fetching_record_batch_by_rows(rows: Vec) -> FetchingRecordBatch { +pub fn build_fetching_record_batch_by_rows(rows: Vec) -> FetchedRecordBatch { let schema = build_schema(); assert!(schema.num_columns() > 1); let projection: Vec = (0..schema.num_columns() - 1).collect(); @@ -366,7 +366,7 @@ pub fn build_fetching_record_batch_by_rows(rows: Vec) -> FetchingRecordBatc RecordFetchingContext::new(&projected_schema.to_record_schema(), None, &schema, &schema) .unwrap(); - let mut builder = FetchingRecordBatchBuilder::with_capacity( + let mut builder = FetchedRecordBatchBuilder::with_capacity( record_fetching_ctx.fetching_schema().clone(), None, 2, @@ -389,7 +389,7 @@ pub fn build_fetching_record_batch_by_rows(rows: Vec) -> FetchingRecordBatc } pub fn check_record_batch_with_key_with_rows( - record_batch_with_key: &FetchingRecordBatch, + record_batch_with_key: &FetchedRecordBatch, row_num: usize, column_num: usize, rows: Vec, diff --git a/system_catalog/src/tables.rs b/system_catalog/src/tables.rs index ac8ec9c0d8..97af4173ac 100644 --- a/system_catalog/src/tables.rs +++ b/system_catalog/src/tables.rs @@ -22,7 +22,7 @@ use common_types::{ column_schema, datum::{Datum, DatumKind}, projected_schema::RecordFetchingContext, - record_batch::FetchingRecordBatchBuilder, + record_batch::FetchedRecordBatchBuilder, row::Row, schema, schema::Schema, @@ -157,7 +157,7 @@ impl SystemTable for Tables { let fetching_schema = request.projected_schema.to_record_schema_with_key(); let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); let fetching_schema = fetching_schema.to_record_schema(); - let mut builder = FetchingRecordBatchBuilder::new( + let mut builder = FetchedRecordBatchBuilder::new( fetching_schema.clone(), Some(primary_key_indexes.clone()), ); From eff6ee9fa04de14d51457bc035c61c39787c0ef8 Mon Sep 17 00:00:00 2001 From: kamille Date: Thu, 21 Dec 2023 20:56:42 +0800 Subject: [PATCH 09/13] rename `RecordFetchingContext` to `RowProjector`. 
--- .../src/instance/flush_compaction.rs | 34 +++---- analytic_engine/src/instance/mod.rs | 6 +- analytic_engine/src/memtable/columnar/iter.rs | 26 ++--- analytic_engine/src/memtable/mod.rs | 4 +- analytic_engine/src/memtable/skiplist/iter.rs | 16 ++-- analytic_engine/src/memtable/skiplist/mod.rs | 22 ++--- analytic_engine/src/row_iter/chain.rs | 16 ++-- analytic_engine/src/row_iter/dedup.rs | 10 +- analytic_engine/src/row_iter/merge.rs | 24 ++--- .../src/row_iter/record_batch_stream.rs | 28 +++--- analytic_engine/src/row_iter/tests.rs | 20 ++-- analytic_engine/src/sst/factory.rs | 4 +- .../src/sst/parquet/async_reader.rs | 36 +++---- analytic_engine/src/sst/parquet/writer.rs | 12 +-- benchmarks/src/scan_memtable_bench.rs | 10 +- benchmarks/src/sst_bench.rs | 10 +- benchmarks/src/sst_tools.rs | 22 ++--- benchmarks/src/util.rs | 10 +- common_types/src/projected_schema.rs | 94 +++++++++---------- common_types/src/record_batch.rs | 36 +++---- common_types/src/row/contiguous.rs | 12 +-- common_types/src/tests.rs | 12 +-- components/object_store/src/disk_cache.rs | 2 +- .../cases/env/local/ddl/query-plan.result | 8 +- src/wal/src/message_queue_impl/region.rs | 2 +- system_catalog/src/tables.rs | 16 ++-- tools/src/bin/sst-convert.rs | 10 +- 27 files changed, 251 insertions(+), 251 deletions(-) diff --git a/analytic_engine/src/instance/flush_compaction.rs b/analytic_engine/src/instance/flush_compaction.rs index 9e434baa42..553ccc91c4 100644 --- a/analytic_engine/src/instance/flush_compaction.rs +++ b/analytic_engine/src/instance/flush_compaction.rs @@ -17,7 +17,7 @@ use std::{cmp, collections::Bound, fmt, sync::Arc}; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, request_id::RequestId, row::RowViewOnBatch, @@ -893,12 +893,12 @@ impl SpaceStore { self.meta_cache.clone(), runtime, ); - let fetching_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); - let fetching_schema = fetching_schema.into_record_schema(); + let fetched_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); + let fetched_schema = fetched_schema.into_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let record_fetching_ctx_builder = RecordFetchingContextBuilder::new( - fetching_schema, + let row_projector_builder = RowProjectorBuilder::new( + fetched_schema, table_schema, Some(primary_key_indexes), ); @@ -947,7 +947,7 @@ impl SpaceStore { }; // TODO: eliminate the duplicated building of `SstReadOptions`. 
- let sst_read_options = sst_read_options_builder.build(record_fetching_ctx_builder); + let sst_read_options = sst_read_options_builder.build(row_projector_builder); let sst_meta = { let meta_reader = SstMetaReader { space_id: table_data.space_id, @@ -1072,12 +1072,12 @@ fn split_record_batch_with_time_ranges( time_ranges: &[TimeRange], timestamp_idx: usize, ) -> Result> { - let fetching_schema = record_batch.schema(); + let fetched_schema = record_batch.schema(); let primary_key_indexes = record_batch.primary_key_indexes(); let mut builders: Vec = (0..time_ranges.len()) .map(|_| { let primary_key_indexes = primary_key_indexes.map(|idxs| idxs.to_vec()); - FetchedRecordBatchBuilder::new(fetching_schema.clone(), primary_key_indexes) + FetchedRecordBatchBuilder::new(fetched_schema.clone(), primary_key_indexes) }) .collect(); @@ -1120,17 +1120,17 @@ fn build_mem_table_iter( ) -> Result { let scan_ctx = ScanContext::default(); let projected_schema = ProjectedSchema::no_projection(table_data.schema()); - let fetching_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); - let fetching_schema = fetching_schema.into_record_schema(); + let fetched_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); + let fetched_schema = fetched_schema.into_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema, Some(primary_key_indexes)); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema, Some(primary_key_indexes)); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, end_user_key: Bound::Unbounded, sequence: common_types::MAX_SEQUENCE_NUMBER, - record_fetching_ctx_builder, + row_projector_builder, need_dedup: table_data.dedup(), reverse: false, metrics_collector: None, @@ -1145,7 +1145,7 @@ fn build_mem_table_iter( mod tests { use common_types::{ tests::{ - build_fetching_record_batch_by_rows, build_row, build_row_opt, + build_fetched_record_batch_by_rows, build_row, build_row_opt, check_record_batch_with_key_with_rows, }, time::TimeRange, @@ -1187,7 +1187,7 @@ mod tests { .into_iter() .flatten() .collect(); - let record_batch_with_key = build_fetching_record_batch_by_rows(rows); + let record_batch_with_key = build_fetched_record_batch_by_rows(rows); let column_num = record_batch_with_key.num_columns(); let time_ranges = vec![ TimeRange::new_unchecked_for_test(0, 100), diff --git a/analytic_engine/src/instance/mod.rs b/analytic_engine/src/instance/mod.rs index 2ba236d09a..dbd0aa17c3 100644 --- a/analytic_engine/src/instance/mod.rs +++ b/analytic_engine/src/instance/mod.rs @@ -33,7 +33,7 @@ pub(crate) mod write; use std::sync::Arc; -use common_types::{projected_schema::RecordFetchingContextBuilder, table::TableId}; +use common_types::{projected_schema::RowProjectorBuilder, table::TableId}; use generic_error::{BoxError, GenericError}; use logger::{error, info}; use macros::define_result; @@ -347,13 +347,13 @@ impl SstReadOptionsBuilder { pub fn build( self, - record_fetching_ctx_builder: RecordFetchingContextBuilder, + row_projector_builder: RowProjectorBuilder, ) -> SstReadOptions { SstReadOptions { maybe_table_level_metrics: self.maybe_table_level_metrics, num_rows_per_row_group: self.num_rows_per_row_group, frequency: self.scan_type.into(), - record_fetching_ctx_builder, + row_projector_builder, 
predicate: self.predicate, meta_cache: self.meta_cache, scan_options: self.scan_options, diff --git a/analytic_engine/src/memtable/columnar/iter.rs b/analytic_engine/src/memtable/columnar/iter.rs index 12e7168960..e880e6fb50 100644 --- a/analytic_engine/src/memtable/columnar/iter.rs +++ b/analytic_engine/src/memtable/columnar/iter.rs @@ -27,7 +27,7 @@ use common_types::{ column::Column, column_schema::ColumnId, datum::Datum, - projected_schema::RecordFetchingContext, + projected_schema::RowProjector, record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::Row, schema::Schema, @@ -66,7 +66,7 @@ pub struct ColumnarIterImpl + Clone + Sync + Send> /// Schema of this memtable, used to decode row memtable_schema: Schema, /// Projection of schema to read - record_fetching_ctx: RecordFetchingContext, + row_projector: RowProjector, // Options related: batch_size: usize, @@ -100,8 +100,8 @@ impl + Clone + Sync + Send> ColumnarIterImpl { last_sequence: SequenceNumber, skiplist: Skiplist, ) -> Result { - let record_fetching_ctx = request - .record_fetching_ctx_builder + let row_projector = request + .row_projector_builder .build(&schema) .context(ProjectSchema)?; let mut columnar_iter = Self { @@ -109,7 +109,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { row_num, current_idx: 0, memtable_schema: schema, - record_fetching_ctx, + row_projector, batch_size: ctx.batch_size, deadline: ctx.deadline, start_user_key: request.start_user_key, @@ -205,13 +205,13 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } } - let fetching_schema = self.record_fetching_ctx.fetching_schema().clone(); + let fetched_schema = self.row_projector.fetched_schema().clone(); let primary_key_indexes = self - .record_fetching_ctx + .row_projector .primary_key_indexes() .map(|idxs| idxs.to_vec()); let mut builder = FetchedRecordBatchBuilder::with_capacity( - fetching_schema, + fetched_schema, primary_key_indexes, self.batch_size, ); @@ -313,8 +313,8 @@ impl + Clone + Sync + Send> ColumnarIterImpl { self.batch_size ]; for (col_idx, column_schema_idx) in self - .record_fetching_ctx - .fetching_source_column_indexes() + .row_projector + .fetched_source_column_indexes() .iter() .enumerate() { @@ -337,13 +337,13 @@ impl + Clone + Sync + Send> ColumnarIterImpl { let mut num_rows = 0; let memtable = self.memtable.read().unwrap(); - let record_schema = self.record_fetching_ctx.fetching_schema(); + let record_schema = self.row_projector.fetched_schema(); let mut rows = vec![Row::from_datums(vec![Datum::Null; record_schema.num_columns()]); self.batch_size]; for (col_idx, column_schema_idx) in self - .record_fetching_ctx - .fetching_source_column_indexes() + .row_projector + .fetched_source_column_indexes() .iter() .enumerate() { diff --git a/analytic_engine/src/memtable/mod.rs b/analytic_engine/src/memtable/mod.rs index 5243afcc2a..a8b532bce7 100644 --- a/analytic_engine/src/memtable/mod.rs +++ b/analytic_engine/src/memtable/mod.rs @@ -24,7 +24,7 @@ use std::{ops::Bound, sync::Arc, time::Instant}; use bytes_ext::{ByteVec, Bytes}; use common_types::{ - projected_schema::RecordFetchingContextBuilder, + projected_schema::RowProjectorBuilder, record_batch::FetchedRecordBatch, row::Row, schema::{IndexInWriterSchema, Schema}, @@ -203,7 +203,7 @@ pub struct ScanRequest { /// visible. pub sequence: SequenceNumber, /// Schema and projection to read. - pub record_fetching_ctx_builder: RecordFetchingContextBuilder, + pub row_projector_builder: RowProjectorBuilder, pub need_dedup: bool, pub reverse: bool, /// Collector for scan metrics. 
diff --git a/analytic_engine/src/memtable/skiplist/iter.rs b/analytic_engine/src/memtable/skiplist/iter.rs index 036fa92a23..24a7a0a787 100644 --- a/analytic_engine/src/memtable/skiplist/iter.rs +++ b/analytic_engine/src/memtable/skiplist/iter.rs @@ -20,7 +20,7 @@ use arena::{Arena, BasicStats}; use bytes_ext::{Bytes, BytesMut}; use codec::row; use common_types::{ - projected_schema::RecordFetchingContext, + projected_schema::RowProjector, record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::contiguous::{ContiguousRowReader, ProjectedContiguousRow}, schema::Schema, @@ -57,7 +57,7 @@ pub struct ColumnarIterImpl + Clone + Sync + Send> /// Schema of this memtable, used to decode row memtable_schema: Schema, /// Projection of schema to read - record_fetching_ctx: RecordFetchingContext, + row_projector: RowProjector, // Options related: batch_size: usize, @@ -85,8 +85,8 @@ impl + Clone + Sync + Send> ColumnarIterImpl { request: ScanRequest, ) -> Result { // Create projection for the memtable schema - let record_fetching_ctx = request - .record_fetching_ctx_builder + let row_projector = request + .row_projector_builder .build(&memtable.schema) .context(ProjectSchema)?; @@ -94,7 +94,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { let mut columnar_iter = Self { iter, memtable_schema: memtable.schema.clone(), - record_fetching_ctx, + row_projector, batch_size: ctx.batch_size, deadline: ctx.deadline, start_user_key: request.start_user_key, @@ -150,9 +150,9 @@ impl + Clone + Sync + Send> ColumnarIterImpl { debug_assert_eq!(State::Initialized, self.state); assert!(self.batch_size > 0); - let record_schema = self.record_fetching_ctx.fetching_schema().clone(); + let record_schema = self.row_projector.fetched_schema().clone(); let primary_key_indexes = self - .record_fetching_ctx + .row_projector .primary_key_indexes() .map(|idxs| idxs.to_vec()); let mut builder = FetchedRecordBatchBuilder::with_capacity( @@ -166,7 +166,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { let row_reader = ContiguousRowReader::try_new(&row, &self.memtable_schema) .context(DecodeContinuousRow)?; let projected_row = - ProjectedContiguousRow::new(row_reader, &self.record_fetching_ctx); + ProjectedContiguousRow::new(row_reader, &self.row_projector); trace!("Column iterator fetch next row, row:{:?}", projected_row); diff --git a/analytic_engine/src/memtable/skiplist/mod.rs b/analytic_engine/src/memtable/skiplist/mod.rs index 970e8e7055..7bf68784d7 100644 --- a/analytic_engine/src/memtable/skiplist/mod.rs +++ b/analytic_engine/src/memtable/skiplist/mod.rs @@ -274,7 +274,7 @@ mod tests { use codec::memcomparable::MemComparable; use common_types::{ datum::Datum, - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, record_batch::FetchedRecordBatch, row::Row, schema::IndexInWriterSchema, @@ -294,10 +294,10 @@ mod tests { ) { let projection: Vec = (0..schema.num_columns()).collect(); let projected_schema = ProjectedSchema::new(schema, Some(projection)).unwrap(); - let fetching_schema = projected_schema.to_record_schema(); + let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema.clone(), None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema.clone(), None); let testcases = vec![ ( // limited by sequence @@ -305,7 +305,7 @@ mod tests { start_user_key: 
Bound::Unbounded, end_user_key: Bound::Unbounded, sequence: 2, - record_fetching_ctx_builder: record_fetching_ctx_builder.clone(), + row_projector_builder: row_projector_builder.clone(), need_dedup: true, reverse: false, metrics_collector: None, @@ -325,7 +325,7 @@ mod tests { start_user_key: Bound::Included(build_scan_key("a", 1)), end_user_key: Bound::Excluded(build_scan_key("e", 5)), sequence: 2, - record_fetching_ctx_builder: record_fetching_ctx_builder.clone(), + row_projector_builder: row_projector_builder.clone(), need_dedup: true, reverse: false, metrics_collector: None, @@ -344,7 +344,7 @@ mod tests { start_user_key: Bound::Included(build_scan_key("a", 1)), end_user_key: Bound::Excluded(build_scan_key("e", 5)), sequence: 1, - record_fetching_ctx_builder, + row_projector_builder, need_dedup: true, reverse: false, metrics_collector: None, @@ -370,16 +370,16 @@ mod tests { ) { let projection: Vec = (0..2).collect(); let projected_schema = ProjectedSchema::new(schema, Some(projection)).unwrap(); - let fetching_schema = projected_schema.to_record_schema(); + let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema.clone(), None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema.clone(), None); let testcases = vec![( ScanRequest { start_user_key: Bound::Included(build_scan_key("a", 1)), end_user_key: Bound::Excluded(build_scan_key("e", 5)), sequence: 2, - record_fetching_ctx_builder, + row_projector_builder, need_dedup: true, reverse: false, metrics_collector: None, diff --git a/analytic_engine/src/row_iter/chain.rs b/analytic_engine/src/row_iter/chain.rs index 60948719a0..15928ce141 100644 --- a/analytic_engine/src/row_iter/chain.rs +++ b/analytic_engine/src/row_iter/chain.rs @@ -19,7 +19,7 @@ use std::{ use async_trait::async_trait; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, record_batch::FetchedRecordBatch, request_id::RequestId, schema::RecordSchemaWithKey, @@ -124,18 +124,18 @@ impl<'a> Builder<'a> { impl<'a> Builder<'a> { pub async fn build(self) -> Result { - let fetching_schema = self.config.projected_schema.to_record_schema(); + let fetched_schema = self.config.projected_schema.to_record_schema(); let table_schema = self.config.projected_schema.table_schema(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema.clone(), table_schema.clone(), None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema.clone(), table_schema.clone(), None); let sst_read_options = self .config .sst_read_options_builder - .build(record_fetching_ctx_builder.clone()); + .build(row_projector_builder.clone()); let memtable_stream_ctx = MemtableStreamContext { - record_fetching_ctx_builder, - fetching_schema: fetching_schema.clone(), + row_projector_builder, + fetched_schema: fetched_schema.clone(), predicate: self.config.predicate, need_dedup: false, reverse: false, @@ -144,7 +144,7 @@ impl<'a> Builder<'a> { let sst_stream_ctx = SstStreamContext { sst_read_options, - fetching_schema, + fetched_schema, }; let total_sst_streams: usize = self.ssts.iter().map(|v| v.len()).sum(); diff --git a/analytic_engine/src/row_iter/dedup.rs b/analytic_engine/src/row_iter/dedup.rs index 4293e2b739..727144a3ab 100644 --- a/analytic_engine/src/row_iter/dedup.rs +++ 
b/analytic_engine/src/row_iter/dedup.rs @@ -71,9 +71,9 @@ impl DedupIterator { pub fn new(request_id: RequestId, iter: I, iter_options: IterOptions) -> Self { let schema_with_key = iter.schema(); let primary_key_indexes = schema_with_key.primary_key_idx().to_vec(); - let fetching_schema = schema_with_key.to_record_schema(); + let fetched_schema = schema_with_key.to_record_schema(); let record_batch_builder = FetchedRecordBatchBuilder::with_capacity( - fetching_schema, + fetched_schema, Some(primary_key_indexes), iter_options.batch_size, ); @@ -215,7 +215,7 @@ mod tests { use super::*; use crate::row_iter::tests::{ - build_fetching_record_batch_with_key, check_iterator, VectorIterator, + build_fetched_record_batch_with_key, check_iterator, VectorIterator, }; #[tokio::test] @@ -225,7 +225,7 @@ mod tests { let iter = VectorIterator::new( schema.to_record_schema_with_key(), vec![ - build_fetching_record_batch_with_key( + build_fetched_record_batch_with_key( schema.clone(), vec![ build_row(b"a", 1, 10.0, "v1", 1000, 1_000_000), @@ -233,7 +233,7 @@ mod tests { build_row(b"a", 2, 10.0, "v2", 2000, 2_000_000), ], ), - build_fetching_record_batch_with_key( + build_fetched_record_batch_with_key( schema, vec![ build_row(b"a", 2, 10.0, "v", 2000, 2_000_000), diff --git a/analytic_engine/src/row_iter/merge.rs b/analytic_engine/src/row_iter/merge.rs index 681b1c650e..836614b0a3 100644 --- a/analytic_engine/src/row_iter/merge.rs +++ b/analytic_engine/src/row_iter/merge.rs @@ -23,7 +23,7 @@ use std::{ use async_trait::async_trait; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, request_id::RequestId, row::RowViewOnBatch, @@ -175,23 +175,23 @@ impl<'a> MergeBuilder<'a> { } pub async fn build(self) -> Result { - let fetching_schema = self.config.projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); - let fetching_schema = fetching_schema.into_record_schema(); + let fetched_schema = self.config.projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); + let fetched_schema = fetched_schema.into_record_schema(); let table_schema = self.config.projected_schema.table_schema(); - let record_fetching_ctx_builder = RecordFetchingContextBuilder::new( - fetching_schema.clone(), + let row_projector_builder = RowProjectorBuilder::new( + fetched_schema.clone(), table_schema.clone(), Some(primary_key_indexes), ); let sst_read_options = self .config .sst_read_options_builder - .build(record_fetching_ctx_builder.clone()); + .build(row_projector_builder.clone()); let memtable_stream_ctx = MemtableStreamContext { - record_fetching_ctx_builder, - fetching_schema: fetching_schema.clone(), + row_projector_builder, + fetched_schema: fetched_schema.clone(), predicate: self.config.predicate, need_dedup: self.config.need_dedup, reverse: self.config.reverse, @@ -200,7 +200,7 @@ impl<'a> MergeBuilder<'a> { let sst_stream_ctx = SstStreamContext { sst_read_options, - fetching_schema, + fetched_schema, }; let sst_streams_num: usize = self @@ -687,9 +687,9 @@ impl MergeIterator { ) -> Self { let heap_cap = streams.len(); let primary_key_indexes = schema.primary_key_idx().to_vec(); - let fetching_schema = schema.to_record_schema(); + let fetched_schema = schema.to_record_schema(); let record_batch_builder = FetchedRecordBatchBuilder::with_capacity( - 
fetching_schema, + fetched_schema, Some(primary_key_indexes), iter_options.batch_size, ); diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 963cda59ef..504c79a978 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -23,7 +23,7 @@ use arrow::{ datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}, }; use common_types::{ - projected_schema::RecordFetchingContextBuilder, record_batch::FetchedRecordBatch, + projected_schema::RowProjectorBuilder, record_batch::FetchedRecordBatch, schema::RecordSchema, SequenceNumber, }; use datafusion::{ @@ -224,7 +224,7 @@ pub fn filtered_stream_from_memtable( stream_from_memtable(memtable, ctx, metrics_collector).and_then(|origin_stream| { filter_stream( origin_stream, - ctx.fetching_schema.to_arrow_schema_ref(), + ctx.fetched_schema.to_arrow_schema_ref(), &ctx.predicate, ) }) @@ -241,19 +241,19 @@ pub fn stream_from_memtable( ..Default::default() }; let max_seq = memtable.last_sequence(); - let fetching_cols = ctx - .fetching_schema + let fetched_cols = ctx + .fetched_schema .columns() .iter() .format_with(",", |col, f| f(&format_args!("{}", col.name))); let scan_memtable_desc = - format!("scan_memtable_{max_seq}, fetching_columns:[{fetching_cols}]",); + format!("scan_memtable_{max_seq}, fetched_columns:[{fetched_cols}]",); let metrics_collector = metrics_collector.map(|v| v.span(scan_memtable_desc)); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, end_user_key: Bound::Unbounded, sequence: max_seq, - record_fetching_ctx_builder: ctx.record_fetching_ctx_builder.clone(), + row_projector_builder: ctx.row_projector_builder.clone(), need_dedup: ctx.need_dedup, reverse: ctx.reverse, metrics_collector, @@ -272,8 +272,8 @@ pub fn stream_from_memtable( } pub struct MemtableStreamContext { - pub record_fetching_ctx_builder: RecordFetchingContextBuilder, - pub fetching_schema: RecordSchema, + pub row_projector_builder: RowProjectorBuilder, + pub fetched_schema: RecordSchema, pub predicate: PredicateRef, pub need_dedup: bool, pub reverse: bool, @@ -304,7 +304,7 @@ pub async fn filtered_stream_from_sst_file( .and_then(|origin_stream| { filter_stream( origin_stream, - ctx.fetching_schema.to_arrow_schema_ref(), + ctx.fetched_schema.to_arrow_schema_ref(), &ctx.sst_read_options.predicate, ) }) @@ -327,13 +327,13 @@ pub async fn stream_from_sst_file( file_size: Some(sst_file.size() as usize), file_format: Some(sst_file.storage_format()), }; - let fetching_cols = ctx - .fetching_schema + let fetched_cols = ctx + .fetched_schema .columns() .iter() .format_with(",", |col, f| f(&format_args!("{}", col.name))); let scan_sst_desc = format!( - "scan_sst_{}, fetching_columns:[{fetching_cols}]", + "scan_sst_{}, fetched_columns:[{fetched_cols}]", sst_file.id() ); let metrics_collector = metrics_collector.map(|v| v.span(scan_sst_desc)); @@ -363,7 +363,7 @@ pub async fn stream_from_sst_file( pub struct SstStreamContext { pub sst_read_options: SstReadOptions, - pub fetching_schema: RecordSchema, + pub fetched_schema: RecordSchema, } #[cfg(test)] @@ -382,7 +382,7 @@ pub mod tests { .into_iter() .map(|(seq, rows)| { let batch = SequencedRecordBatch { - record_batch: row_iter::tests::build_fetching_record_batch_with_key( + record_batch: row_iter::tests::build_fetched_record_batch_with_key( schema.clone(), rows, ), diff --git a/analytic_engine/src/row_iter/tests.rs b/analytic_engine/src/row_iter/tests.rs index 
b17bd75cd2..8c02e20524 100644 --- a/analytic_engine/src/row_iter/tests.rs +++ b/analytic_engine/src/row_iter/tests.rs @@ -14,7 +14,7 @@ use async_trait::async_trait; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContext}, + projected_schema::{ProjectedSchema, RowProjector}, record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::{ contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, @@ -68,26 +68,26 @@ impl FetchedRecordBatchIterator for VectorIterator { } } -pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> FetchedRecordBatch { +pub fn build_fetched_record_batch_with_key(schema: Schema, rows: Vec) -> FetchedRecordBatch { assert!(schema.num_columns() > 1); let projection: Vec = (0..schema.num_columns()).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - let fetching_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); - let fetching_schema = fetching_schema.to_record_schema(); + let fetched_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); + let fetched_schema = fetched_schema.to_record_schema(); let table_schema = projected_schema.table_schema(); - let record_fetching_ctx = RecordFetchingContext::new( - &fetching_schema, + let row_projector = RowProjector::new( + &fetched_schema, Some(primary_key_indexes), table_schema, table_schema, ) .unwrap(); - let primary_key_indexes = record_fetching_ctx + let primary_key_indexes = row_projector .primary_key_indexes() .map(|idxs| idxs.to_vec()); let mut builder = - FetchedRecordBatchBuilder::with_capacity(fetching_schema, primary_key_indexes, 2); + FetchedRecordBatchBuilder::with_capacity(fetched_schema, primary_key_indexes, 2); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); let mut buf = Vec::new(); @@ -97,7 +97,7 @@ pub fn build_fetching_record_batch_with_key(schema: Schema, rows: Vec) -> F writer.write_row(&row).unwrap(); let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &record_fetching_ctx); + let projected_row = ProjectedContiguousRow::new(source_row, &row_projector); builder .append_projected_contiguous_row(&projected_row) .unwrap(); diff --git a/analytic_engine/src/sst/factory.rs b/analytic_engine/src/sst/factory.rs index 39bf921a09..07c40f5e3e 100644 --- a/analytic_engine/src/sst/factory.rs +++ b/analytic_engine/src/sst/factory.rs @@ -17,7 +17,7 @@ use std::{fmt::Debug, sync::Arc}; use async_trait::async_trait; -use common_types::projected_schema::RecordFetchingContextBuilder; +use common_types::projected_schema::RowProjectorBuilder; use macros::define_result; use object_store::{ObjectStoreRef, Path}; use runtime::Runtime; @@ -136,7 +136,7 @@ pub struct SstReadOptions { pub frequency: ReadFrequency, pub num_rows_per_row_group: usize, - pub record_fetching_ctx_builder: RecordFetchingContextBuilder, + pub row_projector_builder: RowProjectorBuilder, pub predicate: PredicateRef, pub meta_cache: Option, pub scan_options: ScanOptions, diff --git a/analytic_engine/src/sst/parquet/async_reader.rs b/analytic_engine/src/sst/parquet/async_reader.rs index c89211fa84..173bd68a90 100644 --- a/analytic_engine/src/sst/parquet/async_reader.rs +++ b/analytic_engine/src/sst/parquet/async_reader.rs @@ -26,7 +26,7 @@ use arrow::{datatypes::SchemaRef, 
record_batch::RecordBatch as ArrowRecordBatch} use async_trait::async_trait; use bytes_ext::Bytes; use common_types::{ - projected_schema::{RecordFetchingContext, RecordFetchingContextBuilder}, + projected_schema::{RowProjector, RowProjectorBuilder}, record_batch::FetchedRecordBatch, }; use datafusion::{ @@ -90,8 +90,8 @@ pub struct Reader<'a> { /// Init those fields in `init_if_necessary` meta_data: Option, - record_fetching_ctx_builder: RecordFetchingContextBuilder, - record_fetching_ctx: Option, + row_projector_builder: RowProjectorBuilder, + row_projector: Option, /// Options for `read_parallelly` metrics: Metrics, @@ -136,8 +136,8 @@ impl<'a> Reader<'a> { predicate: options.predicate.clone(), frequency: options.frequency, meta_data: None, - record_fetching_ctx_builder: options.record_fetching_ctx_builder.clone(), - record_fetching_ctx: None, + row_projector_builder: options.row_projector_builder.clone(), + row_projector: None, metrics, df_plan_metrics, table_level_sst_metrics: options.maybe_table_level_metrics.clone(), @@ -156,13 +156,13 @@ impl<'a> Reader<'a> { return Ok(Vec::new()); } - let record_fetching_ctx = self.record_fetching_ctx.take().unwrap(); + let row_projector = self.row_projector.take().unwrap(); let streams: Vec<_> = streams .into_iter() .map(|stream| { Box::new(RecordBatchProjector::new( stream, - record_fetching_ctx.clone(), + row_projector.clone(), self.metrics.metrics_collector.clone(), )) as _ }) @@ -240,7 +240,7 @@ impl<'a> Reader<'a> { assert!(self.meta_data.is_some()); let meta_data = self.meta_data.as_ref().unwrap(); - let record_fetching_ctx = self.record_fetching_ctx.as_ref().unwrap(); + let row_projector = self.row_projector.as_ref().unwrap(); let arrow_schema = meta_data.custom().schema.to_arrow_schema_ref(); // Get target row groups. let target_row_groups = { @@ -296,7 +296,7 @@ impl<'a> Reader<'a> { let parquet_metadata = meta_data.parquet(); let proj_mask = ProjectionMask::leaves( meta_data.parquet().file_metadata().schema_descr(), - record_fetching_ctx + row_projector .existed_source_projection() .iter() .copied(), @@ -355,14 +355,14 @@ impl<'a> Reader<'a> { meta_data }; - let record_fetching_ctx = self - .record_fetching_ctx_builder + let row_projector = self + .row_projector_builder .build(&meta_data.custom().schema) .box_err() .context(Projection)?; self.meta_data = Some(meta_data); - self.record_fetching_ctx = Some(record_fetching_ctx); + self.row_projector = Some(row_projector); Ok(()) } @@ -485,7 +485,7 @@ pub(crate) struct ProjectorMetrics { struct RecordBatchProjector { stream: SendableRecordBatchStream, - record_fetching_ctx: RecordFetchingContext, + row_projector: RowProjector, metrics: ProjectorMetrics, start_time: Instant, @@ -494,7 +494,7 @@ struct RecordBatchProjector { impl RecordBatchProjector { fn new( stream: SendableRecordBatchStream, - record_fetching_ctx: RecordFetchingContext, + row_projector: RowProjector, metrics_collector: Option, ) -> Self { let metrics = ProjectorMetrics { @@ -504,7 +504,7 @@ impl RecordBatchProjector { Self { stream, - record_fetching_ctx, + row_projector, metrics, start_time: Instant::now(), } @@ -534,7 +534,7 @@ impl Stream for RecordBatchProjector { projector.metrics.row_num += record_batch.num_rows(); let projected_batch = FetchedRecordBatch::try_new( - &projector.record_fetching_ctx, + &projector.row_projector, record_batch, ) .box_err() @@ -594,7 +594,7 @@ impl PrefetchableStream for RecordBatchReceiver { // Start the prefetch work in background when first poll is called. 
if let Some(tx) = self.bg_prefetch_tx.take() { if tx.send(()).is_err() { - error!("The receiver for start prefetching has been closed"); + error!("The receiver for start prefetched has been closed"); } } } @@ -615,7 +615,7 @@ impl Stream for RecordBatchReceiver { // Start the prefetch work in background when first poll is called. if let Some(tx) = self.bg_prefetch_tx.take() { if tx.send(()).is_err() { - error!("The receiver for start prefetching has been closed"); + error!("The receiver for start prefetched has been closed"); } } diff --git a/analytic_engine/src/sst/parquet/writer.rs b/analytic_engine/src/sst/parquet/writer.rs index 88c3762e6d..0ff23fb39b 100644 --- a/analytic_engine/src/sst/parquet/writer.rs +++ b/analytic_engine/src/sst/parquet/writer.rs @@ -525,7 +525,7 @@ mod tests { use bytes_ext::Bytes; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, tests::{build_row, build_row_for_dictionary, build_schema, build_schema_with_dictionary}, time::{TimeRange, Timestamp}, }; @@ -537,7 +537,7 @@ mod tests { use super::*; use crate::{ - row_iter::tests::build_fetching_record_batch_with_key, + row_iter::tests::build_fetched_record_batch_with_key, sst::{ factory::{ Factory, FactoryImpl, ReadFrequency, ScanOptions, SstReadOptions, SstWriteOptions, @@ -633,7 +633,7 @@ mod tests { "tagv2", ), ]; - let batch = build_fetching_record_batch_with_key(schema.clone(), rows); + let batch = build_fetched_record_batch_with_key(schema.clone(), rows); Poll::Ready(Some(Ok(batch))) })); @@ -659,7 +659,7 @@ mod tests { let scan_options = ScanOptions::default(); // read sst back to test - let record_fetching_ctx_builder = RecordFetchingContextBuilder::new( + let row_projector_builder = RowProjectorBuilder::new( reader_projected_schema.to_record_schema(), reader_projected_schema.table_schema().clone(), None, @@ -672,7 +672,7 @@ mod tests { meta_cache: None, scan_options, runtime: runtime.clone(), - record_fetching_ctx_builder, + row_projector_builder, }; let mut reader: Box = { @@ -805,7 +805,7 @@ mod tests { .map(|_| build_row(b"a", 100, 10.0, "v4", 1000, 1_000_000)) .collect::>(); - let batch = build_fetching_record_batch_with_key(schema_clone.clone(), rows); + let batch = build_fetched_record_batch_with_key(schema_clone.clone(), rows); poll_cnt += 1; Poll::Ready(Some(Ok(batch))) diff --git a/benchmarks/src/scan_memtable_bench.rs b/benchmarks/src/scan_memtable_bench.rs index b5cfc36000..70641c27d0 100644 --- a/benchmarks/src/scan_memtable_bench.rs +++ b/benchmarks/src/scan_memtable_bench.rs @@ -25,7 +25,7 @@ use analytic_engine::{ sst::meta_data::cache::MetaCacheRef, }; use arena::NoopCollector; -use common_types::projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}; +use common_types::projected_schema::{ProjectedSchema, RowProjectorBuilder}; use logger::info; use object_store::{LocalFileSystem, Path}; @@ -91,10 +91,10 @@ impl ScanMemTableBench { pub fn run_bench(&self) { let scan_ctx = ScanContext::default(); - let fetching_schema = self.projected_schema.to_record_schema(); + let fetched_schema = self.projected_schema.to_record_schema(); let table_schema = self.projected_schema.table_schema(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema.clone(), None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema.clone(), None); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, end_user_key: 
Bound::Unbounded, @@ -102,7 +102,7 @@ impl ScanMemTableBench { need_dedup: true, reverse: false, metrics_collector: None, - record_fetching_ctx_builder, + row_projector_builder, }; let iter = self.memtable.scan(scan_ctx, scan_req).unwrap(); diff --git a/benchmarks/src/sst_bench.rs b/benchmarks/src/sst_bench.rs index 00d1412300..6ed3183d50 100644 --- a/benchmarks/src/sst_bench.rs +++ b/benchmarks/src/sst_bench.rs @@ -25,7 +25,7 @@ use analytic_engine::{ ScanType, SstReadOptionsBuilder, }; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, schema::Schema, }; use logger::info; @@ -102,19 +102,19 @@ impl SstBench { let sst_factory = FactoryImpl; let store_picker: ObjectStorePickerRef = Arc::new(self.store.clone()); - let fetching_schema = self.projected_schema.as_ref().unwrap().to_record_schema(); + let fetched_schema = self.projected_schema.as_ref().unwrap().to_record_schema(); let table_schema = self .projected_schema .as_ref() .unwrap() .table_schema() .clone(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema, None); let sst_read_options = self .sst_read_options_builder .clone() - .build(record_fetching_ctx_builder); + .build(row_projector_builder); self.runtime.block_on(async { let mut sst_reader = sst_factory .create_reader( diff --git a/benchmarks/src/sst_tools.rs b/benchmarks/src/sst_tools.rs index 89dd301b5b..53e8cd08e7 100644 --- a/benchmarks/src/sst_tools.rs +++ b/benchmarks/src/sst_tools.rs @@ -41,7 +41,7 @@ use analytic_engine::{ ScanType, SstReadOptionsBuilder, }; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, request_id::RequestId, }; use generic_error::BoxError; @@ -125,10 +125,10 @@ pub async fn rebuild_sst(config: RebuildSstConfig, runtime: Arc) { num_streams_to_prefetch: 2, }; - let fetching_schema = projected_schema.to_record_schema(); + let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema, None); let sst_read_options = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), frequency: ReadFrequency::Once, @@ -137,7 +137,7 @@ pub async fn rebuild_sst(config: RebuildSstConfig, runtime: Arc) { meta_cache: None, scan_options, runtime, - record_fetching_ctx_builder, + row_projector_builder, }; let record_batch_stream = @@ -253,12 +253,12 @@ pub async fn merge_sst(config: MergeSstConfig, runtime: Arc) { None, runtime.clone(), ); - let fetching_schema = projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); - let fetching_schema = fetching_schema.into_record_schema(); + let fetched_schema = projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); + let fetched_schema = fetched_schema.into_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema, Some(primary_key_indexes)); + let row_projector_builder = + 
RowProjectorBuilder::new(fetched_schema, table_schema, Some(primary_key_indexes)); let iter = { let space_id = config.space_id; @@ -295,7 +295,7 @@ pub async fn merge_sst(config: MergeSstConfig, runtime: Arc) { row_iter::record_batch_with_key_iter_to_stream(iter) }; - let sst_read_options = sst_read_options_builder.build(record_fetching_ctx_builder); + let sst_read_options = sst_read_options_builder.build(row_projector_builder); let sst_meta = { let meta_reader = SstMetaReader { space_id, diff --git a/benchmarks/src/util.rs b/benchmarks/src/util.rs index 0857f58efb..6fb9ad8543 100644 --- a/benchmarks/src/util.rs +++ b/benchmarks/src/util.rs @@ -35,7 +35,7 @@ use analytic_engine::{ }; use bytes_ext::{BufMut, SafeBufMut}; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, schema::{IndexInWriterSchema, Schema}, }; use macros::define_result; @@ -125,10 +125,10 @@ pub async fn load_sst_to_memtable( }; let projected_schema = ProjectedSchema::no_projection(schema.clone()); - let fetching_schema = projected_schema.to_record_schema(); + let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema, None); let sst_read_options = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), frequency: ReadFrequency::Frequent, @@ -137,7 +137,7 @@ pub async fn load_sst_to_memtable( meta_cache: None, scan_options, runtime, - record_fetching_ctx_builder, + row_projector_builder, }; let sst_factory = FactoryImpl; let store_picker: ObjectStorePickerRef = Arc::new(store.clone()); diff --git a/common_types/src/projected_schema.rs b/common_types/src/projected_schema.rs index 15bcc46801..93322564de 100644 --- a/common_types/src/projected_schema.rs +++ b/common_types/src/projected_schema.rs @@ -61,11 +61,11 @@ pub enum Error { pub type Result = std::result::Result; #[derive(Debug, Clone)] -pub struct RecordFetchingContext { - /// The schema for data fetching +pub struct RowProjector { + /// The schema for data fetched /// It is derived from table schema and some columns may not exist in data /// source. - fetching_schema: RecordSchema, + fetched_schema: RecordSchema, /// primary_key_indexes: Option>, @@ -79,50 +79,50 @@ pub struct RecordFetchingContext { /// is not in source but required by reader, and need to filled by null. /// The length of Vec is the same as the number of columns reader intended /// to read. - fetching_source_column_indexes: Vec>, + fetched_source_column_indexes: Vec>, - /// Similar as `fetching_source_column_indexes`, but storing the projected + /// Similar as `fetched_source_column_indexes`, but storing the projected /// source column index /// /// For example: /// source column indexes: 0,1,2,3,4 - /// data fetching indexes in source: 2,1,3 + /// data fetched indexes in source: 2,1,3 /// /// We can see, only columns:[1,2,3] in source is needed, /// and their indexes in pulled projected record bath are: [0,1,2]. /// - /// So the stored data fetching indexes in projected source are: [1,0,2]. - fetching_projected_source_column_indexes: Vec>, + /// So the stored data fetched indexes in projected source are: [1,0,2]. 
+ fetched_projected_source_column_indexes: Vec>, } -impl RecordFetchingContext { +impl RowProjector { pub fn new( - fetching_schema: &RecordSchema, + fetched_schema: &RecordSchema, primary_key_indexes: Option>, table_schema: &Schema, source_schema: &Schema, ) -> Result { - // Get `fetching_source_column_indexes`. - let mut fetching_source_column_indexes = Vec::with_capacity(fetching_schema.num_columns()); - let mut projected_source_indexes = Vec::with_capacity(fetching_schema.num_columns()); - for column_schema in fetching_schema.columns() { + // Get `fetched_source_column_indexes`. + let mut fetched_source_column_indexes = Vec::with_capacity(fetched_schema.num_columns()); + let mut projected_source_indexes = Vec::with_capacity(fetched_schema.num_columns()); + for column_schema in fetched_schema.columns() { Self::try_project_column( column_schema, table_schema, source_schema, - &mut fetching_source_column_indexes, + &mut fetched_source_column_indexes, &mut projected_source_indexes, )?; } - // Get `fetching_projected_source_column_indexes` from - // `fetching_source_column_indexes`. + // Get `fetched_projected_source_column_indexes` from + // `fetched_source_column_indexes`. projected_source_indexes.sort_unstable(); - let fetching_projected_source_column_indexes = fetching_source_column_indexes + let fetched_projected_source_column_indexes = fetched_source_column_indexes .iter() .map(|source_idx_opt| { source_idx_opt.map(|src_idx| { - // Safe to unwrap, index exists in `fetching_source_column_indexes` is ensured + // Safe to unwrap, index exists in `fetched_source_column_indexes` is ensured // to exist in `projected_source_indexes`. projected_source_indexes .iter() @@ -132,12 +132,12 @@ impl RecordFetchingContext { }) .collect(); - Ok(RecordFetchingContext { - fetching_schema: fetching_schema.clone(), + Ok(RowProjector { + fetched_schema: fetched_schema.clone(), primary_key_indexes, source_schema: source_schema.clone(), - fetching_source_column_indexes, - fetching_projected_source_column_indexes, + fetched_source_column_indexes, + fetched_projected_source_column_indexes, }) } @@ -145,7 +145,7 @@ impl RecordFetchingContext { column: &ColumnSchema, table_schema: &Schema, source_schema: &Schema, - fetching_source_column_indexes: &mut Vec>, + fetched_source_column_indexes: &mut Vec>, projected_source_indexes: &mut Vec, ) -> Result<()> { match source_schema.index_of(&column.name) { @@ -153,7 +153,7 @@ impl RecordFetchingContext { // Column is in source if table_schema.version() == source_schema.version() { // Same version, just use that column in source - fetching_source_column_indexes.push(Some(source_idx)); + fetched_source_column_indexes.push(Some(source_idx)); projected_source_indexes.push(source_idx); } else { // Different version, need to check column schema @@ -165,11 +165,11 @@ impl RecordFetchingContext { .context(IncompatReadColumn)? 
{ ReadOp::Exact => { - fetching_source_column_indexes.push(Some(source_idx)); + fetched_source_column_indexes.push(Some(source_idx)); projected_source_indexes.push(source_idx); } ReadOp::FillNull => { - fetching_source_column_indexes.push(None); + fetched_source_column_indexes.push(None); } } } @@ -178,7 +178,7 @@ impl RecordFetchingContext { // Column is not in source ensure!(column.is_nullable, MissingReadColumn { name: &column.name }); // Column is nullable, fill this column by null - fetching_source_column_indexes.push(None); + fetched_source_column_indexes.push(None); } } @@ -189,13 +189,13 @@ impl RecordFetchingContext { &self.source_schema } - pub fn fetching_schema(&self) -> &RecordSchema { - &self.fetching_schema + pub fn fetched_schema(&self) -> &RecordSchema { + &self.fetched_schema } /// The projected indexes of existed columns in the source schema. pub fn existed_source_projection(&self) -> Vec { - self.fetching_source_column_indexes + self.fetched_source_column_indexes .iter() .filter_map(|index| *index) .collect() @@ -203,14 +203,14 @@ impl RecordFetchingContext { /// The projected indexes of all columns(existed and not exist) in the /// source schema. - pub fn fetching_source_column_indexes(&self) -> &[Option] { - &self.fetching_source_column_indexes + pub fn fetched_source_column_indexes(&self) -> &[Option] { + &self.fetched_source_column_indexes } /// The projected indexes of all columns(existed and not exist) in the /// projected source schema. - pub fn fetching_projected_source_column_indexes(&self) -> &[Option] { - &self.fetching_projected_source_column_indexes + pub fn fetched_projected_source_column_indexes(&self) -> &[Option] { + &self.fetched_projected_source_column_indexes } pub fn primary_key_indexes(&self) -> Option<&[usize]> { @@ -223,9 +223,9 @@ impl RecordFetchingContext { pub fn project_row(&self, row: &Row, mut datums_buffer: Vec) -> Row { assert_eq!(self.source_schema.num_columns(), row.num_columns()); - datums_buffer.reserve(self.fetching_schema.num_columns()); + datums_buffer.reserve(self.fetched_schema.num_columns()); - for p in &self.fetching_source_column_indexes { + for p in &self.fetched_source_column_indexes { let datum = match p { Some(index_in_source) => row[*index_in_source].clone(), None => Datum::Null, @@ -247,28 +247,28 @@ impl RecordFetchingContext { } #[derive(Debug, Clone)] -pub struct RecordFetchingContextBuilder { - fetching_schema: RecordSchema, +pub struct RowProjectorBuilder { + fetched_schema: RecordSchema, table_schema: Schema, primary_key_indexes: Option>, } -impl RecordFetchingContextBuilder { +impl RowProjectorBuilder { pub fn new( - fetching_schema: RecordSchema, + fetched_schema: RecordSchema, table_schema: Schema, primary_key_indexes: Option>, ) -> Self { Self { - fetching_schema, + fetched_schema, table_schema, primary_key_indexes, } } - pub fn build(&self, source_schema: &Schema) -> Result { - RecordFetchingContext::new( - &self.fetching_schema, + pub fn build(&self, source_schema: &Schema) -> Result { + RowProjector::new( + &self.fetched_schema, self.primary_key_indexes.clone(), &self.table_schema, source_schema, @@ -378,10 +378,10 @@ struct ProjectedSchemaInner { /// all columns are needed. projection: Option>, - /// The fetching record schema from `self.schema` with key columns after + /// The fetched record schema from `self.schema` with key columns after /// projection. record_schema_with_key: RecordSchemaWithKey, - /// The fetching record schema from `self.schema` after projection. 
+ /// The fetched record schema from `self.schema` after projection. target_record_schema: RecordSchema, } diff --git a/common_types/src/record_batch.rs b/common_types/src/record_batch.rs index 55a72a2287..3dc81d5c43 100644 --- a/common_types/src/record_batch.rs +++ b/common_types/src/record_batch.rs @@ -29,7 +29,7 @@ use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; use crate::{ column_block::{cast_nanosecond_to_mills, ColumnBlock, ColumnBlockBuilder}, datum::DatumKind, - projected_schema::{ProjectedSchema, RecordFetchingContext}, + projected_schema::{ProjectedSchema, RowProjector}, row::{ contiguous::{ContiguousRow, ProjectedContiguousRow}, Row, RowViewOnBatch, @@ -372,11 +372,11 @@ pub struct FetchedRecordBatch { impl FetchedRecordBatch { pub fn try_new( - ctx: &RecordFetchingContext, + ctx: &RowProjector, arrow_record_batch: ArrowRecordBatch, ) -> Result { - let column_indexes = ctx.fetching_projected_source_column_indexes(); - let schema = ctx.fetching_schema().clone(); + let column_indexes = ctx.fetched_projected_source_column_indexes(); + let schema = ctx.fetched_schema().clone(); let mut column_blocks = Vec::with_capacity(schema.num_columns()); let num_rows = arrow_record_batch.num_rows(); @@ -572,14 +572,14 @@ impl FetchedRecordBatch { } pub struct FetchedRecordBatchBuilder { - fetching_schema: RecordSchema, + fetched_schema: RecordSchema, primary_key_indexes: Option>, builders: Vec, } impl FetchedRecordBatchBuilder { - pub fn new(fetching_schema: RecordSchema, primary_key_indexes: Option>) -> Self { - let builders = fetching_schema + pub fn new(fetched_schema: RecordSchema, primary_key_indexes: Option>) -> Self { + let builders = fetched_schema .columns() .iter() .map(|column_schema| { @@ -591,7 +591,7 @@ impl FetchedRecordBatchBuilder { }) .collect(); Self { - fetching_schema, + fetched_schema, primary_key_indexes, builders, } @@ -614,7 +614,7 @@ impl FetchedRecordBatchBuilder { }) .collect(); Self { - fetching_schema: record_schema, + fetched_schema: record_schema, primary_key_indexes, builders, } @@ -717,10 +717,10 @@ impl FetchedRecordBatchBuilder { .iter_mut() .map(|builder| builder.build()) .collect(); - let arrow_schema = self.fetching_schema.to_arrow_schema_ref(); + let arrow_schema = self.fetched_schema.to_arrow_schema_ref(); Ok(FetchedRecordBatch { - schema: self.fetching_schema.clone(), + schema: self.fetched_schema.clone(), primary_key_indexes: self.primary_key_indexes.clone(), data: RecordBatchData::new(arrow_schema, column_blocks)?, }) @@ -733,14 +733,14 @@ mod tests { record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::RowViewOnBatch, tests::{ - build_fetching_record_batch_by_rows, build_projected_schema, build_rows, + build_fetched_record_batch_by_rows, build_projected_schema, build_rows, check_record_batch_with_key_with_rows, }, }; - fn build_fetching_record_batch() -> FetchedRecordBatch { + fn build_fetched_record_batch() -> FetchedRecordBatch { let rows = build_rows(); - build_fetching_record_batch_by_rows(rows) + build_fetched_record_batch_by_rows(rows) } fn check_record_batch_with_key( @@ -754,7 +754,7 @@ mod tests { #[test] fn test_append_projected_contiguous_row() { - let record_batch_with_key = build_fetching_record_batch(); + let record_batch_with_key = build_fetched_record_batch(); assert_eq!(record_batch_with_key.num_rows(), 5); assert_eq!(record_batch_with_key.num_columns(), 5); @@ -764,11 +764,11 @@ mod tests { #[test] fn test_append_row_view() { let projected_schema = build_projected_schema(); - let fetching_record_batch = 
build_fetching_record_batch(); + let fetched_record_batch = build_fetched_record_batch(); let mut builder = FetchedRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); let view = RowViewOnBatch { - record_batch: &fetching_record_batch, + record_batch: &fetched_record_batch, row_idx: 1, }; builder.append_row_view(&view).unwrap(); @@ -782,7 +782,7 @@ mod tests { #[test] fn test_append_batch_range() { let projected_schema = build_projected_schema(); - let record_batch_with_key = build_fetching_record_batch(); + let record_batch_with_key = build_fetched_record_batch(); let mut builder = FetchedRecordBatchBuilder::with_capacity(projected_schema.to_record_schema(), None, 2); diff --git a/common_types/src/row/contiguous.rs b/common_types/src/row/contiguous.rs index 93b8c4bd17..be1cdd5a73 100644 --- a/common_types/src/row/contiguous.rs +++ b/common_types/src/row/contiguous.rs @@ -26,7 +26,7 @@ use snafu::{ensure, Backtrace, Snafu}; use crate::{ datum::{Datum, DatumKind, DatumView}, - projected_schema::RecordFetchingContext, + projected_schema::RowProjector, row::{ bitset::{BitSet, RoBitSet}, Row, @@ -248,20 +248,20 @@ fn datum_view_at<'a>( /// schema of source row. pub struct ProjectedContiguousRow<'a, T> { source_row: T, - ctx: &'a RecordFetchingContext, + ctx: &'a RowProjector, } impl<'a, T: ContiguousRow> ProjectedContiguousRow<'a, T> { - pub fn new(source_row: T, ctx: &'a RecordFetchingContext) -> Self { + pub fn new(source_row: T, ctx: &'a RowProjector) -> Self { Self { source_row, ctx } } pub fn num_datum_views(&self) -> usize { - self.ctx.fetching_source_column_indexes().len() + self.ctx.fetched_source_column_indexes().len() } pub fn datum_view_at(&self, index: usize) -> DatumView { - let p = self.ctx.fetching_source_column_indexes()[index]; + let p = self.ctx.fetched_source_column_indexes()[index]; match p { Some(index_in_source) => { @@ -798,7 +798,7 @@ mod tests { let projection: Vec = (0..schema.num_columns() - 1).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection.clone())).unwrap(); - let ctx = RecordFetchingContext::new( + let ctx = RowProjector::new( &projected_schema.to_record_schema(), None, projected_schema.table_schema(), diff --git a/common_types/src/tests.rs b/common_types/src/tests.rs index 5400e9efa8..24bc3659d6 100644 --- a/common_types/src/tests.rs +++ b/common_types/src/tests.rs @@ -18,7 +18,7 @@ use sqlparser::ast::{BinaryOperator, Expr, Value}; use crate::{ column_schema, datum::{Datum, DatumKind}, - projected_schema::{ProjectedSchema, RecordFetchingContext}, + projected_schema::{ProjectedSchema, RowProjector}, record_batch::{FetchedRecordBatch, FetchedRecordBatchBuilder}, row::{ contiguous::{ContiguousRowReader, ContiguousRowWriter, ProjectedContiguousRow}, @@ -357,17 +357,17 @@ pub fn build_rows() -> Vec { ] } -pub fn build_fetching_record_batch_by_rows(rows: Vec) -> FetchedRecordBatch { +pub fn build_fetched_record_batch_by_rows(rows: Vec) -> FetchedRecordBatch { let schema = build_schema(); assert!(schema.num_columns() > 1); let projection: Vec = (0..schema.num_columns() - 1).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); - let record_fetching_ctx = - RecordFetchingContext::new(&projected_schema.to_record_schema(), None, &schema, &schema) + let row_projector = + RowProjector::new(&projected_schema.to_record_schema(), None, &schema, &schema) .unwrap(); let mut builder = FetchedRecordBatchBuilder::with_capacity( - 
record_fetching_ctx.fetching_schema().clone(), + row_projector.fetched_schema().clone(), None, 2, ); @@ -380,7 +380,7 @@ pub fn build_fetching_record_batch_by_rows(rows: Vec) -> FetchedRecordBatch writer.write_row(&row).unwrap(); let source_row = ContiguousRowReader::try_new(&buf, &schema).unwrap(); - let projected_row = ProjectedContiguousRow::new(source_row, &record_fetching_ctx); + let projected_row = ProjectedContiguousRow::new(source_row, &row_projector); builder .append_projected_contiguous_row(&projected_row) .unwrap(); diff --git a/components/object_store/src/disk_cache.rs b/components/object_store/src/disk_cache.rs index 12d8d31492..3be5786457 100644 --- a/components/object_store/src/disk_cache.rs +++ b/components/object_store/src/disk_cache.rs @@ -825,7 +825,7 @@ impl ObjectStore for DiskCacheStore { } async fn get(&self, location: &Path) -> Result { - // In sst module, we only use get_range, fetching a whole file is not used, and + // In sst module, we only use get_range, fetched a whole file is not used, and // it is not good for disk cache. self.underlying_store.get(location).await } diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result index 6db29a68e9..0642f86fe2 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.result +++ b/integration_tests/cases/env/local/ddl/query-plan.result @@ -31,7 +31,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetching_columns:[tsid,t]:\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), -- This query should not include memtable @@ -51,7 +51,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetching_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate 
{ exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), -- This query should not include SST @@ -92,7 +92,7 @@ explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetching_columns:[t,name]:\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetched_columns:[t,name]:\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), -- Should just fetch projected columns from SST @@ -106,7 +106,7 @@ explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetching_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) 
} }=0]\n"), +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetched_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), DROP TABLE `03_dml_select_real_time_range`; diff --git a/src/wal/src/message_queue_impl/region.rs b/src/wal/src/message_queue_impl/region.rs index 7008f3e761..62cc5b522e 100644 --- a/src/wal/src/message_queue_impl/region.rs +++ b/src/wal/src/message_queue_impl/region.rs @@ -810,7 +810,7 @@ pub struct MessageQueueLogIterator { /// Polling's end point /// - /// While fetching in slave node, it will be set to `None`, and + /// While fetched in slave node, it will be set to `None`, and /// reading will not stop. /// Otherwise, it will be set to high watermark. terminate_offset: Option, diff --git a/system_catalog/src/tables.rs b/system_catalog/src/tables.rs index 97af4173ac..ea17057a80 100644 --- a/system_catalog/src/tables.rs +++ b/system_catalog/src/tables.rs @@ -21,7 +21,7 @@ use catalog::{manager::ManagerRef, schema::SchemaRef, CatalogRef}; use common_types::{ column_schema, datum::{Datum, DatumKind}, - projected_schema::RecordFetchingContext, + projected_schema::RowProjector, record_batch::FetchedRecordBatchBuilder, row::Row, schema, @@ -154,17 +154,17 @@ impl SystemTable for Tables { .all_catalogs() .box_err() .context(table_engine::table::Scan { table: self.name() })?; - let fetching_schema = request.projected_schema.to_record_schema_with_key(); - let primary_key_indexes = fetching_schema.primary_key_idx().to_vec(); - let fetching_schema = fetching_schema.to_record_schema(); + let fetched_schema = request.projected_schema.to_record_schema_with_key(); + let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); + let fetched_schema = fetched_schema.to_record_schema(); let mut builder = FetchedRecordBatchBuilder::new( - fetching_schema.clone(), + fetched_schema.clone(), Some(primary_key_indexes.clone()), ); let table_schema = request.projected_schema.table_schema(); - let record_fetching_ctx = RecordFetchingContext::new( - &fetching_schema, + let row_projector = RowProjector::new( + &fetched_schema, Some(primary_key_indexes), table_schema, &self.schema, @@ -182,7 +182,7 @@ impl SystemTable for Tables { .context(table_engine::table::Scan { table: self.name() })? 
{ let row = self.from_table(catalog.clone(), schema.clone(), table.clone()); - let projected_row = record_fetching_ctx.project_row(&row, Vec::new()); + let projected_row = row_projector.project_row(&row, Vec::new()); builder .append_row(projected_row) .box_err() diff --git a/tools/src/bin/sst-convert.rs b/tools/src/bin/sst-convert.rs index cd8202df6b..9e2c852941 100644 --- a/tools/src/bin/sst-convert.rs +++ b/tools/src/bin/sst-convert.rs @@ -31,7 +31,7 @@ use analytic_engine::{ use anyhow::{Context, Result}; use clap::Parser; use common_types::{ - projected_schema::{ProjectedSchema, RecordFetchingContextBuilder}, + projected_schema::{ProjectedSchema, RowProjectorBuilder}, request_id::RequestId, }; use generic_error::BoxError; @@ -97,10 +97,10 @@ async fn run(args: Args, runtime: Arc) -> Result<()> { let scan_options = ScanOptions::default(); let projected_schema = ProjectedSchema::no_projection(sst_meta.schema.clone()); - let fetching_schema = projected_schema.to_record_schema(); + let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let record_fetching_ctx_builder = - RecordFetchingContextBuilder::new(fetching_schema, table_schema, None); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema, None); let reader_opts = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("tool")), frequency: ReadFrequency::Once, @@ -109,7 +109,7 @@ async fn run(args: Args, runtime: Arc) -> Result<()> { meta_cache: None, scan_options, runtime, - record_fetching_ctx_builder, + row_projector_builder, }; let store_picker: ObjectStorePickerRef = Arc::new(store); let mut reader = factory From 4a727c96e433815b421d89d19aed041e4f1c36d8 Mon Sep 17 00:00:00 2001 From: kamille Date: Thu, 21 Dec 2023 21:52:12 +0800 Subject: [PATCH 10/13] better naming and more detailed commands for fields in `RowProjector`. 
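The main change here is documenting the two index mappings kept by `RowProjector`: `source_projection_indexes` records, for each column in the target record schema, where that column lives in the data source (`None` when the column is missing and must be null-filled), while `target_record_projection_remapping` reorders the record batch actually fetched back into the projection order, because the parquet reader hands back the wanted columns in ascending source order rather than in the requested order. As a rough illustration of how the remapping can be derived (a minimal, self-contained sketch written only for this message, using a hypothetical `remap` helper and the index values from the new doc comments, not the engine's actual code):

    /// For each projected column, take the source column index it maps to
    /// (`None` means the column is absent in the source and will be filled
    /// with null) and return its position inside the fetched record batch,
    /// which holds the existing columns in ascending source-index order.
    fn remap(source_projection_indexes: &[Option<usize>]) -> Vec<Option<usize>> {
        // The columns that actually exist in the source, sorted ascending:
        // this is the order the reader returns them in.
        let mut existed: Vec<usize> = source_projection_indexes
            .iter()
            .filter_map(|idx| *idx)
            .collect();
        existed.sort_unstable();

        source_projection_indexes
            .iter()
            .map(|idx| idx.and_then(|i| existed.iter().position(|&e| e == i)))
            .collect()
    }

    fn main() {
        // Wanted source columns in projection order: 2,1,3.
        // The reader fetches them as 1,2,3, so the remapping is [1,0,2].
        assert_eq!(
            remap(&[Some(2), Some(1), Some(3)]),
            vec![Some(1), Some(0), Some(2)]
        );
    }

Keeping both mappings means the reorder is computed once in `RowProjector::new` instead of for every fetched batch: the source-side indexes are used when projecting rows, and the record-side remapping is used when rebuilding a `FetchedRecordBatch` from the arrow batch.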
--- .../src/instance/flush_compaction.rs | 7 +-- analytic_engine/src/instance/mod.rs | 5 +- analytic_engine/src/instance/read.rs | 2 +- analytic_engine/src/memtable/skiplist/iter.rs | 3 +- analytic_engine/src/row_iter/mod.rs | 3 +- .../src/row_iter/record_batch_stream.rs | 7 +-- .../src/sst/parquet/async_reader.rs | 15 ++--- benchmarks/src/sst_bench.rs | 3 +- benchmarks/src/sst_tools.rs | 3 +- benchmarks/src/util.rs | 3 +- common_types/src/projected_schema.rs | 63 ++++++++++++------- common_types/src/record_batch.rs | 5 +- common_types/src/tests.rs | 10 +-- tools/src/bin/sst-convert.rs | 3 +- 14 files changed, 61 insertions(+), 71 deletions(-) diff --git a/analytic_engine/src/instance/flush_compaction.rs b/analytic_engine/src/instance/flush_compaction.rs index 553ccc91c4..5aefe59e86 100644 --- a/analytic_engine/src/instance/flush_compaction.rs +++ b/analytic_engine/src/instance/flush_compaction.rs @@ -897,11 +897,8 @@ impl SpaceStore { let primary_key_indexes = fetched_schema.primary_key_idx().to_vec(); let fetched_schema = fetched_schema.into_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = RowProjectorBuilder::new( - fetched_schema, - table_schema, - Some(primary_key_indexes), - ); + let row_projector_builder = + RowProjectorBuilder::new(fetched_schema, table_schema, Some(primary_key_indexes)); let iter_options = IterOptions { batch_size: table_options.num_rows_per_row_group, diff --git a/analytic_engine/src/instance/mod.rs b/analytic_engine/src/instance/mod.rs index dbd0aa17c3..91ab989571 100644 --- a/analytic_engine/src/instance/mod.rs +++ b/analytic_engine/src/instance/mod.rs @@ -345,10 +345,7 @@ impl SstReadOptionsBuilder { } } - pub fn build( - self, - row_projector_builder: RowProjectorBuilder, - ) -> SstReadOptions { + pub fn build(self, row_projector_builder: RowProjectorBuilder) -> SstReadOptions { SstReadOptions { maybe_table_level_metrics: self.maybe_table_level_metrics, num_rows_per_row_group: self.num_rows_per_row_group, diff --git a/analytic_engine/src/instance/read.rs b/analytic_engine/src/instance/read.rs index 8fdb500746..935ae10060 100644 --- a/analytic_engine/src/instance/read.rs +++ b/analytic_engine/src/instance/read.rs @@ -205,7 +205,7 @@ impl Instance { let version = table_data.current_version(); let read_views = self.partition_ssts_and_memtables(time_range, version, table_options); let iter_options = self.make_iter_options(table_options.num_rows_per_row_group); - // generate builder + let mut iters = Vec::with_capacity(read_views.len()); for (idx, read_view) in read_views.into_iter().enumerate() { let metrics_collector = request diff --git a/analytic_engine/src/memtable/skiplist/iter.rs b/analytic_engine/src/memtable/skiplist/iter.rs index 24a7a0a787..6b69b9988b 100644 --- a/analytic_engine/src/memtable/skiplist/iter.rs +++ b/analytic_engine/src/memtable/skiplist/iter.rs @@ -165,8 +165,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { if let Some(row) = self.fetch_next_row()? 
{ let row_reader = ContiguousRowReader::try_new(&row, &self.memtable_schema) .context(DecodeContinuousRow)?; - let projected_row = - ProjectedContiguousRow::new(row_reader, &self.row_projector); + let projected_row = ProjectedContiguousRow::new(row_reader, &self.row_projector); trace!("Column iterator fetch next row, row:{:?}", projected_row); diff --git a/analytic_engine/src/row_iter/mod.rs b/analytic_engine/src/row_iter/mod.rs index e8f8f7be2d..d509bec85e 100644 --- a/analytic_engine/src/row_iter/mod.rs +++ b/analytic_engine/src/row_iter/mod.rs @@ -43,8 +43,7 @@ pub trait FetchedRecordBatchIterator: Send { fn schema(&self) -> &RecordSchemaWithKey; - async fn next_batch(&mut self) - -> std::result::Result, Self::Error>; + async fn next_batch(&mut self) -> std::result::Result, Self::Error>; } pub fn record_batch_with_key_iter_to_stream( diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 504c79a978..3c65160985 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -23,8 +23,8 @@ use arrow::{ datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef}, }; use common_types::{ - projected_schema::RowProjectorBuilder, record_batch::FetchedRecordBatch, - schema::RecordSchema, SequenceNumber, + projected_schema::RowProjectorBuilder, record_batch::FetchedRecordBatch, schema::RecordSchema, + SequenceNumber, }; use datafusion::{ common::ToDFSchema, @@ -246,8 +246,7 @@ pub fn stream_from_memtable( .columns() .iter() .format_with(",", |col, f| f(&format_args!("{}", col.name))); - let scan_memtable_desc = - format!("scan_memtable_{max_seq}, fetched_columns:[{fetched_cols}]",); + let scan_memtable_desc = format!("scan_memtable_{max_seq}, fetched_columns:[{fetched_cols}]",); let metrics_collector = metrics_collector.map(|v| v.span(scan_memtable_desc)); let scan_req = ScanRequest { start_user_key: Bound::Unbounded, diff --git a/analytic_engine/src/sst/parquet/async_reader.rs b/analytic_engine/src/sst/parquet/async_reader.rs index 173bd68a90..81724a238a 100644 --- a/analytic_engine/src/sst/parquet/async_reader.rs +++ b/analytic_engine/src/sst/parquet/async_reader.rs @@ -296,10 +296,7 @@ impl<'a> Reader<'a> { let parquet_metadata = meta_data.parquet(); let proj_mask = ProjectionMask::leaves( meta_data.parquet().file_metadata().schema_descr(), - row_projector - .existed_source_projection() - .iter() - .copied(), + row_projector.existed_source_projection().iter().copied(), ); debug!( "Reader fetch record batches, parallelism suggest:{}, real:{}, chunk_size:{}, project:{:?}", @@ -533,12 +530,10 @@ impl Stream for RecordBatchProjector { } projector.metrics.row_num += record_batch.num_rows(); - let projected_batch = FetchedRecordBatch::try_new( - &projector.row_projector, - record_batch, - ) - .box_err() - .context(DecodeRecordBatch {}); + let projected_batch = + FetchedRecordBatch::try_new(&projector.row_projector, record_batch) + .box_err() + .context(DecodeRecordBatch {}); Poll::Ready(Some(projected_batch)) } diff --git a/benchmarks/src/sst_bench.rs b/benchmarks/src/sst_bench.rs index 6ed3183d50..25fd2c64d6 100644 --- a/benchmarks/src/sst_bench.rs +++ b/benchmarks/src/sst_bench.rs @@ -109,8 +109,7 @@ impl SstBench { .unwrap() .table_schema() .clone(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema, None); + let row_projector_builder = RowProjectorBuilder::new(fetched_schema, table_schema, None); let sst_read_options = 
self .sst_read_options_builder .clone() diff --git a/benchmarks/src/sst_tools.rs b/benchmarks/src/sst_tools.rs index 53e8cd08e7..798077322e 100644 --- a/benchmarks/src/sst_tools.rs +++ b/benchmarks/src/sst_tools.rs @@ -127,8 +127,7 @@ pub async fn rebuild_sst(config: RebuildSstConfig, runtime: Arc) { let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema, None); + let row_projector_builder = RowProjectorBuilder::new(fetched_schema, table_schema, None); let sst_read_options = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), frequency: ReadFrequency::Once, diff --git a/benchmarks/src/util.rs b/benchmarks/src/util.rs index 6fb9ad8543..bd8e6a8468 100644 --- a/benchmarks/src/util.rs +++ b/benchmarks/src/util.rs @@ -127,8 +127,7 @@ pub async fn load_sst_to_memtable( let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema, None); + let row_projector_builder = RowProjectorBuilder::new(fetched_schema, table_schema, None); let sst_read_options = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("bench")), frequency: ReadFrequency::Frequent, diff --git a/common_types/src/projected_schema.rs b/common_types/src/projected_schema.rs index 93322564de..4d6ca9b804 100644 --- a/common_types/src/projected_schema.rs +++ b/common_types/src/projected_schema.rs @@ -65,9 +65,11 @@ pub struct RowProjector { /// The schema for data fetched /// It is derived from table schema and some columns may not exist in data /// source. - fetched_schema: RecordSchema, + target_record_schema: RecordSchema, - /// + /// Primary key indexes in `fetched_schema`. + /// It will be `None` if update mode of table is `append`, + /// and will be `Some` if the mode is `overwrite`. primary_key_indexes: Option>, /// Schema in data source @@ -75,24 +77,37 @@ pub struct RowProjector { /// schema caused by table schema altering. source_schema: Schema, - /// The Vec stores the column index in source, and `None` means this column - /// is not in source but required by reader, and need to filled by null. - /// The length of Vec is the same as the number of columns reader intended - /// to read. - fetched_source_column_indexes: Vec>, + /// The Vec stores the column index in data source, and `None` means this + /// column is not in source but required by reader, and need to filled + /// by null. The length of Vec is the same as the number of columns + /// reader intended to read. + source_projection_indexes: Vec>, - /// Similar as `fetched_source_column_indexes`, but storing the projected - /// source column index + /// Used to reorder columns in arrow record batch fetched from sst to the + /// needed projection order. + /// Actually, It stores the record column indexes in + /// projected order similar as `source_projection_indexes`. + /// + /// Why we need it? + /// Because in current rust parquet impl, we can just define which columns + /// we wanted to fetch without their order. 
/// /// For example: - /// source column indexes: 0,1,2,3,4 - /// data fetched indexes in source: 2,1,3 + /// wanted columns in order: 2,1,3 + /// actual fetched columns: 1,2,3 + /// + /// However, projection is not only wanted columns but with wanted order, so + /// we need this remapping to reorder the fetched record. /// - /// We can see, only columns:[1,2,3] in source is needed, - /// and their indexes in pulled projected record bath are: [0,1,2]. + /// For example: + /// source columns in sst: 0,1,2,3,4 + /// target projection columns: 2,1,3 + /// + /// the actual columns in fetched record: 1,2,3 + /// relative columns indexes in fetched record: 0,1,2 /// - /// So the stored data fetched indexes in projected source are: [1,0,2]. - fetched_projected_source_column_indexes: Vec>, + /// finally, the remapping to the relative indexes: 1,0,2 + target_record_projection_remapping: Vec>, } impl RowProjector { @@ -133,11 +148,11 @@ impl RowProjector { .collect(); Ok(RowProjector { - fetched_schema: fetched_schema.clone(), + target_record_schema: fetched_schema.clone(), primary_key_indexes, source_schema: source_schema.clone(), - fetched_source_column_indexes, - fetched_projected_source_column_indexes, + source_projection_indexes: fetched_source_column_indexes, + target_record_projection_remapping: fetched_projected_source_column_indexes, }) } @@ -190,12 +205,12 @@ impl RowProjector { } pub fn fetched_schema(&self) -> &RecordSchema { - &self.fetched_schema + &self.target_record_schema } /// The projected indexes of existed columns in the source schema. pub fn existed_source_projection(&self) -> Vec { - self.fetched_source_column_indexes + self.source_projection_indexes .iter() .filter_map(|index| *index) .collect() @@ -204,13 +219,13 @@ impl RowProjector { /// The projected indexes of all columns(existed and not exist) in the /// source schema. pub fn fetched_source_column_indexes(&self) -> &[Option] { - &self.fetched_source_column_indexes + &self.source_projection_indexes } /// The projected indexes of all columns(existed and not exist) in the /// projected source schema. 
pub fn fetched_projected_source_column_indexes(&self) -> &[Option] { - &self.fetched_projected_source_column_indexes + &self.target_record_projection_remapping } pub fn primary_key_indexes(&self) -> Option<&[usize]> { @@ -223,9 +238,9 @@ impl RowProjector { pub fn project_row(&self, row: &Row, mut datums_buffer: Vec) -> Row { assert_eq!(self.source_schema.num_columns(), row.num_columns()); - datums_buffer.reserve(self.fetched_schema.num_columns()); + datums_buffer.reserve(self.target_record_schema.num_columns()); - for p in &self.fetched_source_column_indexes { + for p in &self.source_projection_indexes { let datum = match p { Some(index_in_source) => row[*index_in_source].clone(), None => Datum::Null, diff --git a/common_types/src/record_batch.rs b/common_types/src/record_batch.rs index 3dc81d5c43..b657a1fced 100644 --- a/common_types/src/record_batch.rs +++ b/common_types/src/record_batch.rs @@ -371,10 +371,7 @@ pub struct FetchedRecordBatch { } impl FetchedRecordBatch { - pub fn try_new( - ctx: &RowProjector, - arrow_record_batch: ArrowRecordBatch, - ) -> Result { + pub fn try_new(ctx: &RowProjector, arrow_record_batch: ArrowRecordBatch) -> Result { let column_indexes = ctx.fetched_projected_source_column_indexes(); let schema = ctx.fetched_schema().clone(); let mut column_blocks = Vec::with_capacity(schema.num_columns()); diff --git a/common_types/src/tests.rs b/common_types/src/tests.rs index 24bc3659d6..4abbaa5817 100644 --- a/common_types/src/tests.rs +++ b/common_types/src/tests.rs @@ -363,14 +363,10 @@ pub fn build_fetched_record_batch_by_rows(rows: Vec) -> FetchedRecordBatch let projection: Vec = (0..schema.num_columns() - 1).collect(); let projected_schema = ProjectedSchema::new(schema.clone(), Some(projection)).unwrap(); let row_projector = - RowProjector::new(&projected_schema.to_record_schema(), None, &schema, &schema) - .unwrap(); + RowProjector::new(&projected_schema.to_record_schema(), None, &schema, &schema).unwrap(); - let mut builder = FetchedRecordBatchBuilder::with_capacity( - row_projector.fetched_schema().clone(), - None, - 2, - ); + let mut builder = + FetchedRecordBatchBuilder::with_capacity(row_projector.fetched_schema().clone(), None, 2); let index_in_writer = IndexInWriterSchema::for_same_schema(schema.num_columns()); let mut buf = Vec::new(); diff --git a/tools/src/bin/sst-convert.rs b/tools/src/bin/sst-convert.rs index 9e2c852941..65797e2f2a 100644 --- a/tools/src/bin/sst-convert.rs +++ b/tools/src/bin/sst-convert.rs @@ -99,8 +99,7 @@ async fn run(args: Args, runtime: Arc) -> Result<()> { let fetched_schema = projected_schema.to_record_schema(); let table_schema = projected_schema.table_schema().clone(); - let row_projector_builder = - RowProjectorBuilder::new(fetched_schema, table_schema, None); + let row_projector_builder = RowProjectorBuilder::new(fetched_schema, table_schema, None); let reader_opts = SstReadOptions { maybe_table_level_metrics: Arc::new(SstMaybeTableLevelMetrics::new("tool")), frequency: ReadFrequency::Once, From ed4719d5d40a400cbcd7fd7fcbb7a66e48fa766d Mon Sep 17 00:00:00 2001 From: kamille Date: Fri, 22 Dec 2023 16:26:43 +0800 Subject: [PATCH 11/13] fix integration test. 
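Two follow-ups are handled here. The golden results in query-plan.result are updated to the new output layout, where the predicate is printed at the head of the scan_table metrics block instead of after it. In addition, `CreateExistTable` in catalog/src/schema.rs no longer carries a `Backtrace`, since including one can lead to a segmentation fault (see the FIXME added in the diff). A minimal sketch of the resulting variant shape, written here only for illustration (the real definition is in the diff below):

    use snafu::Snafu;

    #[derive(Debug, Snafu)]
    #[snafu(visibility(pub))]
    pub enum Error {
        // No `backtrace` field, so snafu will not capture one when the
        // error is constructed.
        #[snafu(display("Failed to create table, table already exists, table:{table}."))]
        CreateExistTable { table: String },
    }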
--- catalog/src/schema.rs | 21 +++++++++++++++---- .../cases/env/local/ddl/query-plan.result | 8 +++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/catalog/src/schema.rs b/catalog/src/schema.rs index 01d27d5447..d46cd541f2 100644 --- a/catalog/src/schema.rs +++ b/catalog/src/schema.rs @@ -1,3 +1,17 @@ +// Copyright 2023 The CeresDB Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,6 +40,7 @@ use table_engine::{ table::{SchemaId, TableId, TableRef}, }; +// FIXME: `CreateExistTable` can lead to `segmentation fault` if including backtrace. #[derive(Debug, Snafu)] #[snafu(visibility(pub))] pub enum Error { @@ -119,11 +134,9 @@ pub enum Error { CloseTableWithCause { source: GenericError }, #[snafu(display( - "Failed to create table, table already exists, table:{}.\nBacktrace:\n{}", - table, - backtrace + "Failed to create table, table already exists, table:{table}." ))] - CreateExistTable { table: String, backtrace: Backtrace }, + CreateExistTable { table: String }, #[snafu(display( "Failed to create table, cannot persist meta, table:{}, err:{}", diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result index 6a252cb531..26dcf9098e 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.result +++ b/integration_tests/cases/env/local/ddl/query-plan.result @@ -31,7 +31,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), -- This query should not include memtable @@ -51,7 +51,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: 
table=03_dml_select_real_time_range, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), -- This query should not include SST @@ -92,7 +92,7 @@ explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetched_columns:[t,name]:\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetched_columns:[t,name]:\n=0]\n"), -- Should just fetch projected columns from SST @@ -106,7 +106,7 @@ explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n 
num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetched_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n\n\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }=0]\n"), +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetched_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), DROP TABLE `03_dml_select_real_time_range`; From f70da9b4eba443cd397f1cc56ec5498478e9c473 Mon Sep 17 00:00:00 2001 From: kamille Date: Fri, 22 Dec 2023 16:38:15 +0800 Subject: [PATCH 12/13] fix license. --- .../src/instance/flush_compaction.rs | 14 ------------- analytic_engine/src/instance/mod.rs | 14 ------------- analytic_engine/src/instance/read.rs | 14 ------------- analytic_engine/src/lib.rs | 14 ------------- analytic_engine/src/memtable/columnar/iter.rs | 14 ------------- analytic_engine/src/memtable/mod.rs | 14 ------------- analytic_engine/src/memtable/reversed_iter.rs | 14 ------------- analytic_engine/src/memtable/skiplist/iter.rs | 14 ------------- analytic_engine/src/memtable/skiplist/mod.rs | 14 ------------- analytic_engine/src/row_iter/chain.rs | 14 ------------- analytic_engine/src/row_iter/dedup.rs | 14 ------------- analytic_engine/src/row_iter/merge.rs | 14 ------------- analytic_engine/src/row_iter/mod.rs | 14 ------------- .../src/row_iter/record_batch_stream.rs | 14 ------------- analytic_engine/src/row_iter/tests.rs | 14 ------------- analytic_engine/src/sst/factory.rs | 14 ------------- .../src/sst/parquet/async_reader.rs | 14 ------------- analytic_engine/src/sst/parquet/writer.rs | 14 ------------- analytic_engine/src/sst/reader.rs | 14 ------------- analytic_engine/src/sst/writer.rs | 14 ------------- benchmarks/src/merge_memtable_bench.rs | 14 ------------- benchmarks/src/merge_sst_bench.rs | 14 ------------- benchmarks/src/scan_memtable_bench.rs | 14 ------------- benchmarks/src/sst_bench.rs | 14 ------------- benchmarks/src/sst_tools.rs | 14 ------------- benchmarks/src/util.rs | 14 ------------- catalog/src/schema.rs | 21 +++---------------- common_types/src/projected_schema.rs | 14 ------------- common_types/src/record_batch.rs | 14 ------------- common_types/src/row/contiguous.rs | 14 ------------- common_types/src/row/mod.rs | 14 ------------- common_types/src/schema.rs | 14 ------------- common_types/src/tests.rs | 14 ------------- 
components/object_store/src/disk_cache.rs | 14 ------------- partition_table_engine/src/scan_builder.rs | 14 ------------- src/wal/src/message_queue_impl/region.rs | 14 ------------- system_catalog/src/tables.rs | 14 ------------- table_engine/src/provider.rs | 14 ------------- tools/src/bin/sst-convert.rs | 14 ------------- 39 files changed, 3 insertions(+), 550 deletions(-) diff --git a/analytic_engine/src/instance/flush_compaction.rs b/analytic_engine/src/instance/flush_compaction.rs index e046a1f590..880eb10ca9 100644 --- a/analytic_engine/src/instance/flush_compaction.rs +++ b/analytic_engine/src/instance/flush_compaction.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/instance/mod.rs b/analytic_engine/src/instance/mod.rs index a0c7b9c858..ab8df1ef9b 100644 --- a/analytic_engine/src/instance/mod.rs +++ b/analytic_engine/src/instance/mod.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/instance/read.rs b/analytic_engine/src/instance/read.rs index d523cb9ce8..9624f4cfbb 100644 --- a/analytic_engine/src/instance/read.rs +++ b/analytic_engine/src/instance/read.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/lib.rs b/analytic_engine/src/lib.rs index 8f74211eba..e7d7f81027 100644 --- a/analytic_engine/src/lib.rs +++ b/analytic_engine/src/lib.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/memtable/columnar/iter.rs b/analytic_engine/src/memtable/columnar/iter.rs index 70537440db..57ea4e6ebd 100644 --- a/analytic_engine/src/memtable/columnar/iter.rs +++ b/analytic_engine/src/memtable/columnar/iter.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/memtable/mod.rs b/analytic_engine/src/memtable/mod.rs index bd8b7a6aeb..ed3b20d348 100644 --- a/analytic_engine/src/memtable/mod.rs +++ b/analytic_engine/src/memtable/mod.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/memtable/reversed_iter.rs b/analytic_engine/src/memtable/reversed_iter.rs index 06229a1d20..5a9d5d75d3 100644 --- a/analytic_engine/src/memtable/reversed_iter.rs +++ b/analytic_engine/src/memtable/reversed_iter.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/memtable/skiplist/iter.rs b/analytic_engine/src/memtable/skiplist/iter.rs index 62cac59084..60dd18ca0b 100644 --- a/analytic_engine/src/memtable/skiplist/iter.rs +++ b/analytic_engine/src/memtable/skiplist/iter.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/memtable/skiplist/mod.rs b/analytic_engine/src/memtable/skiplist/mod.rs index 7e02be93d4..a71a82a612 100644 --- a/analytic_engine/src/memtable/skiplist/mod.rs +++ b/analytic_engine/src/memtable/skiplist/mod.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/row_iter/chain.rs b/analytic_engine/src/row_iter/chain.rs index 8bedd12f5e..3f8bff6bb9 100644 --- a/analytic_engine/src/row_iter/chain.rs +++ b/analytic_engine/src/row_iter/chain.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/row_iter/dedup.rs b/analytic_engine/src/row_iter/dedup.rs index 1e7928d0fc..a35d1489f2 100644 --- a/analytic_engine/src/row_iter/dedup.rs +++ b/analytic_engine/src/row_iter/dedup.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/row_iter/merge.rs b/analytic_engine/src/row_iter/merge.rs index e7413ce53b..e9029060cc 100644 --- a/analytic_engine/src/row_iter/merge.rs +++ b/analytic_engine/src/row_iter/merge.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/row_iter/mod.rs b/analytic_engine/src/row_iter/mod.rs index bbb0123a9e..f3c5ac4d35 100644 --- a/analytic_engine/src/row_iter/mod.rs +++ b/analytic_engine/src/row_iter/mod.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/row_iter/record_batch_stream.rs b/analytic_engine/src/row_iter/record_batch_stream.rs index 851148bb17..dd0f4d132e 100644 --- a/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/analytic_engine/src/row_iter/record_batch_stream.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/row_iter/tests.rs b/analytic_engine/src/row_iter/tests.rs index 0a86738687..0db3c8bd91 100644 --- a/analytic_engine/src/row_iter/tests.rs +++ b/analytic_engine/src/row_iter/tests.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/sst/factory.rs b/analytic_engine/src/sst/factory.rs index c920f41766..9f0c00313d 100644 --- a/analytic_engine/src/sst/factory.rs +++ b/analytic_engine/src/sst/factory.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/sst/parquet/async_reader.rs b/analytic_engine/src/sst/parquet/async_reader.rs index e668d6a541..be98479619 100644 --- a/analytic_engine/src/sst/parquet/async_reader.rs +++ b/analytic_engine/src/sst/parquet/async_reader.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/sst/parquet/writer.rs b/analytic_engine/src/sst/parquet/writer.rs index 5eb7823144..e84adea7f8 100644 --- a/analytic_engine/src/sst/parquet/writer.rs +++ b/analytic_engine/src/sst/parquet/writer.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/sst/reader.rs b/analytic_engine/src/sst/reader.rs index 3f31b79f00..66cebc047c 100644 --- a/analytic_engine/src/sst/reader.rs +++ b/analytic_engine/src/sst/reader.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/analytic_engine/src/sst/writer.rs b/analytic_engine/src/sst/writer.rs index 9ab54a7d03..773715cdb9 100644 --- a/analytic_engine/src/sst/writer.rs +++ b/analytic_engine/src/sst/writer.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/benchmarks/src/merge_memtable_bench.rs b/benchmarks/src/merge_memtable_bench.rs index b62bfcb6ac..35765a0a96 100644 --- a/benchmarks/src/merge_memtable_bench.rs +++ b/benchmarks/src/merge_memtable_bench.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/benchmarks/src/merge_sst_bench.rs b/benchmarks/src/merge_sst_bench.rs index 575d1f27b0..c8b07a21b2 100644 --- a/benchmarks/src/merge_sst_bench.rs +++ b/benchmarks/src/merge_sst_bench.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/benchmarks/src/scan_memtable_bench.rs b/benchmarks/src/scan_memtable_bench.rs index 4aa8bd3fdd..72e09a054c 100644 --- a/benchmarks/src/scan_memtable_bench.rs +++ b/benchmarks/src/scan_memtable_bench.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/benchmarks/src/sst_bench.rs b/benchmarks/src/sst_bench.rs index 40328ba6e0..3e9ed3d8da 100644 --- a/benchmarks/src/sst_bench.rs +++ b/benchmarks/src/sst_bench.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/benchmarks/src/sst_tools.rs b/benchmarks/src/sst_tools.rs index 5d112c80ac..12a090e0ba 100644 --- a/benchmarks/src/sst_tools.rs +++ b/benchmarks/src/sst_tools.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/benchmarks/src/util.rs b/benchmarks/src/util.rs index ade4c6d107..3c52b26011 100644 --- a/benchmarks/src/util.rs +++ b/benchmarks/src/util.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/catalog/src/schema.rs b/catalog/src/schema.rs index d46cd541f2..51fb7f82d2 100644 --- a/catalog/src/schema.rs +++ b/catalog/src/schema.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +26,8 @@ use table_engine::{ table::{SchemaId, TableId, TableRef}, }; -// FIXME: `CreateExistTable` can lead to `segmentation fault` if including backtrace. +// FIXME: `CreateExistTable` can lead to `segmentation fault` if including +// backtrace. #[derive(Debug, Snafu)] #[snafu(visibility(pub))] pub enum Error { @@ -133,9 +120,7 @@ pub enum Error { #[snafu(display("Failed to close table, source:{}", source))] CloseTableWithCause { source: GenericError }, - #[snafu(display( - "Failed to create table, table already exists, table:{table}." 
- ))] + #[snafu(display("Failed to create table, table already exists, table:{table}."))] CreateExistTable { table: String }, #[snafu(display( diff --git a/common_types/src/projected_schema.rs b/common_types/src/projected_schema.rs index 87480a956f..d0f780d8b6 100644 --- a/common_types/src/projected_schema.rs +++ b/common_types/src/projected_schema.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/common_types/src/record_batch.rs b/common_types/src/record_batch.rs index b7ad7e7aaa..1b7d610d8e 100644 --- a/common_types/src/record_batch.rs +++ b/common_types/src/record_batch.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/common_types/src/row/contiguous.rs b/common_types/src/row/contiguous.rs index 89ebf6e895..d16960959b 100644 --- a/common_types/src/row/contiguous.rs +++ b/common_types/src/row/contiguous.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/common_types/src/row/mod.rs b/common_types/src/row/mod.rs index dbc2910958..652611a892 100644 --- a/common_types/src/row/mod.rs +++ b/common_types/src/row/mod.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/common_types/src/schema.rs b/common_types/src/schema.rs index 8c80f3346c..5abdeabb95 100644 --- a/common_types/src/schema.rs +++ b/common_types/src/schema.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/common_types/src/tests.rs b/common_types/src/tests.rs index 3c1bb3393a..4d5d8e1f54 100644 --- a/common_types/src/tests.rs +++ b/common_types/src/tests.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/components/object_store/src/disk_cache.rs b/components/object_store/src/disk_cache.rs index 7c72534f02..53d537ffa6 100644 --- a/components/object_store/src/disk_cache.rs +++ b/components/object_store/src/disk_cache.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/partition_table_engine/src/scan_builder.rs b/partition_table_engine/src/scan_builder.rs index 5a1e8e6870..247bcae98b 100644 --- a/partition_table_engine/src/scan_builder.rs +++ b/partition_table_engine/src/scan_builder.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/wal/src/message_queue_impl/region.rs b/src/wal/src/message_queue_impl/region.rs index fed2072256..292d0469c9 100644 --- a/src/wal/src/message_queue_impl/region.rs +++ b/src/wal/src/message_queue_impl/region.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/system_catalog/src/tables.rs b/system_catalog/src/tables.rs index 6a4d7ad411..dc1113f784 100644 --- a/system_catalog/src/tables.rs +++ b/system_catalog/src/tables.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/table_engine/src/provider.rs b/table_engine/src/provider.rs index 4806a4260f..6b0c38a770 100644 --- a/table_engine/src/provider.rs +++ b/table_engine/src/provider.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tools/src/bin/sst-convert.rs b/tools/src/bin/sst-convert.rs index 3fcc6efd4f..57c8f8f5fa 100644 --- a/tools/src/bin/sst-convert.rs +++ b/tools/src/bin/sst-convert.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License"); From 1983d31f89f89b9edc37ff0dfc23237b948c51ae Mon Sep 17 00:00:00 2001 From: kamille Date: Fri, 22 Dec 2023 17:31:59 +0800 Subject: [PATCH 13/13] fix license again. --- analytic_engine/src/instance/reorder_memtable.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/analytic_engine/src/instance/reorder_memtable.rs b/analytic_engine/src/instance/reorder_memtable.rs index ef7c91d210..5a7a03de42 100644 --- a/analytic_engine/src/instance/reorder_memtable.rs +++ b/analytic_engine/src/instance/reorder_memtable.rs @@ -1,17 +1,3 @@ -// Copyright 2023 The CeresDB Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Copyright 2023 The HoraeDB Authors // // Licensed under the Apache License, Version 2.0 (the "License");
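
A minimal illustrative sketch of the `CreateExistTable` change in the catalog/src/schema.rs hunk above, not part of the patch series itself: per the FIXME, capturing a backtrace for this variant could segfault, so the `Backtrace` field is dropped. The display attribute and visibility attribute below are taken from the diff; the `try_create` function and its `existing` table list are hypothetical, added only to show a call site, and may not match the actual HoraeDB code.

use snafu::Snafu;

// Variant after the change: no `backtrace: Backtrace` field, so constructing
// or formatting the error no longer captures a backtrace.
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("Failed to create table, table already exists, table:{table}."))]
    CreateExistTable { table: String },
}

// Hypothetical call site: only the table name is needed to build the error now.
pub fn try_create(existing: &[String], table: &str) -> Result<(), Error> {
    if existing.iter().any(|t| t == table) {
        return Err(Error::CreateExistTable {
            table: table.to_string(),
        });
    }
    Ok(())
}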