Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: optimize sst iterator and filter build to consume less CPU #975

Merged
merged 3 commits into from
Jun 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions analytic_engine/src/sst/parquet/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ impl RecordBatchGroupWriter {
for partial_batch in row_group_batch {
for (col_idx, column) in partial_batch.columns().iter().enumerate() {
for row in 0..column.num_rows() {
let datum = column.datum(row);
datum.do_with_bytes(|bytes| {
let datum_view = column.datum_view(row);
datum_view.do_with_bytes(|bytes| {
builder.add_key(col_idx, bytes);
});
}
Expand Down
14 changes: 14 additions & 0 deletions common_types/src/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,14 @@ macro_rules! impl_column {
Some(self.datum(index))
}

/// Checked variant of `datum_view`.
///
/// Yields `None` when `index` is not smaller than `self.0.len()`;
/// otherwise wraps the result of `self.datum_view(index)` in `Some`.
pub fn datum_view_opt(&self, index: usize) -> Option<DatumView> {
    // The closure only runs for an in-range index, so `datum_view` is never
    // reached with an out-of-range one.
    (index < self.0.len()).then(|| self.datum_view(index))
}

pub fn datum_view(&self, index: usize) -> DatumView {
// If this datum is null.
if self.0.is_null(index) {
Expand Down Expand Up @@ -545,6 +553,12 @@ macro_rules! impl_column_block {
}
}

/// Returns the datum view at `index` by forwarding the call to the
/// `datum_view_opt` method of whichever column variant this block holds.
///
/// Whether `None` is returned is decided entirely by that column's own
/// `datum_view_opt`; this wrapper adds no checks of its own.
pub fn datum_view_opt(&self, index: usize) -> Option<DatumView> {
match self {
// One match arm is generated for every `$Kind` passed to the macro.
$(ColumnBlock::$Kind(col) => col.datum_view_opt(index),)*
}
}

/// Panics if index is out of bounds.
pub fn datum_view(&self, index: usize) -> DatumView {
match self {
Expand Down
70 changes: 70 additions & 0 deletions common_types/src/datum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,76 @@ impl<'a> DatumView<'a> {
DatumView::Time(_) => DatumKind::Time,
}
}

pub fn do_with_bytes<F>(&self, mut f: F)
jiacai2050 marked this conversation as resolved.
Show resolved Hide resolved
where
F: FnMut(&[u8]),
{
match self {
DatumView::Double(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::Float(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::UInt64(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::UInt32(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::UInt16(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::UInt8(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::Int64(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::Int32(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::Int16(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::Int8(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::Boolean(v) => {
if *v {
f(&[1])
} else {
f(&[0])
}
}
DatumView::Null => f(&[0]),
DatumView::Timestamp(v) => {
let arr = v.as_i64().to_le_bytes();
f(arr.as_slice())
}
DatumView::Varbinary(v) => f(v),
DatumView::String(v) => f(v.as_bytes()),
DatumView::Date(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
DatumView::Time(v) => {
let arr = v.to_le_bytes();
f(arr.as_slice())
}
}
}
}

#[cfg(feature = "arrow")]
Expand Down
6 changes: 3 additions & 3 deletions common_types/src/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,9 +535,9 @@ impl RecordBatchWithKeyBuilder {
///
/// REQUIRE: The `row_view` and the builder must have the same schema.
pub fn append_row_view(&mut self, row_view: &RowViewOnBatch) -> Result<()> {
for (builder, datum) in self.builders.iter_mut().zip(row_view.iter_columns()) {
let datum = datum.context(IterateDatum)?;
builder.append(datum).context(AppendDatum)?;
for (builder, datum_view) in self.builders.iter_mut().zip(row_view.iter_columns()) {
let datum_view = datum_view.context(IterateDatum)?;
builder.append_view(datum_view).context(AppendDatum)?;
}

Ok(())
Expand Down
8 changes: 4 additions & 4 deletions common_types/src/row/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use snafu::{ensure, Backtrace, OptionExt, Snafu};

use crate::{
column_schema::ColumnSchema,
datum::{Datum, DatumKind},
datum::{Datum, DatumKind, DatumView},
record_batch::RecordBatchWithKey,
schema::{RecordSchemaWithKey, Schema},
time::Timestamp,
Expand Down Expand Up @@ -607,7 +607,7 @@ impl<'a> RowView for RowViewOnBatch<'a> {
}

impl<'a> Iterator for RowViewOnBatchColumnIter<'a> {
type Item = Result<Datum>;
type Item = Result<DatumView<'a>>;

fn next(&mut self) -> Option<Self::Item> {
if self.next_column_idx >= self.record_batch.num_columns() {
Expand All @@ -616,11 +616,11 @@ impl<'a> Iterator for RowViewOnBatchColumnIter<'a> {

let curr_column_idx = self.next_column_idx;
let column = self.record_batch.column(curr_column_idx);
let datum = column.datum_opt(self.row_idx).map(Ok);
let datum_view = column.datum_view_opt(self.row_idx).map(Ok);

self.next_column_idx += 1;

datum
datum_view
}
}

Expand Down