Skip to content

Commit

Permalink
refactor: optimize sst filter build to consume less CPU (#967)
Browse files Browse the repository at this point in the history
## Rationale
When benchmarking, building the xor filter consumes too much CPU.

## Detailed Changes
- Remove the `to_vec` call made when converting a datum to bytes

## Test Plan

---------

Co-authored-by: jiacai2050 <dev@liujiacai.net>
  • Loading branch information
zouxiang1993 and jiacai2050 authored Jun 6, 2023
1 parent e1b9e58 commit 52b78ae
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 2 deletions.
5 changes: 3 additions & 2 deletions analytic_engine/src/sst/parquet/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,9 @@ impl RecordBatchGroupWriter {
for (col_idx, column) in partial_batch.columns().iter().enumerate() {
for row in 0..column.num_rows() {
let datum = column.datum(row);
let bytes = datum.to_bytes();
builder.add_key(col_idx, &bytes);
datum.do_with_bytes(|bytes| {
builder.add_key(col_idx, bytes);
});
}
}
}
Expand Down
70 changes: 70 additions & 0 deletions common_types/src/datum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,76 @@ impl Datum {
}
}

/// Feeds the byte representation of this datum to `f`.
///
/// `f` is called exactly once with a borrowed slice:
/// - numeric variants, `Date` and `Time` pass the value's `to_le_bytes()`
///   output,
/// - `Timestamp` passes the little-endian bytes of its `as_i64()` value,
/// - `Boolean` passes a single byte, `1` for `true` and `0` for `false`,
/// - `Null` passes the single byte `0` (the same bytes as `Boolean(false)`),
/// - `Varbinary` and `String` pass the slice returned by `as_ref()` and
///   `as_bytes()` respectively.
///
/// The slice handed to `f` is only borrowed for the duration of the call.
pub fn do_with_bytes<F>(&self, mut f: F)
where
    F: FnMut(&[u8]),
{
    match self {
        // Variants without a numeric payload.
        Datum::Null => f(&[0]),
        Datum::Boolean(flag) => f(&[u8::from(*flag)]),

        // Variable-length payloads are handed over as borrowed slices.
        Datum::Varbinary(raw) => f(raw.as_ref()),
        Datum::String(text) => f(text.as_bytes()),

        // Time-related values, encoded as little-endian bytes.
        Datum::Timestamp(ts) => f(&ts.as_i64().to_le_bytes()),
        Datum::Date(days) => f(&days.to_le_bytes()),
        Datum::Time(time) => f(&time.to_le_bytes()),

        // Floating point values, encoded as little-endian bytes.
        Datum::Double(x) => f(&x.to_le_bytes()),
        Datum::Float(x) => f(&x.to_le_bytes()),

        // Unsigned integers, encoded as little-endian bytes.
        Datum::UInt64(n) => f(&n.to_le_bytes()),
        Datum::UInt32(n) => f(&n.to_le_bytes()),
        Datum::UInt16(n) => f(&n.to_le_bytes()),
        Datum::UInt8(n) => f(&n.to_le_bytes()),

        // Signed integers, encoded as little-endian bytes.
        Datum::Int64(n) => f(&n.to_le_bytes()),
        Datum::Int32(n) => f(&n.to_le_bytes()),
        Datum::Int16(n) => f(&n.to_le_bytes()),
        Datum::Int8(n) => f(&n.to_le_bytes()),
    }
}

pub fn to_bytes(&self) -> Vec<u8> {
match self {
Datum::Double(v) => v.to_le_bytes().to_vec(),
Expand Down

0 comments on commit 52b78ae

Please sign in to comment.