feat(fuse): add prewhere support in native storage format #9600

Merged · 17 commits · Jan 19, 2023

56 changes: 39 additions & 17 deletions Cargo.lock

(Generated lockfile; diff not rendered by default.)

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -85,7 +85,7 @@ members = [
 # databend maintains:
 openraft = { git = "https://github.com/drmingdrmer/openraft", tag = "v0.7.4-alpha.3" }
 sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1", default-features = false }
-opendal = "0.24"
+opendal = { git = "https://github.com/datafuselabs/opendal", rev = "caab12d" }
 ordered-float = { version = "3.4.0", default-features = false }

 # error
2 changes: 1 addition & 1 deletion src/common/arrow/Cargo.toml
@@ -42,7 +42,7 @@ arrow = { package = "arrow2", version = "0.15.0", default-features = false, feat

 arrow-format = { version = "0.8.0", features = ["flight-data", "flight-service", "ipc"] }
 futures = "0.3.24"
-native = { package = "strawboat", version = "0.1.0" }
+native = { package = "strawboat", git = "https://github.com/sundy-li/strawboat", rev = "92a8e4f" }
 parquet2 = { version = "0.17.0", default_features = false, features = ["serde_types"] }

 [dev-dependencies]
8 changes: 1 addition & 7 deletions src/common/hashtable/src/hashtable.rs
@@ -189,13 +189,7 @@ where
                 _alignment: [0; 0],
             }));
         }
-        while (self.table.len() + other.table.len()) * 2 > self.table.capacity() {
-            if (self.table.entries.len() >> 22) == 0 {
-                self.table.grow(2);
-            } else {
-                self.table.grow(1);
-            }
-        }
+
         unsafe {
             self.table.set_merge(&other.table);
         }
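The deleted loop pre-grew the table before merging, presumably because `set_merge` now handles growth itself. A standalone sketch of the capacity logic it implemented, under the assumption that `grow(2)` quadruples and `grow(1)` doubles capacity, and using capacity as a stand-in for `entries.len()`:

```rust
// Sketch (assumptions noted above): pre-grow until the merged entry count
// stays under a load factor of 1/2, growing 4x while the table is small
// (under ~4M slots) and 2x once it is large.
fn pregrow(len: usize, other_len: usize, mut capacity: usize) -> usize {
    while (len + other_len) * 2 > capacity {
        capacity *= if (capacity >> 22) == 0 { 4 } else { 2 };
    }
    capacity
}

fn main() {
    // 1200 merged entries need capacity > 2400; one 4x step reaches 4096.
    assert_eq!(pregrow(600, 600, 1024), 4096);
}
```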
11 changes: 5 additions & 6 deletions src/common/io/src/position.rs
@@ -383,12 +383,11 @@ fn position_sse42<
             if _mm_cmpestrc::<0>(chars_set, chars_count, bytes, 16) > 0 {
                 return index + _mm_cmpestri::<0>(chars_set, chars_count, bytes, 16) as usize;
             }
-        } else {
-            if _mm_cmpestrc::<_SIDD_NEGATIVE_POLARITY>(chars_set, chars_count, bytes, 16) > 0 {
-                return index
-                    + _mm_cmpestri::<_SIDD_NEGATIVE_POLARITY>(chars_set, chars_count, bytes, 16)
-                        as usize;
-            }
+        } else if _mm_cmpestrc::<_SIDD_NEGATIVE_POLARITY>(chars_set, chars_count, bytes, 16) > 0
+        {
+            return index
+                + _mm_cmpestri::<_SIDD_NEGATIVE_POLARITY>(chars_set, chars_count, bytes, 16)
+                    as usize;
         }

         index += 16;
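Only the shape of the `else` branch changes here (collapsed to `else if`, per clippy). For reference, a scalar sketch of what both SSE4.2 branches compute: `_mm_cmpestri` returns the index of the first byte in the needle set, and `_SIDD_NEGATIVE_POLARITY` flips it to the first byte *not* in the set.

```rust
// Scalar equivalent of the SIMD search above, for both polarities.
// `negative == true` mirrors _SIDD_NEGATIVE_POLARITY: match bytes NOT in the set.
fn position_scalar(haystack: &[u8], set: &[u8], negative: bool) -> Option<usize> {
    haystack.iter().position(|b| set.contains(b) != negative)
}

fn main() {
    assert_eq!(position_scalar(b"abcde", b"cd", false), Some(2));
    // Negative polarity: first byte outside the set.
    assert_eq!(position_scalar(b"ccba", b"c", true), Some(2));
}
```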
2 changes: 1 addition & 1 deletion src/common/storage/src/operator.rs
@@ -250,7 +250,7 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result<Operator> {

     // Disable credential loader
     if cfg.disable_credential_loader {
-        builder.disable_credential_loader();
+        builder.disable_config_load();
     }

     // Enable virtual host style
18 changes: 18 additions & 0 deletions src/query/expression/src/block.rs
@@ -329,6 +329,24 @@ impl DataBlock {

         Ok(DataBlock::new(cols, arrow_chunk.len()))
     }
+
+    pub fn from_arrow_chunk_with_types<A: AsRef<dyn Array>>(
+        arrow_chunk: &ArrowChunk<A>,
+        data_types: &[DataType],
+    ) -> Result<Self> {
+        let cols = data_types
+            .iter()
+            .zip(arrow_chunk.arrays())
+            .map(|(data_type, col)| {
+                Ok(BlockEntry {
+                    data_type: data_type.clone(),
+                    value: Value::Column(Column::from_arrow(col.as_ref(), data_type)),
+                })
+            })
+            .collect::<Result<_>>()?;
+
+        Ok(DataBlock::new(cols, arrow_chunk.len()))
+    }
 }

 impl TryFrom<DataBlock> for ArrowChunk<ArrayRef> {
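The explicit type list matters because several logical types can share one physical arrow representation, so the types cannot always be recovered from the arrays alone. A minimal standalone sketch of the zip pattern, with hypothetical stand-ins for `DataType` and `BlockEntry`:

```rust
// Hypothetical stand-in types, to show the pattern only.
#[derive(Clone, Debug, PartialEq)]
enum LogicalType {
    Int64,
    Timestamp, // stored physically as i64, just like Int64
}

#[derive(Debug)]
struct Entry {
    data_type: LogicalType,
    values: Vec<i64>,
}

// Pair each physical array with a caller-supplied logical type instead of
// inferring the type from the array, mirroring from_arrow_chunk_with_types.
fn block_with_types(types: &[LogicalType], arrays: &[Vec<i64>]) -> Vec<Entry> {
    types
        .iter()
        .zip(arrays)
        .map(|(ty, arr)| Entry { data_type: ty.clone(), values: arr.clone() })
        .collect()
}

fn main() {
    let entries = block_with_types(&[LogicalType::Timestamp], &[vec![1, 2, 3]]);
    // Physically indistinguishable from Int64; the explicit list disambiguates.
    assert_eq!(entries[0].data_type, LogicalType::Timestamp);
}
```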
2 changes: 1 addition & 1 deletion src/query/expression/src/types/string.rs
@@ -212,7 +212,7 @@ impl StringColumn {
 }

 pub struct StringIterator<'a> {
-    data: &'a Buffer<u8>,
+    data: &'a [u8],
     offsets: std::slice::Windows<'a, u64>,
 }

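Borrowing `&[u8]` instead of `&Buffer<u8>` lets the iterator work over any byte slice, not only an arrow buffer. A standalone sketch of the windows-over-offsets scheme the struct relies on:

```rust
// Each consecutive offset pair (start, end) delimits one string in the flat
// byte buffer, which is exactly what `offsets.windows(2)` yields.
fn iter_strings<'a>(data: &'a [u8], offsets: &'a [u64]) -> impl Iterator<Item = &'a [u8]> {
    offsets.windows(2).map(move |w| &data[w[0] as usize..w[1] as usize])
}

fn main() {
    let data = b"abxyz";
    let offsets = [0u64, 2, 2, 5]; // ["ab", "", "xyz"]
    let strs: Vec<&[u8]> = iter_strings(data, &offsets).collect();
    assert_eq!(strs, vec![&b"ab"[..], &b""[..], &b"xyz"[..]]);
}
```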
3 changes: 1 addition & 2 deletions src/query/functions/src/aggregates/aggregator_common.rs
@@ -111,12 +111,11 @@ pub fn eval_aggr(
     name: &str,
     params: Vec<Scalar>,
     columns: &[Column],
-    types: &[DataType],
     rows: usize,
 ) -> Result<(Column, DataType)> {
     let factory = AggregateFunctionFactory::instance();
-    let arguments = types.to_owned();
     let cols: Vec<Column> = columns.to_owned();
+    let arguments = columns.iter().map(|x| x.data_type()).collect();

     let func = factory.get(name, params, arguments)?;
     let data_type = func.return_type()?;
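Dropping the `types` parameter removes a slice every caller had to keep in sync with `columns`; each `Column` already knows its own `DataType`. A standalone sketch of the idea with hypothetical mini types:

```rust
// Hypothetical mini Column that carries its own type, as in common-expression.
#[derive(Clone, Debug, PartialEq)]
enum DataType { Int64, Str }

enum Column {
    Int64(Vec<i64>),
    Str(Vec<String>),
}

impl Column {
    fn data_type(&self) -> DataType {
        match self {
            Column::Int64(_) => DataType::Int64,
            Column::Str(_) => DataType::Str,
        }
    }
}

fn main() {
    let columns = vec![Column::Int64(vec![1, 2]), Column::Str(vec!["a".into()])];
    // Derive argument types from the columns themselves, as eval_aggr now does.
    let arguments: Vec<DataType> = columns.iter().map(|c| c.data_type()).collect();
    assert_eq!(arguments, vec![DataType::Int64, DataType::Str]);
}
```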
16 changes: 14 additions & 2 deletions src/query/functions/src/scalars/string.rs
@@ -100,11 +100,23 @@ pub fn register(registry: &mut FunctionRegistry) {
         |val, _| 8 * val.len() as u64,
     );

-    registry.register_1_arg::<StringType, NumberType<u64>, _, _>(
+    registry.register_passthrough_nullable_1_arg::<StringType, NumberType<u64>, _, _>(
         "length",
         FunctionProperty::default(),
         |_| FunctionDomain::Full,
-        |val, _| val.len() as u64,
+        |val, _| match val {
+            ValueRef::Scalar(s) => Value::Scalar(s.len() as u64),
+            ValueRef::Column(c) => {
+                let diffs = c
+                    .offsets
+                    .iter()
+                    .zip(c.offsets.iter().skip(1))
+                    .map(|(a, b)| b - a)
+                    .collect::<Vec<_>>();
+
+                Value::Column(diffs.into())
+            }
+        },
     );

     registry.register_passthrough_nullable_1_arg::<StringType, NumberType<u64>, _, _>(
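The column arm is the interesting part: for a string column, per-row lengths fall straight out of the offsets array as `offsets[i + 1] - offsets[i]`, so no string bytes are touched. A standalone sketch:

```rust
// Length of row i is the gap between consecutive offsets; one pass over the
// offsets yields the whole u64 result column without reading any string data.
fn string_lengths(offsets: &[u64]) -> Vec<u64> {
    offsets.windows(2).map(|w| w[1] - w[0]).collect()
}

fn main() {
    let offsets = [0u64, 2, 2, 5]; // ["ab", "", "xyz"]
    assert_eq!(string_lengths(&offsets), vec![2, 0, 3]);
}
```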
22 changes: 4 additions & 18 deletions src/query/functions/tests/it/aggregates/mod.rs
@@ -37,14 +37,8 @@ use itertools::Itertools;

 use super::scalars::parser;

-pub trait AggregationSimulator = Fn(
-    &str,
-    Vec<Scalar>,
-    &[Column],
-    &[DataType],
-    usize,
-) -> common_exception::Result<(Column, DataType)>
-    + Copy;
+pub trait AggregationSimulator =
+    Fn(&str, Vec<Scalar>, &[Column], usize) -> common_exception::Result<(Column, DataType)> + Copy;

 /// run ast which is agg expr
 pub fn run_agg_ast(
@@ -97,7 +91,6 @@ pub fn run_agg_ast(
                 .map(|p| Scalar::Number(NumberScalar::UInt64(*p as u64)))
                 .collect();

-            let arg_types: Vec<DataType> = args.iter().map(|(_, ty)| ty.clone()).collect();
             let arg_columns: Vec<Column> = args
                 .iter()
                 .map(|(arg, ty)| match arg {
@@ -109,13 +102,7 @@
                 })
                 .collect();

-            simulator(
-                name.as_str(),
-                params,
-                &arg_columns,
-                &arg_types,
-                block.num_rows(),
-            )?
+            simulator(name.as_str(), params, &arg_columns, block.num_rows())?
         }
         _ => unimplemented!(),
     }
@@ -187,11 +174,10 @@ pub fn simulate_two_groups_group_by(
     name: &str,
     params: Vec<Scalar>,
     columns: &[Column],
-    types: &[DataType],
     rows: usize,
 ) -> common_exception::Result<(Column, DataType)> {
     let factory = AggregateFunctionFactory::instance();
-    let arguments = types.to_owned();
+    let arguments: Vec<DataType> = columns.iter().map(|c| c.data_type()).collect();
     let cols: Vec<Column> = columns.to_owned();

     let func = factory.get(name, params, arguments)?;
5 changes: 4 additions & 1 deletion src/query/service/src/pipelines/pipeline_builder.rs
@@ -345,7 +345,10 @@ impl PipelineBuilder {
             &aggregate.agg_funcs,
         )?;

-        if self.ctx.get_cluster().is_empty()
+        // this has bugs now, so we disable it for now, cc @winter
+        #[allow(clippy::overly_complex_bool_expr)]
+        if 1 == 2
+            && self.ctx.get_cluster().is_empty()
             && !params.group_columns.is_empty()
             && self.main_pipeline.output_len() > 1
         {
@@ -78,6 +78,7 @@ async fn test_recluster_mutator_block_select() -> Result<()> {
         min: vec![Scalar::from(1i64)],
         max: vec![Scalar::from(3i64)],
         level: 0,
+        pages: None,
     }))
     .await?;
     test_segment_locations.push(segment_location);
@@ -88,6 +89,7 @@
         min: vec![Scalar::from(2i64)],
         max: vec![Scalar::from(4i64)],
         level: 0,
+        pages: None,
     }))
     .await?;
     test_segment_locations.push(segment_location);
@@ -98,6 +100,7 @@
         min: vec![Scalar::from(4i64)],
         max: vec![Scalar::from(5i64)],
         level: 0,
+        pages: None,
     }))
     .await?;
     test_segment_locations.push(segment_location);
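The new `pages` field extends cluster statistics, presumably with per-page value ranges that let the native-format prewhere reader skip pages before decoding them. A hypothetical sketch of that kind of pruning (names and types are illustrative, not the PR's API):

```rust
// Hypothetical page pruning: with a (min, max) per page, only pages whose
// range intersects the predicate's range need to be read and decoded.
struct PageStat { min: i64, max: i64 }

fn pages_to_read(stats: &[PageStat], lo: i64, hi: i64) -> Vec<usize> {
    stats
        .iter()
        .enumerate()
        .filter(|(_, s)| s.max >= lo && s.min <= hi)
        .map(|(i, _)| i)
        .collect()
}

fn main() {
    let stats = [PageStat { min: 0, max: 9 }, PageStat { min: 10, max: 19 }];
    // Predicate `col BETWEEN 12 AND 15` touches only page 1.
    assert_eq!(pages_to_read(&stats, 12, 15), vec![1]);
}
```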
@@ -98,7 +98,7 @@ fn test_to_partitions() -> Result<()> {

     let blocks_metas = (0..num_of_block)
         .into_iter()
-        .map(|_| block_meta.clone())
+        .map(|_| (None, block_meta.clone()))
         .collect::<Vec<_>>();

     let column_nodes = (0..num_of_col)