Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(query): refactor lambda fucntion as expression evaluator #13836

Merged
merged 6 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 172 additions & 1 deletion src/query/expression/src/evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ use crate::FunctionContext;
use crate::FunctionDomain;
use crate::FunctionEval;
use crate::FunctionRegistry;
use crate::RemoteExpr;

pub struct Evaluator<'a> {
input_columns: &'a DataBlock,
Expand Down Expand Up @@ -163,6 +164,27 @@ impl<'a> Evaluator<'a> {
ctx.render_error(*span, id.params(), &args, &function.signature.name)?;
Ok(result)
}
Expr::LambdaFunctionCall {
name,
args,
lambda_expr,
..
} => {
let args = args
.iter()
.map(|expr| self.partial_run(expr, validity.clone()))
.collect::<Result<Vec<_>>>()?;
assert!(
args.iter()
.filter_map(|val| match val {
Value::Column(col) => Some(col.len()),
Value::Scalar(_) => None,
})
.all_equal()
);

self.run_lambda(name, args, lambda_expr)
}
};

#[cfg(debug_assertions)]
Expand Down Expand Up @@ -881,6 +903,110 @@ impl<'a> Evaluator<'a> {

unreachable!("expr is not a set returning function: {expr}")
}

fn run_lambda(
&self,
func_name: &str,
args: Vec<Value<AnyType>>,
lambda_expr: &RemoteExpr,
) -> Result<Value<AnyType>> {
let expr = lambda_expr.as_expr(self.fn_registry);
// TODO: Support multi args
match &args[0] {
Value::Scalar(s) => match s {
Scalar::Array(c) => {
let entry = BlockEntry::new(c.data_type(), Value::Column(c.clone()));
let block = DataBlock::new(vec![entry], c.len());

let evaluator = Evaluator::new(&block, self.func_ctx, self.fn_registry);
let result = evaluator.run(&expr)?;
let result_col = result.convert_to_full_column(expr.data_type(), c.len());

if func_name == "array_filter" {
let result_col = result_col.remove_nullable();
let bitmap = result_col.as_boolean().unwrap();
let filtered_inner_col = c.filter(bitmap);
Ok(Value::Scalar(Scalar::Array(filtered_inner_col)))
} else {
Ok(Value::Scalar(Scalar::Array(result_col)))
}
}
_ => unreachable!(),
},
Value::Column(c) => {
let (inner_col, inner_ty, offsets, validity) = match c {
Column::Array(box array_col) => (
array_col.values.clone(),
array_col.values.data_type(),
array_col.offsets.clone(),
None,
),
Column::Nullable(box nullable_col) => match &nullable_col.column {
Column::Array(box array_col) => (
array_col.values.clone(),
array_col.values.data_type(),
array_col.offsets.clone(),
Some(nullable_col.validity.clone()),
),
_ => unreachable!(),
},
_ => unreachable!(),
};
let entry = BlockEntry::new(inner_ty, Value::Column(inner_col.clone()));
let block = DataBlock::new(vec![entry], inner_col.len());

let evaluator = Evaluator::new(&block, self.func_ctx, self.fn_registry);
let result = evaluator.run(&expr)?;
let result_col = result.convert_to_full_column(expr.data_type(), inner_col.len());

let col = if func_name == "array_filter" {
let result_col = result_col.remove_nullable();
let bitmap = result_col.as_boolean().unwrap();
let filtered_inner_col = inner_col.filter(bitmap);
// generate new offsets after filter.
let mut new_offset = 0;
let mut filtered_offsets = Vec::with_capacity(offsets.len());
filtered_offsets.push(0);
for offset in offsets.windows(2) {
let off = offset[0] as usize;
let len = (offset[1] - offset[0]) as usize;
let unset_count = bitmap.null_count_range(off, len);
new_offset += (len - unset_count) as u64;
filtered_offsets.push(new_offset);
}

let array_col = Column::Array(Box::new(ArrayColumn {
values: filtered_inner_col,
offsets: filtered_offsets.into(),
}));
match validity {
Some(validity) => {
Value::Column(Column::Nullable(Box::new(NullableColumn {
column: array_col,
validity,
})))
}
None => Value::Column(array_col),
}
} else {
let array_col = Column::Array(Box::new(ArrayColumn {
values: result_col,
offsets,
}));
match validity {
Some(validity) => {
Value::Column(Column::Nullable(Box::new(NullableColumn {
column: array_col,
validity,
})))
}
None => Value::Column(array_col),
}
};
Ok(col)
}
}
}
}

pub struct ConstantFolder<'a, Index: ColumnIndex> {
Expand Down Expand Up @@ -1159,7 +1285,10 @@ impl<'a, Index: ColumnIndex> ConstantFolder<'a, Index> {
args,
return_type,
} => {
let (mut args_expr, mut args_domain) = (Vec::new(), Some(Vec::new()));
let (mut args_expr, mut args_domain) = (
Vec::with_capacity(args.len()),
Some(Vec::with_capacity(args.len())),
);
for arg in args {
let (expr, domain) = self.fold_once(arg);
args_expr.push(expr);
Expand Down Expand Up @@ -1223,6 +1352,48 @@ impl<'a, Index: ColumnIndex> ConstantFolder<'a, Index> {

(func_expr, func_domain)
}
Expr::LambdaFunctionCall {
span,
name,
args,
lambda_expr,
lambda_display,
return_type,
} => {
let mut args_expr = Vec::with_capacity(args.len());
for arg in args {
let (expr, _) = self.fold_once(arg);
args_expr.push(expr);
}
let all_args_is_scalar = args_expr.iter().all(|arg| arg.as_constant().is_some());

let func_expr = Expr::LambdaFunctionCall {
span: *span,
name: name.clone(),
args: args_expr,
lambda_expr: lambda_expr.clone(),
lambda_display: lambda_display.clone(),
return_type: return_type.clone(),
};

if all_args_is_scalar {
let block = DataBlock::empty();
let evaluator = Evaluator::new(&block, self.func_ctx, self.fn_registry);
// Since we know the expression is constant, it'll be safe to change its column index type.
let func_expr = func_expr.project_column_ref(|_| unreachable!());
if let Ok(Value::Scalar(scalar)) = evaluator.run(&func_expr) {
return (
Expr::Constant {
span: *span,
scalar,
data_type: return_type.clone(),
},
None,
);
}
}
(func_expr, None)
}
};

debug_assert_eq!(expr.data_type(), new_expr.data_type());
Expand Down
101 changes: 99 additions & 2 deletions src/query/expression/src/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ pub enum RawExpr<Index: ColumnIndex = usize> {
params: Vec<usize>,
args: Vec<RawExpr<Index>>,
},
LambdaFunctionCall {
span: Span,
name: String,
args: Vec<RawExpr<Index>>,
lambda_expr: RemoteExpr,
lambda_display: String,
return_type: DataType,
},
}

/// A type-checked and ready to be evaluated expression, having all overloads chosen for function calls.
Expand Down Expand Up @@ -102,6 +110,15 @@ pub enum Expr<Index: ColumnIndex = usize> {
args: Vec<Expr<Index>>,
return_type: DataType,
},
LambdaFunctionCall {
#[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))]
span: Span,
name: String,
args: Vec<Expr<Index>>,
lambda_expr: RemoteExpr,
lambda_display: String,
return_type: DataType,
},
}

/// Serializable expression used to share executable expression between nodes.
Expand Down Expand Up @@ -141,6 +158,15 @@ pub enum RemoteExpr<Index: ColumnIndex = usize> {
args: Vec<RemoteExpr<Index>>,
return_type: DataType,
},
LambdaFunctionCall {
#[educe(Hash(ignore), PartialEq(ignore), Eq(ignore))]
span: Span,
name: String,
args: Vec<RemoteExpr<Index>>,
lambda_expr: Box<RemoteExpr>,
lambda_display: String,
return_type: DataType,
},
}

impl<Index: ColumnIndex> RawExpr<Index> {
Expand All @@ -151,8 +177,11 @@ impl<Index: ColumnIndex> RawExpr<Index> {
buf.insert(id.clone(), data_type.clone());
}
RawExpr::Cast { expr, .. } => walk(expr, buf),
RawExpr::FunctionCall { args, .. } => args.iter().for_each(|expr| walk(expr, buf)),
RawExpr::Constant { .. } => (),
RawExpr::FunctionCall { args, .. } => args.iter().for_each(|expr| walk(expr, buf)),
RawExpr::LambdaFunctionCall { args, .. } => {
args.iter().for_each(|expr| walk(expr, buf))
}
}
}

Expand Down Expand Up @@ -203,6 +232,21 @@ impl<Index: ColumnIndex> RawExpr<Index> {
params: params.clone(),
args: args.iter().map(|expr| expr.project_column_ref(f)).collect(),
},
RawExpr::LambdaFunctionCall {
span,
name,
args,
lambda_expr,
lambda_display,
return_type,
} => RawExpr::LambdaFunctionCall {
span: *span,
name: name.clone(),
args: args.iter().map(|expr| expr.project_column_ref(f)).collect(),
lambda_expr: lambda_expr.clone(),
lambda_display: lambda_display.clone(),
return_type: return_type.clone(),
},
}
}
}
Expand All @@ -214,6 +258,7 @@ impl<Index: ColumnIndex> Expr<Index> {
Expr::ColumnRef { span, .. } => *span,
Expr::Cast { span, .. } => *span,
Expr::FunctionCall { span, .. } => *span,
Expr::LambdaFunctionCall { span, .. } => *span,
}
}

Expand All @@ -223,6 +268,7 @@ impl<Index: ColumnIndex> Expr<Index> {
Expr::ColumnRef { data_type, .. } => data_type,
Expr::Cast { dest_type, .. } => dest_type,
Expr::FunctionCall { return_type, .. } => return_type,
Expr::LambdaFunctionCall { return_type, .. } => return_type,
}
}

Expand All @@ -233,8 +279,11 @@ impl<Index: ColumnIndex> Expr<Index> {
buf.insert(id.clone(), data_type.clone());
}
Expr::Cast { expr, .. } => walk(expr, buf),
Expr::FunctionCall { args, .. } => args.iter().for_each(|expr| walk(expr, buf)),
Expr::Constant { .. } => (),
Expr::FunctionCall { args, .. } => args.iter().for_each(|expr| walk(expr, buf)),
Expr::LambdaFunctionCall { args, .. } => {
args.iter().for_each(|expr| walk(expr, buf))
}
}
}

Expand Down Expand Up @@ -294,6 +343,21 @@ impl<Index: ColumnIndex> Expr<Index> {
args: args.iter().map(|expr| expr.project_column_ref(f)).collect(),
return_type: return_type.clone(),
},
Expr::LambdaFunctionCall {
span,
name,
args,
lambda_expr,
lambda_display,
return_type,
} => Expr::LambdaFunctionCall {
span: *span,
name: name.clone(),
args: args.iter().map(|expr| expr.project_column_ref(f)).collect(),
lambda_expr: lambda_expr.clone(),
lambda_display: lambda_display.clone(),
return_type: return_type.clone(),
},
}
}

Expand Down Expand Up @@ -344,6 +408,21 @@ impl<Index: ColumnIndex> Expr<Index> {
args: args.iter().map(Expr::as_remote_expr).collect(),
return_type: return_type.clone(),
},
Expr::LambdaFunctionCall {
span,
name,
args,
lambda_expr,
lambda_display,
return_type,
} => RemoteExpr::LambdaFunctionCall {
span: *span,
name: name.clone(),
args: args.iter().map(Expr::as_remote_expr).collect(),
lambda_expr: Box::new(lambda_expr.clone()),
lambda_display: lambda_display.clone(),
return_type: return_type.clone(),
},
}
}

Expand All @@ -359,6 +438,9 @@ impl<Index: ColumnIndex> Expr<Index> {
.non_deterministic
&& args.iter().all(|arg| arg.is_deterministic(registry))
}
Expr::LambdaFunctionCall { args, .. } => {
args.iter().all(|arg| arg.is_deterministic(registry))
}
}
}
}
Expand Down Expand Up @@ -414,6 +496,21 @@ impl<Index: ColumnIndex> RemoteExpr<Index> {
return_type: return_type.clone(),
}
}
RemoteExpr::LambdaFunctionCall {
span,
name,
args,
lambda_expr,
lambda_display,
return_type,
} => Expr::LambdaFunctionCall {
span: *span,
name: name.clone(),
args: args.iter().map(|arg| arg.as_expr(fn_registry)).collect(),
lambda_expr: *lambda_expr.clone(),
lambda_display: lambda_display.clone(),
return_type: return_type.clone(),
},
}
}
}
Loading
Loading