-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move Coercion for MakeArray to coerce_arguments_for_signature
and introduce another one for ArrayAppend
#8317
Changes from 1 commit
3ba90b4
32c9931
f760de5
6384a05
3686a2f
3226add
1045066
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,6 +95,8 @@ pub enum TypeSignature { | |
VariadicEqual, | ||
/// One or more arguments with arbitrary types | ||
VariadicAny, | ||
/// A function such as `make_array` should be coerced to the same type | ||
VariadicCoerced, | ||
/// fixed number of arguments of an arbitrary but equal type out of a list of valid types. | ||
/// | ||
/// # Examples | ||
|
@@ -113,6 +115,8 @@ pub enum TypeSignature { | |
/// Function `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature` | ||
/// is `OneOf(vec![Any(0), VariadicAny])`. | ||
OneOf(Vec<TypeSignature>), | ||
/// Specialized Signature for ArrayAppend and similar functions | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What do you think about using a more generic name? Perhaps something like: /// The first argument is an array type ([`DataType::List`] or [`DataType::LargeList`])
/// and the subsequent arguments are coerced to the List's element type
///
/// For example a call to `func(a: List(int64), b: int32, c: utf8)` would attempt to coerce
/// all the arguments to `int64`:
/// ```
/// func(a: List(int64), cast(b as int64): int64, cast(c as int64): int64)
/// ```
ArrayAndElements There may be more general ways of expressing the array function types too 🤔 |
||
ArrayAppendLikeSignature, | ||
} | ||
|
||
impl TypeSignature { | ||
|
@@ -136,11 +140,17 @@ impl TypeSignature { | |
.collect::<Vec<&str>>() | ||
.join(", ")] | ||
} | ||
TypeSignature::VariadicCoerced => { | ||
vec!["CoercibleT, .., CoercibleT".to_string()] | ||
} | ||
TypeSignature::VariadicEqual => vec!["T, .., T".to_string()], | ||
TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()], | ||
TypeSignature::OneOf(sigs) => { | ||
sigs.iter().flat_map(|s| s.to_string_repr()).collect() | ||
} | ||
TypeSignature::ArrayAppendLikeSignature => { | ||
vec!["ArrayAppendLikeSignature(List<T>, T)".to_string()] | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,13 +15,19 @@ | |
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use std::sync::Arc; | ||
|
||
use crate::signature::TIMEZONE_WILDCARD; | ||
use crate::{Signature, TypeSignature}; | ||
use arrow::datatypes::Field; | ||
use arrow::{ | ||
compute::can_cast_types, | ||
datatypes::{DataType, TimeUnit}, | ||
}; | ||
use datafusion_common::{plan_err, DataFusionError, Result}; | ||
use datafusion_common::utils::list_ndims; | ||
use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; | ||
|
||
use super::binary::comparison_coercion; | ||
|
||
/// Performs type coercion for function arguments. | ||
/// | ||
|
@@ -85,6 +91,24 @@ fn get_valid_types( | |
.iter() | ||
.map(|valid_type| (0..*number).map(|_| valid_type.clone()).collect()) | ||
.collect(), | ||
TypeSignature::VariadicCoerced => { | ||
let new_type = current_types.iter().skip(1).try_fold( | ||
current_types.first().unwrap().clone(), | ||
|acc, x| { | ||
let coerced_type = comparison_coercion(&acc, x); | ||
if let Some(coerced_type) = coerced_type { | ||
Ok(coerced_type) | ||
} else { | ||
internal_err!("Coercion from {acc:?} to {x:?} failed.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We call unwrap_or previously so |
||
} | ||
}, | ||
); | ||
|
||
match new_type { | ||
Ok(new_type) => vec![vec![new_type; current_types.len()]], | ||
Err(e) => return Err(e), | ||
} | ||
} | ||
TypeSignature::VariadicEqual => { | ||
// one entry with the same len as current_types, whose type is `current_types[0]`. | ||
vec![current_types | ||
|
@@ -95,7 +119,48 @@ fn get_valid_types( | |
TypeSignature::VariadicAny => { | ||
vec![current_types.to_vec()] | ||
} | ||
|
||
TypeSignature::Exact(valid_types) => vec![valid_types.clone()], | ||
TypeSignature::ArrayAppendLikeSignature => { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This adds a specialized check for array append at the end. I think we will need specialized checks for other patterns too. |
||
if current_types.len() != 2 { | ||
return Ok(vec![vec![]]); | ||
} | ||
|
||
let array_type = &current_types[0]; | ||
let elem_type = &current_types[1]; | ||
|
||
// Special case for `array_append(Null, T)`, just return and process in physical expression step. | ||
if array_type.eq(&DataType::Null) { | ||
let array_type = | ||
DataType::List(Arc::new(Field::new("item", elem_type.clone(), true))); | ||
return Ok(vec![vec![array_type.to_owned(), elem_type.to_owned()]]); | ||
} | ||
|
||
// We need to find the coerced base type, mainly for cases like: | ||
// `array_append(List(null), i64)` -> `List(i64)` | ||
let array_base_type = datafusion_common::utils::base_type(array_type); | ||
let elem_base_type = datafusion_common::utils::base_type(elem_type); | ||
let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); | ||
|
||
if new_base_type.is_none() { | ||
return internal_err!( | ||
"Coercion from {array_base_type:?} to {elem_base_type:?} not supported." | ||
); | ||
} | ||
let new_base_type = new_base_type.unwrap(); | ||
|
||
let array_type = datafusion_common::utils::coerced_type_with_base_type_only( | ||
array_type, | ||
&new_base_type, | ||
); | ||
|
||
if let DataType::List(ref field) = array_type { | ||
let elem_type = field.data_type(); | ||
return Ok(vec![vec![array_type.clone(), elem_type.to_owned()]]); | ||
} else { | ||
return Ok(vec![vec![]]); | ||
} | ||
} | ||
TypeSignature::Any(number) => { | ||
if current_types.len() != *number { | ||
return plan_err!( | ||
|
@@ -241,6 +306,15 @@ fn coerced_from<'a>( | |
Utf8 | LargeUtf8 => Some(type_into.clone()), | ||
Null if can_cast_types(type_from, type_into) => Some(type_into.clone()), | ||
|
||
// Only accept list with the same number of dimensions unless the type is Null. | ||
// List with different dimensions should be handled in TypeSignature or other places before this. | ||
List(_) | ||
if datafusion_common::utils::base_type(type_from).eq(&Null) | ||
|| list_ndims(type_from) == list_ndims(type_into) => | ||
{ | ||
Some(type_into.clone()) | ||
} | ||
|
||
Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => { | ||
match type_from { | ||
Timestamp(_, Some(from_tz)) => { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -590,26 +590,6 @@ fn coerce_arguments_for_fun( | |
.collect::<Result<Vec<_>>>()?; | ||
} | ||
|
||
if *fun == BuiltinScalarFunction::MakeArray { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❤️ |
||
// Find the final data type for the function arguments | ||
let current_types = expressions | ||
.iter() | ||
.map(|e| e.get_type(schema)) | ||
.collect::<Result<Vec<_>>>()?; | ||
|
||
let new_type = current_types | ||
.iter() | ||
.skip(1) | ||
.fold(current_types.first().unwrap().clone(), |acc, x| { | ||
comparison_coercion(&acc, x).unwrap_or(acc) | ||
}); | ||
|
||
return expressions | ||
.iter() | ||
.zip(current_types) | ||
.map(|(expr, from_type)| cast_array_expr(expr, &from_type, &new_type, schema)) | ||
.collect(); | ||
} | ||
Ok(expressions) | ||
} | ||
|
||
|
@@ -618,20 +598,6 @@ fn cast_expr(expr: &Expr, to_type: &DataType, schema: &DFSchema) -> Result<Expr> | |
expr.clone().cast_to(to_type, schema) | ||
} | ||
|
||
/// Cast array `expr` to the specified type, if possible | ||
fn cast_array_expr( | ||
expr: &Expr, | ||
from_type: &DataType, | ||
to_type: &DataType, | ||
schema: &DFSchema, | ||
) -> Result<Expr> { | ||
if from_type.equals_datatype(&DataType::Null) { | ||
Ok(expr.clone()) | ||
} else { | ||
cast_expr(expr, to_type, schema) | ||
} | ||
} | ||
|
||
/// Returns the coerced exprs for each `input_exprs`. | ||
/// Get the coerced data type from `aggregate_rule::coerce_types` and add `try_cast` if the | ||
/// data type of `input_exprs` need to be coerced. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -265,10 +265,8 @@ AS VALUES | |
(make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), [28, 29, 30], [37, 38, 39], 10) | ||
; | ||
|
||
query ? | ||
query error | ||
select [1, true, null] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is an error because
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
---- | ||
[1, 1, ] | ||
|
||
query error DataFusion error: This feature is not implemented: ScalarFunctions without MakeArray are not supported: now() | ||
SELECT [now()] | ||
|
@@ -1092,18 +1090,27 @@ select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 3 | |
|
||
## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`) | ||
|
||
# TODO: array_append with NULLs | ||
# array_append scalar function #1 | ||
# query ? | ||
# select array_append(make_array(), 4); | ||
# ---- | ||
# [4] | ||
# array_append with NULLs | ||
|
||
# array_append scalar function #2 | ||
# query ?? | ||
# select array_append(make_array(), make_array()), array_append(make_array(), make_array(4)); | ||
# ---- | ||
# [[]] [[4]] | ||
query ??????? | ||
select | ||
array_append(null, 1), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we want to support this? Postgres does not allow this:
It also doesn't try to find a fancy type with an empty list:
🤔 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Summary of other SQL behavior: DuckDB: [[2, 3]], [[2, 3]] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think for array_append([], [2,3]), it is fine to not follow Postgres and return [[2, 3]] like ClickHouse and DuckDB. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ClickHouse and DuckDB have the same output for |
||
array_append(null, [2, 3]), | ||
array_append(null, [[4]]), | ||
array_append(make_array(), 4), | ||
array_append(make_array(), null), | ||
array_append(make_array(1, null, 3), 4), | ||
array_append(make_array(null, null), 1) | ||
; | ||
---- | ||
[1] [[2, 3]] [[[4]]] [4] [] [1, , 3, 4] [, , 1] | ||
|
||
query ?? | ||
select | ||
array_append(make_array(make_array(1, null, 3)), make_array(null)), | ||
array_append(make_array(make_array(1, null, 3)), null); | ||
---- | ||
[[1, , 3], []] [[1, , 3], ] | ||
|
||
# array_append scalar function #3 | ||
query ??? | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain how this is different than
VariadicEqual
? It seems from the comments here they are quite similar 🤔 There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just noticed that
VariadicEqual
is not used in any function. Maybe we can just keep one. I thought Equal is the one that doesn't care about coercion: all the types should be equal, like (i32, i32, i32).
Coerced is the one that ensures the final coerced type is the same (all of the types are coercible to the same one), like (i32, i64) -> i64.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the current
VariadicCoerced
includes the VariadicEqual
use case as well. We can just have one signature.