Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: df patched upgrade to 2024-03-05 #1

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1746,7 +1746,7 @@ impl ScalarValue {
}

/// Converts `Vec<ScalarValue>` where each element has type corresponding to
/// `data_type`, to a [`ListArray`].
/// `data_type`, to a single element [`ListArray`].
///
/// Example
/// ```
Expand Down Expand Up @@ -4453,7 +4453,8 @@ mod tests {
// The alignment requirements differ across architectures and
// thus the size of the enum appears to differ as well

assert_eq!(std::mem::size_of::<ScalarValue>(), 48);
// The expected size may change depending on the Rust version
assert_eq!(std::mem::size_of::<ScalarValue>(), 64);
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ use tokio::task::JoinSet;
/// same results
#[tokio::test(flavor = "multi_thread")]
async fn streaming_aggregate_test() {
let test_cases = vec![
let test_cases = [
vec!["a"],
vec!["b", "a"],
vec!["c", "a"],
Expand Down
20 changes: 13 additions & 7 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ use std::sync::{Arc, OnceLock};
use crate::signature::TIMEZONE_WILDCARD;
use crate::type_coercion::binary::get_wider_type;
use crate::type_coercion::functions::data_types;
use crate::{
conditional_expressions, FuncMonotonicity, Signature, TypeSignature, Volatility,
};
use crate::{FuncMonotonicity, Signature, TypeSignature, Volatility};

use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
use datafusion_common::{exec_err, plan_err, DataFusionError, Result};
Expand Down Expand Up @@ -899,10 +897,9 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::ConcatWithSeparator => {
Signature::variadic(vec![Utf8], self.volatility())
}
BuiltinScalarFunction::Coalesce => Signature::variadic(
conditional_expressions::SUPPORTED_COALESCE_TYPES.to_vec(),
self.volatility(),
),
BuiltinScalarFunction::Coalesce => {
Signature::variadic_equal(self.volatility())
}
BuiltinScalarFunction::SHA224
| BuiltinScalarFunction::SHA256
| BuiltinScalarFunction::SHA384
Expand Down Expand Up @@ -1575,4 +1572,13 @@ mod tests {
assert_eq!(func_from_str, *func_original);
}
}

#[test]
fn test_coalesce_return_types() {
    // Two identical `Date32` arguments: the resolved return type of
    // `Coalesce` is expected to be `Date32` as well.
    let arg_types = [DataType::Date32, DataType::Date32];
    let resolved = BuiltinScalarFunction::Coalesce
        .return_type(&arg_types)
        .unwrap();
    assert_eq!(resolved, DataType::Date32);
}
}
19 changes: 0 additions & 19 deletions datafusion/expr/src/conditional_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,6 @@ use arrow::datatypes::DataType;
use datafusion_common::{plan_err, DFSchema, Result};
use std::collections::HashSet;

/// Types currently supported by the coalesce function.
/// The order of these types corresponds to the order in which coercion is applied,
/// so it should run from least informative to most informative.
pub static SUPPORTED_COALESCE_TYPES: &[DataType] = &[
DataType::Boolean,
DataType::UInt8,
DataType::UInt16,
DataType::UInt32,
DataType::UInt64,
DataType::Int8,
DataType::Int16,
DataType::Int32,
DataType::Int64,
DataType::Float32,
DataType::Float64,
DataType::Utf8,
DataType::LargeUtf8,
];

/// Helper struct for building [Expr::Case]
pub struct CaseBuilder {
expr: Option<Box<Expr>>,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/expr_rewriter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ mod test {
let expr = col("a") + col("b");
let schema_a =
make_schema_with_empty_metadata(vec![make_field("\"tableA\"", "a")]);
let schemas = vec![schema_a];
let schemas = [schema_a];
let schemas = schemas.iter().collect::<Vec<_>>();

let error =
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/logical_plan/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2415,7 +2415,7 @@ impl DistinctOn {

/// Aggregates its input based on a set of grouping and aggregate
/// expressions (e.g. SUM).
#[derive(Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
// mark non_exhaustive to encourage use of try_new/new()
#[non_exhaustive]
pub struct Aggregate {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ fn string_temporal_coercion(

/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
/// where both are numeric
fn comparison_binary_numeric_coercion(
pub(crate) fn comparison_binary_numeric_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
Expand Down
27 changes: 21 additions & 6 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use arrow::{
use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims};
use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result};

use super::binary::comparison_coercion;
use super::binary::{comparison_binary_numeric_coercion, comparison_coercion};

/// Performs type coercion for function arguments.
///
Expand Down Expand Up @@ -187,6 +187,10 @@ fn get_valid_types(
let new_type = current_types.iter().skip(1).try_fold(
current_types.first().unwrap().clone(),
|acc, x| {
// The coerced types found by `comparison_coercion` are not guaranteed to be
// coercible for the arguments. `comparison_coercion` returns looser
// types that can be coerced to both `acc` and `x` for comparison purposes.
// See `maybe_data_types` for the actual coercion.
let coerced_type = comparison_coercion(&acc, x);
if let Some(coerced_type) = coerced_type {
Ok(coerced_type)
Expand Down Expand Up @@ -276,9 +280,9 @@ fn maybe_data_types(
if current_type == valid_type {
new_type.push(current_type.clone())
} else {
// attempt to coerce
if let Some(valid_type) = coerced_from(valid_type, current_type) {
new_type.push(valid_type)
// attempt to coerce.
if let Some(coerced_type) = coerced_from(valid_type, current_type) {
new_type.push(coerced_type)
} else {
// not possible
return None;
Expand Down Expand Up @@ -427,8 +431,19 @@ fn coerced_from<'a>(
Some(type_into.clone())
}

// cannot coerce
_ => None,
// More coercion rules.
// Note that not all rules in `comparison_coercion` can be reused here.
// For example, all numeric types can be coerced into Utf8 for comparison,
// but not for function arguments.
_ => comparison_binary_numeric_coercion(type_into, type_from).and_then(
|coerced_type| {
if *type_into == coerced_type {
Some(coerced_type)
} else {
None
}
},
),
}
}

Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions/benches/regx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ fn data(rng: &mut ThreadRng) -> StringArray {
}

fn regex(rng: &mut ThreadRng) -> StringArray {
let samples = vec![
let samples = [
".*([A-Z]{1}).*".to_string(),
"^(A).*".to_string(),
r#"[\p{Letter}-]+"#.to_string(),
Expand All @@ -60,7 +60,7 @@ fn regex(rng: &mut ThreadRng) -> StringArray {
}

fn flags(rng: &mut ThreadRng) -> StringArray {
let samples = vec![Some("i".to_string()), Some("im".to_string()), None];
let samples = [Some("i".to_string()), Some("im".to_string()), None];
let mut sb = StringBuilder::new();
for _ in 0..1000 {
let sample = samples.choose(rng).unwrap();
Expand Down
Loading