From a60020a053261c3e80fa1e61e17fd44d6bc10b4c Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Mon, 11 Nov 2024 21:13:51 -0500 Subject: [PATCH 1/4] support stringview --- datafusion/expr-common/src/type_coercion/binary.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 31fe6a59baee..e4beb862933a 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -841,7 +841,7 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result { // Right Float is larger than left Float. (Float16, Float32 | Float64) | (Float32, Float64) | // Right String is larger than left String. - (Utf8, LargeUtf8) | + (Utf8 | Utf8View, LargeUtf8) | // Any right type is wider than a left hand side Null. (Null, _) => rhs.clone(), // Left UInt is larger than right UInt. @@ -851,7 +851,7 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result { // Left Float is larger than right Float. (Float32 | Float64, Float16) | (Float64, Float32) | // Left String is larger than right String. - (LargeUtf8, Utf8) | + (LargeUtf8, Utf8 | Utf8View) | // Any left type is wider than a right hand side Null. (_, Null) => lhs.clone(), (List(lhs_field), List(rhs_field)) => { @@ -1172,16 +1172,22 @@ fn binary_to_string_coercion( match (lhs_type, rhs_type) { (Binary, Utf8) => Some(Utf8), (Binary, LargeUtf8) => Some(LargeUtf8), + (Binary, Utf8View) => Some(Utf8View), (BinaryView, Utf8) => Some(Utf8View), (BinaryView, LargeUtf8) => Some(LargeUtf8), + (BinaryView, Utf8View) => Some(Utf8View), (LargeBinary, Utf8) => Some(LargeUtf8), (LargeBinary, LargeUtf8) => Some(LargeUtf8), + (LargeBinary, Utf8View) => Some(Utf8View), (Utf8, Binary) => Some(Utf8), (Utf8, LargeBinary) => Some(LargeUtf8), (Utf8, BinaryView) => Some(Utf8View), (LargeUtf8, Binary) => Some(LargeUtf8), (LargeUtf8, LargeBinary) => Some(LargeUtf8), (LargeUtf8, BinaryView) => Some(LargeUtf8), + (Utf8View, Binary) => Some(Utf8), + (Utf8View, LargeBinary) => Some(LargeUtf8), + (Utf8View, BinaryView) => Some(Utf8View), _ => None, } } From 5a6c0bf5d5c1efcd70b5a10e6e3c75e78c728c3f Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Mon, 11 Nov 2024 22:23:09 -0500 Subject: [PATCH 2/4] add tests --- .../expr-common/src/type_coercion/binary.rs | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index e4beb862933a..b8ffc1784e66 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -834,6 +834,8 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result { use arrow::datatypes::DataType::*; Ok(match (lhs, rhs) { (lhs, rhs) if lhs == rhs => lhs.clone(), + // Either Utf8View + Utf8 will return Utf8View + (Utf8View, Utf8) | (Utf8, Utf8View) => Utf8View, // Right UInt is larger than left UInt. (UInt8, UInt16 | UInt32 | UInt64) | (UInt16, UInt32 | UInt64) | (UInt32, UInt64) | // Right Int is larger than left Int. @@ -1185,7 +1187,7 @@ fn binary_to_string_coercion( (LargeUtf8, Binary) => Some(LargeUtf8), (LargeUtf8, LargeBinary) => Some(LargeUtf8), (LargeUtf8, BinaryView) => Some(LargeUtf8), - (Utf8View, Binary) => Some(Utf8), + (Utf8View, Binary) => Some(Utf8View), (Utf8View, LargeBinary) => Some(LargeUtf8), (Utf8View, BinaryView) => Some(Utf8View), _ => None, @@ -1550,6 +1552,62 @@ mod tests { ); } + #[test] + fn test_get_wider_type_with_utf8view() { + assert_eq!( + get_wider_type(&DataType::Utf8View, &DataType::Utf8View).unwrap(), + DataType::Utf8View + ); + + assert_eq!( + get_wider_type(&DataType::Utf8View, &DataType::Utf8).unwrap(), + DataType::Utf8View + ); + + assert_eq!( + get_wider_type(&DataType::Utf8View, &DataType::LargeUtf8).unwrap(), + DataType::LargeUtf8 + ); + + assert_eq!( + get_wider_type(&DataType::Utf8View, &DataType::Null).unwrap(), + DataType::Utf8View + ); + + assert_eq!( + get_wider_type(&DataType::Utf8View, &DataType::Int32).is_err(), + true + ); + } + + #[test] + fn test_binary_to_string_coercion_with_utf8view() { + assert_eq!( + binary_to_string_coercion(&DataType::Binary, &DataType::Utf8View), + Some(DataType::Utf8View) + ); + assert_eq!( + binary_to_string_coercion(&DataType::Utf8View, &DataType::Binary), + Some(DataType::Utf8View) + ); + assert_eq!( + binary_to_string_coercion(&DataType::BinaryView, &DataType::Utf8View), + Some(DataType::Utf8View) + ); + assert_eq!( + binary_to_string_coercion(&DataType::Utf8View, &DataType::LargeBinary), + Some(DataType::LargeUtf8) + ); + assert_eq!( + binary_to_string_coercion(&DataType::LargeBinary, &DataType::Utf8View), + Some(DataType::Utf8View) + ); + assert_eq!( + binary_to_string_coercion(&DataType::Utf8View, &DataType::Int32), + None + ); + } + /// Test coercion rules for binary operators /// /// Applies coercion rules for `$LHS_TYPE $OP $RHS_TYPE` and asserts that the From 513b8653ed39f370d409b71ea720d9087bd972e1 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Mon, 11 Nov 2024 23:03:18 -0500 Subject: [PATCH 3/4] clippy fix --- datafusion/expr-common/src/type_coercion/binary.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index b8ffc1784e66..1612d4df2770 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1574,10 +1574,7 @@ mod tests { DataType::Utf8View ); - assert_eq!( - get_wider_type(&DataType::Utf8View, &DataType::Int32).is_err(), - true - ); + assert!(get_wider_type(&DataType::Utf8View, &DataType::Int32).is_err()); } #[test] From 8b9d6775755689a461ca2b7c2e52db193bd7ddc0 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Sat, 16 Nov 2024 17:33:25 -0500 Subject: [PATCH 4/4] small fix --- datafusion/expr-common/src/type_coercion/binary.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 1612d4df2770..64a3080c00e4 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1180,7 +1180,7 @@ fn binary_to_string_coercion( (BinaryView, Utf8View) => Some(Utf8View), (LargeBinary, Utf8) => Some(LargeUtf8), (LargeBinary, LargeUtf8) => Some(LargeUtf8), - (LargeBinary, Utf8View) => Some(Utf8View), + (LargeBinary, Utf8View) => Some(LargeUtf8), (Utf8, Binary) => Some(Utf8), (Utf8, LargeBinary) => Some(LargeUtf8), (Utf8, BinaryView) => Some(Utf8View), @@ -1597,7 +1597,7 @@ mod tests { ); assert_eq!( binary_to_string_coercion(&DataType::LargeBinary, &DataType::Utf8View), - Some(DataType::Utf8View) + Some(DataType::LargeUtf8) ); assert_eq!( binary_to_string_coercion(&DataType::Utf8View, &DataType::Int32),