From ae69e1ec8ed0aa7f1fadad5a3171ef86188190d7 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 3 Sep 2024 16:43:35 +0530 Subject: [PATCH 01/48] Adds new library `functions-window-common` --- Cargo.toml | 1 + datafusion/functions-window-common/Cargo.toml | 40 +++++++++++++++++++ datafusion/functions-window-common/README.md | 26 ++++++++++++ datafusion/functions-window-common/src/lib.rs | 20 ++++++++++ 4 files changed, 87 insertions(+) create mode 100644 datafusion/functions-window-common/Cargo.toml create mode 100644 datafusion/functions-window-common/README.md create mode 100644 datafusion/functions-window-common/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 877cead93673..26ad7eeb3936 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ members = [ "datafusion/functions-aggregate-common", "datafusion/functions-nested", "datafusion/functions-window", + "datafusion/functions-window-common", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-expr-common", diff --git a/datafusion/functions-window-common/Cargo.toml b/datafusion/functions-window-common/Cargo.toml new file mode 100644 index 000000000000..fe7a612070bf --- /dev/null +++ b/datafusion/functions-window-common/Cargo.toml @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-functions-window-common" +description = "Common functions for implementing user-defined window functions for the DataFusion query engine" +keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" +authors = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +license = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[lints] +workspace = true + +[lib] +name = "datafusion_functions_window_common" +path = "src/lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/datafusion/functions-window-common/README.md b/datafusion/functions-window-common/README.md new file mode 100644 index 000000000000..de12d25f9731 --- /dev/null +++ b/datafusion/functions-window-common/README.md @@ -0,0 +1,26 @@ + + +# DataFusion Window Function Common Library + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. + +This crate contains common functions for implementing user-defined window functions. + +[df]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-window-common/src/lib.rs b/datafusion/functions-window-common/src/lib.rs new file mode 100644 index 000000000000..bc514352adfe --- /dev/null +++ b/datafusion/functions-window-common/src/lib.rs @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common user-defined window functionality for [DataFusion] +//! +//! [DataFusion]: From f09383ca8589f01a2f1936e7a29d476bfbaee99c Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 3 Sep 2024 17:18:24 +0530 Subject: [PATCH 02/48] Adds `FieldArgs` struct for field of final result --- Cargo.toml | 1 + datafusion-cli/Cargo.lock | 8 +++++++ datafusion/functions-window-common/Cargo.toml | 1 + .../functions-window-common/src/field.rs | 22 +++++++++++++++++++ datafusion/functions-window-common/src/lib.rs | 1 + datafusion/physical-plan/Cargo.toml | 1 + datafusion/physical-plan/src/windows/mod.rs | 4 ++++ 7 files changed, 38 insertions(+) create mode 100644 datafusion/functions-window-common/src/field.rs diff --git a/Cargo.toml b/Cargo.toml index 26ad7eeb3936..f51b33b0d53b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -104,6 +104,7 @@ datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", vers datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "41.0.0" } datafusion-functions-nested = { path = "datafusion/functions-nested", version = "41.0.0" } datafusion-functions-window = { path = "datafusion/functions-window", version = "41.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "41.0.0" } datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false } datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", default-features = false } datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index ddc6242977d3..41792c88b686 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1452,6 +1452,13 @@ dependencies = [ "log", ] +[[package]] +name = "datafusion-functions-window-common" +version = "41.0.0" +dependencies = [ + "datafusion-common", +] + [[package]] name = "datafusion-optimizer" version = "41.0.0" @@ -1541,6 +1548,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", diff --git a/datafusion/functions-window-common/Cargo.toml b/datafusion/functions-window-common/Cargo.toml index fe7a612070bf..e7758a84cbd6 100644 --- a/datafusion/functions-window-common/Cargo.toml +++ b/datafusion/functions-window-common/Cargo.toml @@ -38,3 +38,4 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +datafusion-common.workspace = true diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs new file mode 100644 index 000000000000..1574a9a2d383 --- /dev/null +++ b/datafusion/functions-window-common/src/field.rs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::arrow::datatypes::DataType; + +pub struct FieldArgs { + pub return_type: DataType, +} diff --git a/datafusion/functions-window-common/src/lib.rs b/datafusion/functions-window-common/src/lib.rs index bc514352adfe..2e4bcbbc83b9 100644 --- a/datafusion/functions-window-common/src/lib.rs +++ b/datafusion/functions-window-common/src/lib.rs @@ -18,3 +18,4 @@ //! Common user-defined window functionality for [DataFusion] //! //! [DataFusion]: +pub mod field; diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 24387c5f15ee..c3f1b7eb0e95 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -53,6 +53,7 @@ datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions-aggregate = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } +datafusion-functions-window-common = { workspace = true } datafusion-physical-expr = { workspace = true, default-features = true } datafusion-physical-expr-common = { workspace = true } futures = { workspace = true } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 56823e6dec2d..cee562dbca58 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -51,6 +51,7 @@ mod utils; mod window_agg_exec; pub use bounded_window_agg_exec::BoundedWindowAggExec; +use datafusion_functions_window_common::field::FieldArgs; use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, @@ -361,6 +362,9 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn field(&self) -> Result { + let _field_args = FieldArgs { + return_type: self.data_type.clone(), + }; Ok(Field::new( &self.name, self.data_type.clone(), From ca256fa6b152e4508abf43bf42e55863e6eb7499 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 3 Sep 2024 23:28:58 +0530 Subject: [PATCH 03/48] Adds `field` method to `WindowUDFImpl` trait --- datafusion/core/Cargo.toml | 1 + .../user_defined_window_functions.rs | 7 ++++++- datafusion/expr/Cargo.toml | 1 + datafusion/expr/src/expr_fn.rs | 5 +++++ datafusion/expr/src/udwf.rs | 17 ++++++++++++++++- datafusion/functions-window-common/src/field.rs | 3 ++- datafusion/functions-window/Cargo.toml | 1 + datafusion/functions-window/src/row_number.rs | 7 +++++++ datafusion/optimizer/Cargo.toml | 1 + .../src/simplify_expressions/expr_simplifier.rs | 5 +++++ datafusion/physical-plan/src/windows/mod.rs | 10 +++------- datafusion/proto/Cargo.toml | 1 + .../proto/tests/cases/roundtrip_logical_plan.rs | 5 +++++ 13 files changed, 54 insertions(+), 10 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index de228e058096..ba1311e00fdd 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -145,6 +145,7 @@ bigdecimal = { workspace = true } criterion = { version = "0.5", features = ["async_tokio"] } csv = "1.1.6" ctor = { workspace = true } +datafusion-functions-window-common = {workspace = true} doc-comment = { workspace = true } env_logger = { workspace = true } half = { workspace = true, default-features = true } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 3c607301fc98..5845fa2ed3fa 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -29,12 +29,13 @@ use std::{ use arrow::array::AsArray; use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray}; -use arrow_schema::DataType; +use arrow_schema::{DataType, Field}; use datafusion::{assert_batches_eq, prelude::SessionContext}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, }; +use datafusion_functions_window_common::field::FieldArgs; /// A query with a window function evaluated over the entire partition const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ @@ -565,6 +566,10 @@ impl OddCounter { fn aliases(&self) -> &[String] { &self.aliases } + + fn field(&self, field_args: FieldArgs) -> Result { + Ok(Field::new(field_args.display_name, field_args.return_type, true)) + } } ctx.register_udwf(WindowUDF::from(SimpleWindowUDF::new(test_state))) diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index b5d34d9a3834..e3e03d469240 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -46,6 +46,7 @@ chrono = { workspace = true } datafusion-common = { workspace = true } datafusion-expr-common = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } +datafusion-functions-window-common = {workspace = true} datafusion-physical-expr-common = { workspace = true } paste = "^1.0" serde_json = { workspace = true } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 8d01712b95ad..426faf38ff83 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -38,6 +38,7 @@ use arrow::compute::kernels::cast_utils::{ }; use arrow::datatypes::{DataType, Field}; use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; +use datafusion_functions_window_common::field::FieldArgs; use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; @@ -665,6 +666,10 @@ impl WindowUDFImpl for SimpleWindowUDF { fn partition_evaluator(&self) -> Result> { (self.partition_evaluator_factory)() } + + fn field(&self, field_args: FieldArgs) -> Result { + Ok(Field::new(field_args.display_name, field_args.return_type, true)) + } } pub fn interval_year_month_lit(value: &str) -> Expr { diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index e5fdaaceb439..2085c823f947 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -25,9 +25,10 @@ use std::{ sync::Arc, }; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, Field}; use datafusion_common::{not_impl_err, Result}; +use datafusion_functions_window_common::field::FieldArgs; use crate::expr::WindowFunction; use crate::{ @@ -185,6 +186,10 @@ impl WindowUDF { self.inner.nullable() } + pub fn field(&self, field_args: FieldArgs) -> Result { + self.inner.field(field_args) + } + /// Returns custom result ordering introduced by this window function /// which is used to update ordering equivalences. /// @@ -353,6 +358,8 @@ pub trait WindowUDFImpl: Debug + Send + Sync { true } + fn field(&self, field_args: FieldArgs) -> Result; + /// Allows the window UDF to define a custom result ordering. /// /// By default, a window UDF doesn't introduce an ordering. @@ -454,6 +461,10 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { self.inner.nullable() } + fn field(&self, field_args: FieldArgs) -> Result { + self.inner.field(field_args) + } + fn sort_options(&self) -> Option { self.inner.sort_options() } @@ -510,4 +521,8 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { fn partition_evaluator(&self) -> Result> { (self.partition_evaluator_factory)() } + + fn field(&self, field_args: FieldArgs) -> Result { + Ok(Field::new(field_args.display_name, field_args.return_type, true)) + } } diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index 1574a9a2d383..36ba51074e4b 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -17,6 +17,7 @@ use datafusion_common::arrow::datatypes::DataType; -pub struct FieldArgs { +pub struct FieldArgs<'a> { pub return_type: DataType, + pub display_name: &'a str, } diff --git a/datafusion/functions-window/Cargo.toml b/datafusion/functions-window/Cargo.toml index 94dd421284fd..8dcec6bc964b 100644 --- a/datafusion/functions-window/Cargo.toml +++ b/datafusion/functions-window/Cargo.toml @@ -40,6 +40,7 @@ path = "src/lib.rs" [dependencies] datafusion-common = { workspace = true } datafusion-expr = { workspace = true } +datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } log = { workspace = true } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 43d2796ad7dc..fc7dcb5b780a 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -25,9 +25,12 @@ use datafusion_common::arrow::array::ArrayRef; use datafusion_common::arrow::array::UInt64Array; use datafusion_common::arrow::compute::SortOptions; use datafusion_common::arrow::datatypes::DataType; +use datafusion_common::arrow::datatypes::Field; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; +use datafusion_functions_window_common::field; +use field::FieldArgs; /// Create a [`WindowFunction`](Expr::WindowFunction) expression for /// `row_number` user-defined window function. @@ -96,6 +99,10 @@ impl WindowUDFImpl for RowNumber { false } + fn field(&self, field_args: FieldArgs) -> Result { + Ok(Field::new(field_args.display_name, field_args.return_type, false)) + } + fn sort_options(&self) -> Option { Some(SortOptions { descending: false, diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 1a9e9630c076..3ee6ea37b949 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -57,5 +57,6 @@ regex-syntax = "0.8.0" arrow-buffer = { workspace = true } ctor = { workspace = true } datafusion-functions-aggregate = { workspace = true } +datafusion-functions-window-common = {workspace = true} datafusion-sql = { workspace = true } env_logger = { workspace = true } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index fc3921d29615..5c0fb388c856 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1798,6 +1798,7 @@ mod tests { interval_arithmetic::Interval, *, }; + use datafusion_functions_window_common::field::FieldArgs; use std::{ collections::HashMap, ops::{BitAnd, BitOr, BitXor}, @@ -3916,5 +3917,9 @@ mod tests { fn partition_evaluator(&self) -> Result> { unimplemented!("not needed for tests") } + + fn field(&self, _field_args: FieldArgs) -> Result { + unimplemented!("not needed for tests") + } } } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index cee562dbca58..ef1e28459d5a 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -362,14 +362,10 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn field(&self) -> Result { - let _field_args = FieldArgs { + self.fun.field(FieldArgs { return_type: self.data_type.clone(), - }; - Ok(Field::new( - &self.name, - self.data_type.clone(), - self.fun.nullable(), - )) + display_name: &self.name, + }) } fn expressions(&self) -> Vec> { diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 2804ed019b61..7b1120649e63 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -60,6 +60,7 @@ serde_json = { workspace = true, optional = true } [dev-dependencies] datafusion-functions = { workspace = true, default-features = true } datafusion-functions-aggregate = { workspace = true } +datafusion-functions-window-common = {workspace = true} doc-comment = { workspace = true } strum = { version = "0.26.1", features = ["derive"] } tokio = { workspace = true, features = ["rt-multi-thread"] } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 994ed8ad2352..65a8150ab0ae 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -74,6 +74,7 @@ use datafusion_functions_aggregate::expr_fn::{ nth_value, }; use datafusion_functions_aggregate::string_agg::string_agg; +use datafusion_functions_window_common::field::FieldArgs; use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, @@ -2417,6 +2418,10 @@ fn roundtrip_window() { fn partition_evaluator(&self) -> Result> { make_partition_evaluator() } + + fn field(&self, field_args: FieldArgs) -> Result { + Ok(Field::new(field_args.display_name, field_args.return_type, true)) + } } fn make_partition_evaluator() -> Result> { From 9b21b7f926105a0a7a4723ce2544b9a3644fde25 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 13:03:17 +0530 Subject: [PATCH 04/48] Minor: fixes formatting --- .../tests/user_defined/user_defined_window_functions.rs | 6 +++++- datafusion/expr/src/expr_fn.rs | 6 +++++- datafusion/expr/src/udwf.rs | 6 +++++- datafusion/functions-window/src/row_number.rs | 6 +++++- datafusion/proto/tests/cases/roundtrip_logical_plan.rs | 6 +++++- 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 5845fa2ed3fa..b4814eb87210 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -568,7 +568,11 @@ impl OddCounter { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, field_args.return_type, true)) + Ok(Field::new( + field_args.display_name, + field_args.return_type, + true, + )) } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 426faf38ff83..052b6c9d7c8b 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -668,7 +668,11 @@ impl WindowUDFImpl for SimpleWindowUDF { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, field_args.return_type, true)) + Ok(Field::new( + field_args.display_name, + field_args.return_type, + true, + )) } } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 2085c823f947..9e6ec7cf42f7 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -523,6 +523,10 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, field_args.return_type, true)) + Ok(Field::new( + field_args.display_name, + field_args.return_type, + true, + )) } } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index fc7dcb5b780a..cff906cd0eed 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -100,7 +100,11 @@ impl WindowUDFImpl for RowNumber { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, field_args.return_type, false)) + Ok(Field::new( + field_args.display_name, + field_args.return_type, + false, + )) } fn sort_options(&self) -> Option { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 65a8150ab0ae..b52c349d1db4 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -2420,7 +2420,11 @@ fn roundtrip_window() { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, field_args.return_type, true)) + Ok(Field::new( + field_args.display_name, + field_args.return_type, + true, + )) } } From 9324df6f225dfed2aa3623d7610f86b9f9be2507 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 13:11:25 +0530 Subject: [PATCH 05/48] Fixes: udwf doc test --- datafusion/expr/src/udwf.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 9e6ec7cf42f7..90db26d80bcd 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -226,10 +226,11 @@ where /// # Basic Example /// ``` /// # use std::any::Any; -/// # use arrow::datatypes::DataType; +/// # use arrow::datatypes::{DataType, Field}; /// # use datafusion_common::{DataFusionError, plan_err, Result}; /// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt}; /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; +/// use datafusion_functions_window_common::field::FieldArgs; /// #[derive(Debug, Clone)] /// struct SmoothIt { /// signature: Signature @@ -256,6 +257,7 @@ where /// } /// // The actual implementation would add one to the argument /// fn partition_evaluator(&self) -> Result> { unimplemented!() } +/// fn field(&self, field_args: FieldArgs) -> Result { unimplemented!() } /// } /// /// // Create a new WindowUDF from the implementation From 4cd146706475886acfb1f2a4237e647f73a15a2c Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 13:16:12 +0530 Subject: [PATCH 06/48] Fixes: implements missing trait items --- datafusion-examples/examples/advanced_udwf.rs | 6 ++++++ .../examples/simplify_udwf_expression.rs | 10 +++++++--- datafusion/expr/src/function.rs | 2 ++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index ec0318a561b9..79ff7dc9ea6d 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -22,9 +22,11 @@ use arrow::{ array::{ArrayRef, AsArray, Float64Array}, datatypes::Float64Type, }; +use arrow_schema::Field; use datafusion::error::Result; use datafusion::prelude::*; use datafusion_common::ScalarValue; +use datafusion_expr::function::FieldArgs; use datafusion_expr::{ PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl, }; @@ -80,6 +82,10 @@ impl WindowUDFImpl for SmoothItUdf { fn partition_evaluator(&self) -> Result> { Ok(Box::new(MyPartitionEvaluator::new())) } + + fn field(&self, _field_args: FieldArgs) -> Result { + unimplemented!() + } } /// This implements the lowest level evaluation for a window function diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index a17e45dba2a3..0b6ed045c36e 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -17,12 +17,12 @@ use std::any::Any; -use arrow_schema::DataType; +use arrow_schema::{DataType, Field}; use datafusion::execution::context::SessionContext; use datafusion::functions_aggregate::average::avg_udaf; use datafusion::{error::Result, execution::options::CsvReadOptions}; -use datafusion_expr::function::WindowFunctionSimplification; +use datafusion_expr::function::{FieldArgs, WindowFunctionSimplification}; use datafusion_expr::{ expr::WindowFunction, simplify::SimplifyInfo, Expr, PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, @@ -65,7 +65,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { } fn partition_evaluator(&self) -> Result> { - todo!() + unimplemented!() } /// this function will simplify `SimplifySmoothItUdf` to `SmoothItUdf`. @@ -84,6 +84,10 @@ impl WindowUDFImpl for SimplifySmoothItUdf { Some(Box::new(simplify)) } + + fn field(&self, _field_args: FieldArgs) -> Result { + unimplemented!() + } } // create local execution context with `cars.csv` registered as a table named `cars` diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index cd7a0c8aa918..59f6f6865ca2 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -27,6 +27,8 @@ pub use datafusion_functions_aggregate_common::accumulator::{ AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs, }; +pub use datafusion_functions_window_common::field::FieldArgs; + #[derive(Debug, Clone, Copy)] pub enum Hint { /// Indicates the argument needs to be padded if it is scalar From deff13ce2513c7e148b5278d46affd13df670f97 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 13:32:29 +0530 Subject: [PATCH 07/48] Updates `datafusion-cli` dependencies --- datafusion-cli/Cargo.lock | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 41792c88b686..1f2f2c95bd23 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1348,6 +1348,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr-common", "paste", "serde_json", @@ -1448,6 +1449,7 @@ version = "41.0.0" dependencies = [ "datafusion-common", "datafusion-expr", + "datafusion-functions-window-common", "datafusion-physical-expr-common", "log", ] From ac5ef0a1d3e8afbd34264f26fcbae756c41545c9 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 13:32:49 +0530 Subject: [PATCH 08/48] Fixes: formatting of `Cargo.toml` files --- datafusion/core/Cargo.toml | 2 +- datafusion/expr/Cargo.toml | 2 +- datafusion/optimizer/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index ba1311e00fdd..702c5c2b5856 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -145,7 +145,7 @@ bigdecimal = { workspace = true } criterion = { version = "0.5", features = ["async_tokio"] } csv = "1.1.6" ctor = { workspace = true } -datafusion-functions-window-common = {workspace = true} +datafusion-functions-window-common = { workspace = true } doc-comment = { workspace = true } env_logger = { workspace = true } half = { workspace = true, default-features = true } diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index e3e03d469240..55387fea22ee 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -46,7 +46,7 @@ chrono = { workspace = true } datafusion-common = { workspace = true } datafusion-expr-common = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } -datafusion-functions-window-common = {workspace = true} +datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } paste = "^1.0" serde_json = { workspace = true } diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 3ee6ea37b949..337a24ffae20 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -57,6 +57,6 @@ regex-syntax = "0.8.0" arrow-buffer = { workspace = true } ctor = { workspace = true } datafusion-functions-aggregate = { workspace = true } -datafusion-functions-window-common = {workspace = true} +datafusion-functions-window-common = { workspace = true } datafusion-sql = { workspace = true } env_logger = { workspace = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 7b1120649e63..a5151d588cbc 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -60,7 +60,7 @@ serde_json = { workspace = true, optional = true } [dev-dependencies] datafusion-functions = { workspace = true, default-features = true } datafusion-functions-aggregate = { workspace = true } -datafusion-functions-window-common = {workspace = true} +datafusion-functions-window-common = { workspace = true } doc-comment = { workspace = true } strum = { version = "0.26.1", features = ["derive"] } tokio = { workspace = true, features = ["rt-multi-thread"] } From 7c4293a522dd4c1e0fa6e8bb1d51127a9e762134 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 15:52:02 +0530 Subject: [PATCH 09/48] Fixes: implementation of `field` in udwf example --- datafusion-examples/examples/advanced_udwf.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 79ff7dc9ea6d..fec62149d81e 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -83,8 +83,12 @@ impl WindowUDFImpl for SmoothItUdf { Ok(Box::new(MyPartitionEvaluator::new())) } - fn field(&self, _field_args: FieldArgs) -> Result { - unimplemented!() + fn field(&self, field_args: FieldArgs) -> Result { + Ok(Field::new( + field_args.display_name, + field_args.return_type, + true, + )) } } From 83d833b323c3cd568a5cb4b5358aa02d4854295f Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 23:40:21 +0530 Subject: [PATCH 10/48] Pass `FieldArgs` argument to `field` --- datafusion/functions-window-common/Cargo.toml | 2 +- .../functions-window-common/src/field.rs | 8 ++++- datafusion/physical-plan/src/windows/mod.rs | 32 +++++++++++-------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/datafusion/functions-window-common/Cargo.toml b/datafusion/functions-window-common/Cargo.toml index e7758a84cbd6..98b6f8c6dba5 100644 --- a/datafusion/functions-window-common/Cargo.toml +++ b/datafusion/functions-window-common/Cargo.toml @@ -38,4 +38,4 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -datafusion-common.workspace = true +datafusion-common = { workspace = true } diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index 36ba51074e4b..116a2dd4d1d8 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -18,6 +18,12 @@ use datafusion_common::arrow::datatypes::DataType; pub struct FieldArgs<'a> { - pub return_type: DataType, + pub input_types: &'a [DataType], pub display_name: &'a str, } + +impl FieldArgs<'_> { + pub fn get_input_type(&self, index: usize) -> Option { + self.input_types.get(index).cloned() + } +} diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index ef1e28459d5a..bf2d284ba212 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -74,7 +74,16 @@ pub fn schema_add_window_field( .iter() .map(|e| Arc::clone(e).as_ref().nullable(schema)) .collect::>>()?; - let window_expr_return_type = window_fn.return_type(&data_types, &nullability)?; + let window_expr_return_type = + if let WindowFunctionDefinition::WindowUDF(udwf) = window_fn { + udwf.field(FieldArgs { + input_types: &data_types, + display_name: fn_name, + }) + .map(|field| field.data_type().clone())? + } else { + window_fn.return_type(&data_types, &nullability)? + }; let mut window_fields = schema .fields() .iter() @@ -329,19 +338,11 @@ fn create_udwf_window_expr( input_schema: &Schema, name: String, ) -> Result> { - // need to get the types into an owned vec for some reason - let input_types: Vec<_> = args - .iter() - .map(|arg| arg.data_type(input_schema)) - .collect::>()?; - - // figure out the output type - let data_type = fun.return_type(&input_types)?; Ok(Arc::new(WindowUDFExpr { fun: Arc::clone(fun), args: args.to_vec(), + input_schema: input_schema.clone(), name, - data_type, })) } @@ -350,10 +351,9 @@ fn create_udwf_window_expr( struct WindowUDFExpr { fun: Arc, args: Vec>, + input_schema: Schema, /// Display name name: String, - /// result type - data_type: DataType, } impl BuiltInWindowFunctionExpr for WindowUDFExpr { @@ -362,8 +362,14 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn field(&self) -> Result { + let input_types: Vec = self + .args + .iter() + .map(|arg| arg.data_type(&self.input_schema)) + .collect::>()?; + self.fun.field(FieldArgs { - return_type: self.data_type.clone(), + input_types: &input_types, display_name: &self.name, }) } From 927f1cc9a54809d1eb005413bf73225a3d4b721b Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 23:42:27 +0530 Subject: [PATCH 11/48] Use `field` in place of `return_type` for udwf --- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/expr_schema.rs | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 8914214d084f..f3e1895c383f 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -714,7 +714,7 @@ impl WindowFunctionDefinition { WindowFunctionDefinition::AggregateUDF(fun) => { fun.return_type(input_expr_types) } - WindowFunctionDefinition::WindowUDF(fun) => fun.return_type(input_expr_types), + WindowFunctionDefinition::WindowUDF(_) => unimplemented!(), } } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 894b7e58d954..3bfffc024f97 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -31,6 +31,7 @@ use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result, TableReference, }; +use datafusion_functions_window_common::field::FieldArgs; use std::collections::HashMap; use std::sync::Arc; @@ -166,7 +167,13 @@ impl ExprSchemable for Expr { // expressiveness of `TypeSignature`), then infer return type Ok(func.return_type_from_exprs(args, schema, &arg_data_types)?) } - Expr::WindowFunction(WindowFunction { fun, args, .. }) => { + Expr::WindowFunction(WindowFunction { + fun, + args, + partition_by, + order_by, + .. + }) => { let data_types = args .iter() .map(|e| e.get_type(schema)) @@ -204,7 +211,16 @@ impl ExprSchemable for Expr { ) ) })?; - Ok(fun.return_type(&new_types, &nullability)?) + // Ok(fun.return_type(&new_types, &nullability)?) + let display_name = format!( + "{}({:?}) PARTITION BY: [{:?}], ORDER BY: [{:?}]", + fun, args, partition_by, order_by + ); + udwf.field(FieldArgs { + input_types: new_types.as_ref(), + display_name: &display_name, + }) + .map(|field| field.data_type().clone()) } _ => fun.return_type(&data_types, &nullability), } From e29a4935baf0d2ed5d753fbf96d52eaa56fd87b3 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Wed, 4 Sep 2024 23:44:33 +0530 Subject: [PATCH 12/48] Update `field` in udwf implementations --- datafusion-examples/examples/advanced_udwf.rs | 6 +----- .../user_defined/user_defined_window_functions.rs | 2 +- datafusion/expr/src/expr_fn.rs | 2 +- datafusion/expr/src/udwf.rs | 4 +++- datafusion/functions-window/src/row_number.rs | 6 +----- .../proto/tests/cases/roundtrip_logical_plan.rs | 14 +++++++++----- 6 files changed, 16 insertions(+), 18 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index fec62149d81e..bfc61f5d94db 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -84,11 +84,7 @@ impl WindowUDFImpl for SmoothItUdf { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new( - field_args.display_name, - field_args.return_type, - true, - )) + Ok(Field::new(field_args.display_name, DataType::Float64, true)) } } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index b4814eb87210..bccc1236be84 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -570,7 +570,7 @@ impl OddCounter { fn field(&self, field_args: FieldArgs) -> Result { Ok(Field::new( field_args.display_name, - field_args.return_type, + self.return_type.clone(), true, )) } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 052b6c9d7c8b..2d6a9d760d54 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -670,7 +670,7 @@ impl WindowUDFImpl for SimpleWindowUDF { fn field(&self, field_args: FieldArgs) -> Result { Ok(Field::new( field_args.display_name, - field_args.return_type, + self.return_type.clone(), true, )) } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 90db26d80bcd..26962447a416 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -525,9 +525,11 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { } fn field(&self, field_args: FieldArgs) -> Result { + let return_type = (self.return_type)(field_args.input_types)?; + Ok(Field::new( field_args.display_name, - field_args.return_type, + return_type.as_ref().clone(), true, )) } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index cff906cd0eed..1c5376b84360 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -100,11 +100,7 @@ impl WindowUDFImpl for RowNumber { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new( - field_args.display_name, - field_args.return_type, - false, - )) + Ok(Field::new(field_args.display_name, DataType::UInt64, false)) } fn sort_options(&self) -> Option { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index b52c349d1db4..789e8a5a8bb2 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -2420,11 +2420,15 @@ fn roundtrip_window() { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new( - field_args.display_name, - field_args.return_type, - true, - )) + if let Some(return_type) = field_args.get_input_type(0) { + Ok(Field::new(field_args.display_name, return_type, true)) + } else { + plan_err!( + "dummy_udwf expects 1 argument, got {}: {:?}", + field_args.input_types.len(), + field_args.input_types + ) + } } } From fc969e887271b3de6dc5e534b36bfc5b68856b1c Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 01:19:06 +0530 Subject: [PATCH 13/48] Fixes: implementation of `field` in udwf example --- datafusion-examples/examples/simplify_udwf_expression.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 0b6ed045c36e..0f497c36b635 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -85,8 +85,8 @@ impl WindowUDFImpl for SimplifySmoothItUdf { Some(Box::new(simplify)) } - fn field(&self, _field_args: FieldArgs) -> Result { - unimplemented!() + fn field(&self, field_args: FieldArgs) -> Result { + Ok(Field::new(field_args.display_name, DataType::Float64, true)) } } From df2b1c85ba1f10d198e206d1b30d14056fa865de Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 01:21:45 +0530 Subject: [PATCH 14/48] Revert unrelated change --- datafusion-examples/examples/simplify_udwf_expression.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 0f497c36b635..63d9bd835052 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -65,7 +65,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { } fn partition_evaluator(&self) -> Result> { - unimplemented!() + todo!() } /// this function will simplify `SimplifySmoothItUdf` to `SmoothItUdf`. From e26aba614de79340957b0d1b4b486868d117a37a Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 01:26:00 +0530 Subject: [PATCH 15/48] Mark `return_type` for udwf as unreachable --- datafusion/expr/src/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index f3e1895c383f..0cf21c4cb3e2 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -714,7 +714,7 @@ impl WindowFunctionDefinition { WindowFunctionDefinition::AggregateUDF(fun) => { fun.return_type(input_expr_types) } - WindowFunctionDefinition::WindowUDF(_) => unimplemented!(), + WindowFunctionDefinition::WindowUDF(_) => unreachable!(), } } From 258986d01ab32c975de6f01c35f59e3da1f54879 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 01:42:52 +0530 Subject: [PATCH 16/48] Delete code --- datafusion/expr/src/expr_schema.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 3bfffc024f97..1a19e5fd598e 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -211,7 +211,6 @@ impl ExprSchemable for Expr { ) ) })?; - // Ok(fun.return_type(&new_types, &nullability)?) let display_name = format!( "{}({:?}) PARTITION BY: [{:?}], ORDER BY: [{:?}]", fun, args, partition_by, order_by From ba9e39aa4b1628bfaaa85f11ef04225928452e7f Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 15:47:25 +0530 Subject: [PATCH 17/48] Uses schema name of udwf to construct `FieldArgs` --- datafusion/expr/src/expr_schema.rs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 1a19e5fd598e..3372cffb7e51 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -167,13 +167,7 @@ impl ExprSchemable for Expr { // expressiveness of `TypeSignature`), then infer return type Ok(func.return_type_from_exprs(args, schema, &arg_data_types)?) } - Expr::WindowFunction(WindowFunction { - fun, - args, - partition_by, - order_by, - .. - }) => { + Expr::WindowFunction(WindowFunction { fun, args, .. }) => { let data_types = args .iter() .map(|e| e.get_type(schema)) @@ -211,13 +205,9 @@ impl ExprSchemable for Expr { ) ) })?; - let display_name = format!( - "{}({:?}) PARTITION BY: [{:?}], ORDER BY: [{:?}]", - fun, args, partition_by, order_by - ); udwf.field(FieldArgs { input_types: new_types.as_ref(), - display_name: &display_name, + display_name: self.schema_name().to_string().as_ref(), }) .map(|field| field.data_type().clone()) } From b671b36c1884779348ca23bd43b21748eb8b6b00 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 16:25:11 +0530 Subject: [PATCH 18/48] Adds deprecated notice to `return_type` trait method --- datafusion/expr/src/udwf.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 26962447a416..8ccad9005db6 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -163,6 +163,7 @@ impl WindowUDF { /// Return the type of the function given its input types /// /// See [`WindowUDFImpl::return_type`] for more details. + #[allow(deprecated)] pub fn return_type(&self, args: &[DataType]) -> Result { self.inner.return_type(args) } @@ -288,6 +289,10 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// What [`DataType`] will be returned by this function, given the types of /// the arguments + #[deprecated( + since = "41.0.0", + note = "Use `field` instead to define the final result of evaluating this user-defined window function." + )] fn return_type(&self, arg_types: &[DataType]) -> Result; /// Invoke the function, returning the [`PartitionEvaluator`] instance @@ -428,6 +433,7 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { self.inner.signature() } + #[allow(deprecated)] fn return_type(&self, arg_types: &[DataType]) -> Result { self.inner.return_type(arg_types) } From 712263e943502b289960c81016b7996d02150873 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 16:44:18 +0530 Subject: [PATCH 19/48] Add doc comments to `field` trait method --- datafusion/expr/src/udwf.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 8ccad9005db6..9be10a9c2da3 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -187,6 +187,9 @@ impl WindowUDF { self.inner.nullable() } + /// Returns the field of the final result of evaluating this window function. + /// + /// See [`WindowUDFImpl::field`] for more details. pub fn field(&self, field_args: FieldArgs) -> Result { self.inner.field(field_args) } @@ -365,6 +368,10 @@ pub trait WindowUDFImpl: Debug + Send + Sync { true } + /// The field of the final result of evaluating this window function. + /// + /// The [`FieldArgs`] argument allows the window UDF to customize the + /// field given the types of the input expressions. fn field(&self, field_args: FieldArgs) -> Result; /// Allows the window UDF to define a custom result ordering. From 94269781f1dacddbc76c7ff5403124911d3aac43 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 17:29:11 +0530 Subject: [PATCH 20/48] Reify `input_types` when creating the udwf window expression --- datafusion/physical-plan/src/windows/mod.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index bf2d284ba212..be096df81106 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -338,10 +338,16 @@ fn create_udwf_window_expr( input_schema: &Schema, name: String, ) -> Result> { + // need to get the types into an owned vec for some reason + let input_types: Vec<_> = args + .iter() + .map(|arg| arg.data_type(input_schema)) + .collect::>()?; + Ok(Arc::new(WindowUDFExpr { fun: Arc::clone(fun), args: args.to_vec(), - input_schema: input_schema.clone(), + input_types, name, })) } @@ -351,9 +357,10 @@ fn create_udwf_window_expr( struct WindowUDFExpr { fun: Arc, args: Vec>, - input_schema: Schema, /// Display name name: String, + /// Types of input expressions + input_types: Vec, } impl BuiltInWindowFunctionExpr for WindowUDFExpr { @@ -362,14 +369,8 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn field(&self) -> Result { - let input_types: Vec = self - .args - .iter() - .map(|arg| arg.data_type(&self.input_schema)) - .collect::>()?; - self.fun.field(FieldArgs { - input_types: &input_types, + input_types: &self.input_types, display_name: &self.name, }) } From 327db324a58675384983e624150dd5a0a8cf98e4 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 17:32:53 +0530 Subject: [PATCH 21/48] Rename name field to `schema_name` in `FieldArgs` --- datafusion-examples/examples/advanced_udwf.rs | 2 +- datafusion-examples/examples/simplify_udwf_expression.rs | 2 +- .../core/tests/user_defined/user_defined_window_functions.rs | 2 +- datafusion/expr/src/expr_fn.rs | 2 +- datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/udwf.rs | 2 +- datafusion/functions-window-common/src/field.rs | 2 +- datafusion/functions-window/src/row_number.rs | 2 +- datafusion/physical-plan/src/windows/mod.rs | 4 ++-- datafusion/proto/tests/cases/roundtrip_logical_plan.rs | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index bfc61f5d94db..d8c05bc93722 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -84,7 +84,7 @@ impl WindowUDFImpl for SmoothItUdf { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, DataType::Float64, true)) + Ok(Field::new(field_args.schema_name, DataType::Float64, true)) } } diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 63d9bd835052..e273e56046e8 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -86,7 +86,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, DataType::Float64, true)) + Ok(Field::new(field_args.schema_name, DataType::Float64, true)) } } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index bccc1236be84..2e45b2755fde 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -569,7 +569,7 @@ impl OddCounter { fn field(&self, field_args: FieldArgs) -> Result { Ok(Field::new( - field_args.display_name, + field_args.schema_name, self.return_type.clone(), true, )) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 2d6a9d760d54..4836cc100875 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -669,7 +669,7 @@ impl WindowUDFImpl for SimpleWindowUDF { fn field(&self, field_args: FieldArgs) -> Result { Ok(Field::new( - field_args.display_name, + field_args.schema_name, self.return_type.clone(), true, )) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 3372cffb7e51..01c296c6803a 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -207,7 +207,7 @@ impl ExprSchemable for Expr { })?; udwf.field(FieldArgs { input_types: new_types.as_ref(), - display_name: self.schema_name().to_string().as_ref(), + schema_name: self.schema_name().to_string().as_ref(), }) .map(|field| field.data_type().clone()) } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 9be10a9c2da3..a97f737164e6 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -541,7 +541,7 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { let return_type = (self.return_type)(field_args.input_types)?; Ok(Field::new( - field_args.display_name, + field_args.schema_name, return_type.as_ref().clone(), true, )) diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index 116a2dd4d1d8..6d65256718ab 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -19,7 +19,7 @@ use datafusion_common::arrow::datatypes::DataType; pub struct FieldArgs<'a> { pub input_types: &'a [DataType], - pub display_name: &'a str, + pub schema_name: &'a str, } impl FieldArgs<'_> { diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 1c5376b84360..e950e40ca703 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -100,7 +100,7 @@ impl WindowUDFImpl for RowNumber { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.display_name, DataType::UInt64, false)) + Ok(Field::new(field_args.schema_name, DataType::UInt64, false)) } fn sort_options(&self) -> Option { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index be096df81106..2e6e2403e1b4 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -78,7 +78,7 @@ pub fn schema_add_window_field( if let WindowFunctionDefinition::WindowUDF(udwf) = window_fn { udwf.field(FieldArgs { input_types: &data_types, - display_name: fn_name, + schema_name: fn_name, }) .map(|field| field.data_type().clone())? } else { @@ -371,7 +371,7 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { fn field(&self) -> Result { self.fun.field(FieldArgs { input_types: &self.input_types, - display_name: &self.name, + schema_name: &self.name, }) } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 4a27b2714cdb..80be9d80ffa8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -2423,7 +2423,7 @@ fn roundtrip_window() { fn field(&self, field_args: FieldArgs) -> Result { if let Some(return_type) = field_args.get_input_type(0) { - Ok(Field::new(field_args.display_name, return_type, true)) + Ok(Field::new(field_args.schema_name, return_type, true)) } else { plan_err!( "dummy_udwf expects 1 argument, got {}: {:?}", From f5b62efc211660c29fc1d55b9a8d846c0921df82 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 18:08:57 +0530 Subject: [PATCH 22/48] Make `FieldArgs` opaque --- datafusion-examples/examples/advanced_udwf.rs | 2 +- .../examples/simplify_udwf_expression.rs | 2 +- .../user_defined_window_functions.rs | 2 +- datafusion/expr/src/expr_fn.rs | 2 +- datafusion/expr/src/expr_schema.rs | 10 ++++----- datafusion/expr/src/udwf.rs | 4 ++-- .../functions-window-common/src/field.rs | 21 ++++++++++++++++--- datafusion/functions-window/src/row_number.rs | 2 +- datafusion/physical-plan/src/windows/mod.rs | 16 +++++++------- .../tests/cases/roundtrip_logical_plan.rs | 6 +++--- 10 files changed, 40 insertions(+), 27 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index d8c05bc93722..167eb432b301 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -84,7 +84,7 @@ impl WindowUDFImpl for SmoothItUdf { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.schema_name, DataType::Float64, true)) + Ok(Field::new(field_args.name(), DataType::Float64, true)) } } diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index e273e56046e8..e367adabd927 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -86,7 +86,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.schema_name, DataType::Float64, true)) + Ok(Field::new(field_args.name(), DataType::Float64, true)) } } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 2e45b2755fde..4e18b004b7e4 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -569,7 +569,7 @@ impl OddCounter { fn field(&self, field_args: FieldArgs) -> Result { Ok(Field::new( - field_args.schema_name, + field_args.name(), self.return_type.clone(), true, )) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4836cc100875..cbca14c165d7 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -669,7 +669,7 @@ impl WindowUDFImpl for SimpleWindowUDF { fn field(&self, field_args: FieldArgs) -> Result { Ok(Field::new( - field_args.schema_name, + field_args.name(), self.return_type.clone(), true, )) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 01c296c6803a..26f670c38857 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -205,11 +205,11 @@ impl ExprSchemable for Expr { ) ) })?; - udwf.field(FieldArgs { - input_types: new_types.as_ref(), - schema_name: self.schema_name().to_string().as_ref(), - }) - .map(|field| field.data_type().clone()) + let schema_name = self.schema_name().to_string(); + let field_args = FieldArgs::new(&new_types, &schema_name); + + udwf.field(field_args) + .map(|field| field.data_type().clone()) } _ => fun.return_type(&data_types, &nullability), } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index a97f737164e6..740c02ace573 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -538,10 +538,10 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { } fn field(&self, field_args: FieldArgs) -> Result { - let return_type = (self.return_type)(field_args.input_types)?; + let return_type = (self.return_type)(field_args.input_types())?; Ok(Field::new( - field_args.schema_name, + field_args.name(), return_type.as_ref().clone(), true, )) diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index 6d65256718ab..cb2a0bb09d55 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -18,11 +18,26 @@ use datafusion_common::arrow::datatypes::DataType; pub struct FieldArgs<'a> { - pub input_types: &'a [DataType], - pub schema_name: &'a str, + input_types: &'a [DataType], + schema_name: &'a str, } -impl FieldArgs<'_> { +impl<'a> FieldArgs<'a> { + pub fn new(input_types: &'a [DataType], schema_name: &'a str) -> Self { + FieldArgs { + input_types, + schema_name, + } + } + + pub fn input_types(&self) -> &[DataType] { + self.input_types + } + + pub fn name(&self) -> &str { + self.schema_name + } + pub fn get_input_type(&self, index: usize) -> Option { self.input_types.get(index).cloned() } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index e950e40ca703..b03f3bac8e4a 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -100,7 +100,7 @@ impl WindowUDFImpl for RowNumber { } fn field(&self, field_args: FieldArgs) -> Result { - Ok(Field::new(field_args.schema_name, DataType::UInt64, false)) + Ok(Field::new(field_args.name(), DataType::UInt64, false)) } fn sort_options(&self) -> Option { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 2e6e2403e1b4..1e7e64cc0602 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -76,11 +76,10 @@ pub fn schema_add_window_field( .collect::>>()?; let window_expr_return_type = if let WindowFunctionDefinition::WindowUDF(udwf) = window_fn { - udwf.field(FieldArgs { - input_types: &data_types, - schema_name: fn_name, - }) - .map(|field| field.data_type().clone())? + let field_args = FieldArgs::new(&data_types, fn_name); + + udwf.field(field_args) + .map(|field| field.data_type().clone())? } else { window_fn.return_type(&data_types, &nullability)? }; @@ -369,10 +368,9 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn field(&self) -> Result { - self.fun.field(FieldArgs { - input_types: &self.input_types, - schema_name: &self.name, - }) + let field_args = FieldArgs::new(&self.input_types, &self.name); + + self.fun.field(field_args) } fn expressions(&self) -> Vec> { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 80be9d80ffa8..dc66a97d618c 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -2423,12 +2423,12 @@ fn roundtrip_window() { fn field(&self, field_args: FieldArgs) -> Result { if let Some(return_type) = field_args.get_input_type(0) { - Ok(Field::new(field_args.schema_name, return_type, true)) + Ok(Field::new(field_args.name(), return_type, true)) } else { plan_err!( "dummy_udwf expects 1 argument, got {}: {:?}", - field_args.input_types.len(), - field_args.input_types + field_args.input_types().len(), + field_args.input_types() ) } } From 34565ffe835e83f4ffd1cc74698dc4617c2e882f Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 19:50:04 +0530 Subject: [PATCH 23/48] Minor refactor --- datafusion/physical-plan/src/windows/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 1e7e64cc0602..8f1df6e562fd 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -368,9 +368,8 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn field(&self) -> Result { - let field_args = FieldArgs::new(&self.input_types, &self.name); - - self.fun.field(field_args) + self.fun + .field(FieldArgs::new(&self.input_types, &self.name)) } fn expressions(&self) -> Vec> { From ede7827989a7841c63520dbc1bfb8c08255058cd Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 20:24:12 +0530 Subject: [PATCH 24/48] Removes `nullable` trait method from `WindowUDFImpl` --- datafusion/expr/src/expr_schema.rs | 15 ++++++++++++-- datafusion/expr/src/udwf.rs | 20 ------------------- datafusion/functions-window/src/row_number.rs | 4 ---- 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 26f670c38857..f9340185eaab 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -345,7 +345,7 @@ impl ExprSchemable for Expr { Expr::AggregateFunction(AggregateFunction { func, .. }) => { Ok(func.is_nullable()) } - Expr::WindowFunction(WindowFunction { fun, .. }) => match fun { + Expr::WindowFunction(WindowFunction { fun, args, .. }) => match fun { WindowFunctionDefinition::BuiltInWindowFunction(func) => { if func.name() == "RANK" || func.name() == "NTILE" @@ -357,7 +357,18 @@ impl ExprSchemable for Expr { } } WindowFunctionDefinition::AggregateUDF(func) => Ok(func.is_nullable()), - WindowFunctionDefinition::WindowUDF(udwf) => Ok(udwf.nullable()), + WindowFunctionDefinition::WindowUDF(udwf) => { + let data_types = args + .iter() + .map(|e| e.get_type(input_schema)) + .collect::>>()?; + let input_types = data_types_with_window_udf(&data_types, udwf)?; + let schema_name = self.schema_name().to_string(); + let field_args = FieldArgs::new(&input_types, &schema_name); + let field = udwf.field(field_args)?; + + Ok(field.is_nullable()) + } }, Expr::ScalarVariable(_, _) | Expr::TryCast { .. } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 740c02ace573..81d6e894e687 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -180,13 +180,6 @@ impl WindowUDF { self.inner.partition_evaluator() } - /// Returns if column values are nullable for this window function. - /// - /// See [`WindowUDFImpl::nullable`] for more details. - pub fn nullable(&self) -> bool { - self.inner.nullable() - } - /// Returns the field of the final result of evaluating this window function. /// /// See [`WindowUDFImpl::field`] for more details. @@ -359,15 +352,6 @@ pub trait WindowUDFImpl: Debug + Send + Sync { hasher.finish() } - /// Allows customizing nullable of column for this window UDF. - /// - /// By default, the final result of evaluating the window UDF is - /// allowed to have null values. But if that is not the case then - /// it can be customized in the window UDF implementation. - fn nullable(&self) -> bool { - true - } - /// The field of the final result of evaluating this window function. /// /// The [`FieldArgs`] argument allows the window UDF to customize the @@ -472,10 +456,6 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { hasher.finish() } - fn nullable(&self) -> bool { - self.inner.nullable() - } - fn field(&self, field_args: FieldArgs) -> Result { self.inner.field(field_args) } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index b03f3bac8e4a..d584db603589 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -95,10 +95,6 @@ impl WindowUDFImpl for RowNumber { Ok(Box::::default()) } - fn nullable(&self) -> bool { - false - } - fn field(&self, field_args: FieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::UInt64, false)) } From 25f3b506c286aae4f7da9c52f926f079568c8fd9 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 21:04:17 +0530 Subject: [PATCH 25/48] Add doc comments --- .../functions-window-common/src/field.rs | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index cb2a0bb09d55..7448400461a1 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -17,12 +17,29 @@ use datafusion_common::arrow::datatypes::DataType; +/// Contains metadata which may be useful for a user-defined window +/// function to specify the field of the final result of evaluating the +/// user-defined window function. pub struct FieldArgs<'a> { + /// The data types of input expressions. input_types: &'a [DataType], + /// The display name. schema_name: &'a str, } impl<'a> FieldArgs<'a> { + /// Create an instance of [`FieldArgs`]. + /// + /// # Arguments + /// + /// * `input_types` - The data types derived from the input + /// expressions to the window function. + /// * `schema_name` - The formatted display name for the window + /// function derived from the input schema. + /// + /// # Returns + /// + /// A new [`FieldArgs`] instance. pub fn new(input_types: &'a [DataType], schema_name: &'a str) -> Self { FieldArgs { input_types, @@ -30,14 +47,20 @@ impl<'a> FieldArgs<'a> { } } + /// Returns the data types of input expressions to the window + /// function. pub fn input_types(&self) -> &[DataType] { self.input_types } + /// Returns the name of the output field of this window function. pub fn name(&self) -> &str { self.schema_name } + /// Returns an `Option` containing the data type of the + /// input expression at the specified index. It returns `None` when + /// the index is out of bounds. pub fn get_input_type(&self, index: usize) -> Option { self.input_types.get(index).cloned() } From f012bdda48afb288ad02e593b5419a7c1c45566d Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 21:07:43 +0530 Subject: [PATCH 26/48] Rename to `WindowUDFResultArgs` --- datafusion-examples/examples/advanced_udwf.rs | 4 ++-- .../examples/simplify_udwf_expression.rs | 4 ++-- .../user_defined_window_functions.rs | 4 ++-- datafusion/expr/src/expr_fn.rs | 4 ++-- datafusion/expr/src/expr_schema.rs | 6 +++--- datafusion/expr/src/function.rs | 2 +- datafusion/expr/src/udwf.rs | 16 ++++++++-------- datafusion/functions-window-common/src/lib.rs | 2 +- .../src/{field.rs => result.rs} | 10 +++++----- datafusion/functions-window/src/row_number.rs | 6 +++--- .../src/simplify_expressions/expr_simplifier.rs | 4 ++-- datafusion/physical-plan/src/windows/mod.rs | 6 +++--- .../proto/tests/cases/roundtrip_logical_plan.rs | 4 ++-- 13 files changed, 36 insertions(+), 36 deletions(-) rename datafusion/functions-window-common/src/{field.rs => result.rs} (91%) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 167eb432b301..946c55313897 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -26,7 +26,7 @@ use arrow_schema::Field; use datafusion::error::Result; use datafusion::prelude::*; use datafusion_common::ScalarValue; -use datafusion_expr::function::FieldArgs; +use datafusion_expr::function::WindowUDFResultArgs; use datafusion_expr::{ PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl, }; @@ -83,7 +83,7 @@ impl WindowUDFImpl for SmoothItUdf { Ok(Box::new(MyPartitionEvaluator::new())) } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Float64, true)) } } diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index e367adabd927..05ebf159a685 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -22,7 +22,7 @@ use arrow_schema::{DataType, Field}; use datafusion::execution::context::SessionContext; use datafusion::functions_aggregate::average::avg_udaf; use datafusion::{error::Result, execution::options::CsvReadOptions}; -use datafusion_expr::function::{FieldArgs, WindowFunctionSimplification}; +use datafusion_expr::function::{WindowUDFResultArgs, WindowFunctionSimplification}; use datafusion_expr::{ expr::WindowFunction, simplify::SimplifyInfo, Expr, PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, @@ -85,7 +85,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { Some(Box::new(simplify)) } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Float64, true)) } } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 4e18b004b7e4..158cc57c60be 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -35,7 +35,7 @@ use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, }; -use datafusion_functions_window_common::field::FieldArgs; +use datafusion_functions_window_common::result::WindowUDFResultArgs; /// A query with a window function evaluated over the entire partition const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ @@ -567,7 +567,7 @@ impl OddCounter { &self.aliases } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { Ok(Field::new( field_args.name(), self.return_type.clone(), diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index cbca14c165d7..ebfd3abc3770 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -38,7 +38,7 @@ use arrow::compute::kernels::cast_utils::{ }; use arrow::datatypes::{DataType, Field}; use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; -use datafusion_functions_window_common::field::FieldArgs; +use datafusion_functions_window_common::result::WindowUDFResultArgs; use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; @@ -667,7 +667,7 @@ impl WindowUDFImpl for SimpleWindowUDF { (self.partition_evaluator_factory)() } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { Ok(Field::new( field_args.name(), self.return_type.clone(), diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index f9340185eaab..539a1abe8efa 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -31,7 +31,7 @@ use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result, TableReference, }; -use datafusion_functions_window_common::field::FieldArgs; +use datafusion_functions_window_common::result::WindowUDFResultArgs; use std::collections::HashMap; use std::sync::Arc; @@ -206,7 +206,7 @@ impl ExprSchemable for Expr { ) })?; let schema_name = self.schema_name().to_string(); - let field_args = FieldArgs::new(&new_types, &schema_name); + let field_args = WindowUDFResultArgs::new(&new_types, &schema_name); udwf.field(field_args) .map(|field| field.data_type().clone()) @@ -364,7 +364,7 @@ impl ExprSchemable for Expr { .collect::>>()?; let input_types = data_types_with_window_udf(&data_types, udwf)?; let schema_name = self.schema_name().to_string(); - let field_args = FieldArgs::new(&input_types, &schema_name); + let field_args = WindowUDFResultArgs::new(&input_types, &schema_name); let field = udwf.field(field_args)?; Ok(field.is_nullable()) diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 59f6f6865ca2..1dee8f64f880 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -27,7 +27,7 @@ pub use datafusion_functions_aggregate_common::accumulator::{ AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs, }; -pub use datafusion_functions_window_common::field::FieldArgs; +pub use datafusion_functions_window_common::result::WindowUDFResultArgs; #[derive(Debug, Clone, Copy)] pub enum Hint { diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 81d6e894e687..f746368ae54b 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -28,7 +28,7 @@ use std::{ use arrow::datatypes::{DataType, Field}; use datafusion_common::{not_impl_err, Result}; -use datafusion_functions_window_common::field::FieldArgs; +use datafusion_functions_window_common::result::WindowUDFResultArgs; use crate::expr::WindowFunction; use crate::{ @@ -183,7 +183,7 @@ impl WindowUDF { /// Returns the field of the final result of evaluating this window function. /// /// See [`WindowUDFImpl::field`] for more details. - pub fn field(&self, field_args: FieldArgs) -> Result { + pub fn field(&self, field_args: WindowUDFResultArgs) -> Result { self.inner.field(field_args) } @@ -227,7 +227,7 @@ where /// # use datafusion_common::{DataFusionError, plan_err, Result}; /// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt}; /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; -/// use datafusion_functions_window_common::field::FieldArgs; +/// use datafusion_functions_window_common::result::WindowUDFResultArgs; /// #[derive(Debug, Clone)] /// struct SmoothIt { /// signature: Signature @@ -254,7 +254,7 @@ where /// } /// // The actual implementation would add one to the argument /// fn partition_evaluator(&self) -> Result> { unimplemented!() } -/// fn field(&self, field_args: FieldArgs) -> Result { unimplemented!() } +/// fn field(&self, field_args: WindowUDFResultArgs) -> Result { unimplemented!() } /// } /// /// // Create a new WindowUDF from the implementation @@ -354,9 +354,9 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// The field of the final result of evaluating this window function. /// - /// The [`FieldArgs`] argument allows the window UDF to customize the + /// The [`WindowUDFResultArgs`] argument allows the window UDF to customize the /// field given the types of the input expressions. - fn field(&self, field_args: FieldArgs) -> Result; + fn field(&self, field_args: WindowUDFResultArgs) -> Result; /// Allows the window UDF to define a custom result ordering. /// @@ -456,7 +456,7 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { hasher.finish() } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { self.inner.field(field_args) } @@ -517,7 +517,7 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { (self.partition_evaluator_factory)() } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { let return_type = (self.return_type)(field_args.input_types())?; Ok(Field::new( diff --git a/datafusion/functions-window-common/src/lib.rs b/datafusion/functions-window-common/src/lib.rs index 2e4bcbbc83b9..5a8926cb2fd9 100644 --- a/datafusion/functions-window-common/src/lib.rs +++ b/datafusion/functions-window-common/src/lib.rs @@ -18,4 +18,4 @@ //! Common user-defined window functionality for [DataFusion] //! //! [DataFusion]: -pub mod field; +pub mod result; diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/result.rs similarity index 91% rename from datafusion/functions-window-common/src/field.rs rename to datafusion/functions-window-common/src/result.rs index 7448400461a1..d794e4af5448 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/result.rs @@ -20,15 +20,15 @@ use datafusion_common::arrow::datatypes::DataType; /// Contains metadata which may be useful for a user-defined window /// function to specify the field of the final result of evaluating the /// user-defined window function. -pub struct FieldArgs<'a> { +pub struct WindowUDFResultArgs<'a> { /// The data types of input expressions. input_types: &'a [DataType], /// The display name. schema_name: &'a str, } -impl<'a> FieldArgs<'a> { - /// Create an instance of [`FieldArgs`]. +impl<'a> WindowUDFResultArgs<'a> { + /// Create an instance of [`WindowUDFResultArgs`]. /// /// # Arguments /// @@ -39,9 +39,9 @@ impl<'a> FieldArgs<'a> { /// /// # Returns /// - /// A new [`FieldArgs`] instance. + /// A new [`WindowUDFResultArgs`] instance. pub fn new(input_types: &'a [DataType], schema_name: &'a str) -> Self { - FieldArgs { + WindowUDFResultArgs { input_types, schema_name, } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index d584db603589..012c8f9c56d8 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -29,8 +29,8 @@ use datafusion_common::arrow::datatypes::Field; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; -use datafusion_functions_window_common::field; -use field::FieldArgs; +use datafusion_functions_window_common::result; +use result::WindowUDFResultArgs; /// Create a [`WindowFunction`](Expr::WindowFunction) expression for /// `row_number` user-defined window function. @@ -95,7 +95,7 @@ impl WindowUDFImpl for RowNumber { Ok(Box::::default()) } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { Ok(Field::new(field_args.name(), DataType::UInt64, false)) } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 5c0fb388c856..7f4ea3caa3ed 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1798,7 +1798,7 @@ mod tests { interval_arithmetic::Interval, *, }; - use datafusion_functions_window_common::field::FieldArgs; + use datafusion_functions_window_common::result::WindowUDFResultArgs; use std::{ collections::HashMap, ops::{BitAnd, BitOr, BitXor}, @@ -3918,7 +3918,7 @@ mod tests { unimplemented!("not needed for tests") } - fn field(&self, _field_args: FieldArgs) -> Result { + fn field(&self, _field_args: WindowUDFResultArgs) -> Result { unimplemented!("not needed for tests") } } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 8f1df6e562fd..912a1a2fdbbc 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -51,7 +51,7 @@ mod utils; mod window_agg_exec; pub use bounded_window_agg_exec::BoundedWindowAggExec; -use datafusion_functions_window_common::field::FieldArgs; +use datafusion_functions_window_common::result::WindowUDFResultArgs; use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, @@ -76,7 +76,7 @@ pub fn schema_add_window_field( .collect::>>()?; let window_expr_return_type = if let WindowFunctionDefinition::WindowUDF(udwf) = window_fn { - let field_args = FieldArgs::new(&data_types, fn_name); + let field_args = WindowUDFResultArgs::new(&data_types, fn_name); udwf.field(field_args) .map(|field| field.data_type().clone())? @@ -369,7 +369,7 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { fn field(&self) -> Result { self.fun - .field(FieldArgs::new(&self.input_types, &self.name)) + .field(WindowUDFResultArgs::new(&self.input_types, &self.name)) } fn expressions(&self) -> Vec> { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index dc66a97d618c..0d972481a3a3 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -75,7 +75,7 @@ use datafusion_functions_aggregate::expr_fn::{ }; use datafusion_functions_aggregate::kurtosis_pop::kurtosis_pop; use datafusion_functions_aggregate::string_agg::string_agg; -use datafusion_functions_window_common::field::FieldArgs; +use datafusion_functions_window_common::result::WindowUDFResultArgs; use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, @@ -2421,7 +2421,7 @@ fn roundtrip_window() { make_partition_evaluator() } - fn field(&self, field_args: FieldArgs) -> Result { + fn field(&self, field_args: WindowUDFResultArgs) -> Result { if let Some(return_type) = field_args.get_input_type(0) { Ok(Field::new(field_args.name(), return_type, true)) } else { From 0e8795f975d4ceeac0ac8a7705ef061a837af366 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Thu, 5 Sep 2024 21:09:10 +0530 Subject: [PATCH 27/48] Minor: fixes formatting --- datafusion-examples/examples/simplify_udwf_expression.rs | 2 +- datafusion/expr/src/expr_schema.rs | 3 ++- datafusion/functions-window-common/src/result.rs | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 05ebf159a685..55fd946d8664 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -22,7 +22,7 @@ use arrow_schema::{DataType, Field}; use datafusion::execution::context::SessionContext; use datafusion::functions_aggregate::average::avg_udaf; use datafusion::{error::Result, execution::options::CsvReadOptions}; -use datafusion_expr::function::{WindowUDFResultArgs, WindowFunctionSimplification}; +use datafusion_expr::function::{WindowFunctionSimplification, WindowUDFResultArgs}; use datafusion_expr::{ expr::WindowFunction, simplify::SimplifyInfo, Expr, PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 539a1abe8efa..308e9fcbab90 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -206,7 +206,8 @@ impl ExprSchemable for Expr { ) })?; let schema_name = self.schema_name().to_string(); - let field_args = WindowUDFResultArgs::new(&new_types, &schema_name); + let field_args = + WindowUDFResultArgs::new(&new_types, &schema_name); udwf.field(field_args) .map(|field| field.data_type().clone()) diff --git a/datafusion/functions-window-common/src/result.rs b/datafusion/functions-window-common/src/result.rs index d794e4af5448..b9a806896d38 100644 --- a/datafusion/functions-window-common/src/result.rs +++ b/datafusion/functions-window-common/src/result.rs @@ -33,9 +33,9 @@ impl<'a> WindowUDFResultArgs<'a> { /// # Arguments /// /// * `input_types` - The data types derived from the input - /// expressions to the window function. + /// expressions to the window function. /// * `schema_name` - The formatted display name for the window - /// function derived from the input schema. + /// function derived from the input schema. /// /// # Returns /// From 4bd77d5dcc4374ad0f3e0e66dee00c4a6cd23bc9 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 15:40:27 +0530 Subject: [PATCH 28/48] Copy edits for doc comments --- .../functions-window-common/src/result.rs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/datafusion/functions-window-common/src/result.rs b/datafusion/functions-window-common/src/result.rs index b9a806896d38..0521d48b75ce 100644 --- a/datafusion/functions-window-common/src/result.rs +++ b/datafusion/functions-window-common/src/result.rs @@ -17,13 +17,13 @@ use datafusion_common::arrow::datatypes::DataType; -/// Contains metadata which may be useful for a user-defined window -/// function to specify the field of the final result of evaluating the -/// user-defined window function. +/// Contains metadata necessary for defining the field which represents +/// the final result of evaluating a user-defined window function. pub struct WindowUDFResultArgs<'a> { - /// The data types of input expressions. + /// The data types of input expressions to the user-defined window + /// function. input_types: &'a [DataType], - /// The display name. + /// The display name of the user-defined window function. schema_name: &'a str, } @@ -47,20 +47,20 @@ impl<'a> WindowUDFResultArgs<'a> { } } - /// Returns the data types of input expressions to the window - /// function. + /// Returns the data type of input expressions passed as arguments + /// the user-defined window function. pub fn input_types(&self) -> &[DataType] { self.input_types } - /// Returns the name of the output field of this window function. + /// Returns the name for the field of the final result of evaluating + /// the user-defined window function. pub fn name(&self) -> &str { self.schema_name } - /// Returns an `Option` containing the data type of the - /// input expression at the specified index. It returns `None` when - /// the index is out of bounds. + /// Returns `Some(DataType)` of input expression at index, otherwise + /// returns `None` if the index is out of bounds. pub fn get_input_type(&self, index: usize) -> Option { self.input_types.get(index).cloned() } From a0f2ccb68af065ed6a0383e9b92b18f0abb019ac Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 15:43:29 +0530 Subject: [PATCH 29/48] Renames field to `function_name` --- datafusion/functions-window-common/src/result.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-window-common/src/result.rs b/datafusion/functions-window-common/src/result.rs index 0521d48b75ce..00069fcd7f1c 100644 --- a/datafusion/functions-window-common/src/result.rs +++ b/datafusion/functions-window-common/src/result.rs @@ -24,7 +24,7 @@ pub struct WindowUDFResultArgs<'a> { /// function. input_types: &'a [DataType], /// The display name of the user-defined window function. - schema_name: &'a str, + function_name: &'a str, } impl<'a> WindowUDFResultArgs<'a> { @@ -33,17 +33,17 @@ impl<'a> WindowUDFResultArgs<'a> { /// # Arguments /// /// * `input_types` - The data types derived from the input - /// expressions to the window function. - /// * `schema_name` - The formatted display name for the window - /// function derived from the input schema. + /// expressions to the user-defined window function. + /// * `function_name` - The formatted display name for the + /// user-defined window function. /// /// # Returns /// /// A new [`WindowUDFResultArgs`] instance. - pub fn new(input_types: &'a [DataType], schema_name: &'a str) -> Self { + pub fn new(input_types: &'a [DataType], function_name: &'a str) -> Self { WindowUDFResultArgs { input_types, - schema_name, + function_name, } } @@ -56,7 +56,7 @@ impl<'a> WindowUDFResultArgs<'a> { /// Returns the name for the field of the final result of evaluating /// the user-defined window function. pub fn name(&self) -> &str { - self.schema_name + self.function_name } /// Returns `Some(DataType)` of input expression at index, otherwise From d0d429c022b71e527d16bcf244337d6e3a8ca360 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 15:44:39 +0530 Subject: [PATCH 30/48] Rename struct to `WindowUDFFieldArgs` --- datafusion-examples/examples/advanced_udwf.rs | 4 ++-- .../examples/simplify_udwf_expression.rs | 4 ++-- .../user_defined_window_functions.rs | 4 ++-- datafusion/expr/src/expr_fn.rs | 4 ++-- datafusion/expr/src/expr_schema.rs | 6 +++--- datafusion/expr/src/function.rs | 2 +- datafusion/expr/src/udwf.rs | 16 ++++++++-------- datafusion/functions-window-common/src/result.rs | 10 +++++----- datafusion/functions-window/src/row_number.rs | 4 ++-- .../src/simplify_expressions/expr_simplifier.rs | 4 ++-- datafusion/physical-plan/src/windows/mod.rs | 6 +++--- .../proto/tests/cases/roundtrip_logical_plan.rs | 4 ++-- 12 files changed, 34 insertions(+), 34 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 946c55313897..90341f578a7b 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -26,7 +26,7 @@ use arrow_schema::Field; use datafusion::error::Result; use datafusion::prelude::*; use datafusion_common::ScalarValue; -use datafusion_expr::function::WindowUDFResultArgs; +use datafusion_expr::function::WindowUDFFieldArgs; use datafusion_expr::{ PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl, }; @@ -83,7 +83,7 @@ impl WindowUDFImpl for SmoothItUdf { Ok(Box::new(MyPartitionEvaluator::new())) } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Float64, true)) } } diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 55fd946d8664..08a9cc90615f 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -22,7 +22,7 @@ use arrow_schema::{DataType, Field}; use datafusion::execution::context::SessionContext; use datafusion::functions_aggregate::average::avg_udaf; use datafusion::{error::Result, execution::options::CsvReadOptions}; -use datafusion_expr::function::{WindowFunctionSimplification, WindowUDFResultArgs}; +use datafusion_expr::function::{WindowFunctionSimplification, WindowUDFFieldArgs}; use datafusion_expr::{ expr::WindowFunction, simplify::SimplifyInfo, Expr, PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, @@ -85,7 +85,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { Some(Box::new(simplify)) } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Float64, true)) } } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 158cc57c60be..51afa6d44fdf 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -35,7 +35,7 @@ use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, }; -use datafusion_functions_window_common::result::WindowUDFResultArgs; +use datafusion_functions_window_common::result::WindowUDFFieldArgs; /// A query with a window function evaluated over the entire partition const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ @@ -567,7 +567,7 @@ impl OddCounter { &self.aliases } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new( field_args.name(), self.return_type.clone(), diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index ebfd3abc3770..716b9e547051 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -38,7 +38,7 @@ use arrow::compute::kernels::cast_utils::{ }; use arrow::datatypes::{DataType, Field}; use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; -use datafusion_functions_window_common::result::WindowUDFResultArgs; +use datafusion_functions_window_common::result::WindowUDFFieldArgs; use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; @@ -667,7 +667,7 @@ impl WindowUDFImpl for SimpleWindowUDF { (self.partition_evaluator_factory)() } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new( field_args.name(), self.return_type.clone(), diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 308e9fcbab90..7b762a3ddd5d 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -31,7 +31,7 @@ use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result, TableReference, }; -use datafusion_functions_window_common::result::WindowUDFResultArgs; +use datafusion_functions_window_common::result::WindowUDFFieldArgs; use std::collections::HashMap; use std::sync::Arc; @@ -207,7 +207,7 @@ impl ExprSchemable for Expr { })?; let schema_name = self.schema_name().to_string(); let field_args = - WindowUDFResultArgs::new(&new_types, &schema_name); + WindowUDFFieldArgs::new(&new_types, &schema_name); udwf.field(field_args) .map(|field| field.data_type().clone()) @@ -365,7 +365,7 @@ impl ExprSchemable for Expr { .collect::>>()?; let input_types = data_types_with_window_udf(&data_types, udwf)?; let schema_name = self.schema_name().to_string(); - let field_args = WindowUDFResultArgs::new(&input_types, &schema_name); + let field_args = WindowUDFFieldArgs::new(&input_types, &schema_name); let field = udwf.field(field_args)?; Ok(field.is_nullable()) diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 1dee8f64f880..4a08fc67a55d 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -27,7 +27,7 @@ pub use datafusion_functions_aggregate_common::accumulator::{ AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs, }; -pub use datafusion_functions_window_common::result::WindowUDFResultArgs; +pub use datafusion_functions_window_common::result::WindowUDFFieldArgs; #[derive(Debug, Clone, Copy)] pub enum Hint { diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index f746368ae54b..2c10e0d8e198 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -28,7 +28,7 @@ use std::{ use arrow::datatypes::{DataType, Field}; use datafusion_common::{not_impl_err, Result}; -use datafusion_functions_window_common::result::WindowUDFResultArgs; +use datafusion_functions_window_common::result::WindowUDFFieldArgs; use crate::expr::WindowFunction; use crate::{ @@ -183,7 +183,7 @@ impl WindowUDF { /// Returns the field of the final result of evaluating this window function. /// /// See [`WindowUDFImpl::field`] for more details. - pub fn field(&self, field_args: WindowUDFResultArgs) -> Result { + pub fn field(&self, field_args: WindowUDFFieldArgs) -> Result { self.inner.field(field_args) } @@ -227,7 +227,7 @@ where /// # use datafusion_common::{DataFusionError, plan_err, Result}; /// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt}; /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; -/// use datafusion_functions_window_common::result::WindowUDFResultArgs; +/// use datafusion_functions_window_common::result::WindowUDFFieldArgs; /// #[derive(Debug, Clone)] /// struct SmoothIt { /// signature: Signature @@ -254,7 +254,7 @@ where /// } /// // The actual implementation would add one to the argument /// fn partition_evaluator(&self) -> Result> { unimplemented!() } -/// fn field(&self, field_args: WindowUDFResultArgs) -> Result { unimplemented!() } +/// fn field(&self, field_args: WindowUDFFieldArgs) -> Result { unimplemented!() } /// } /// /// // Create a new WindowUDF from the implementation @@ -354,9 +354,9 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// The field of the final result of evaluating this window function. /// - /// The [`WindowUDFResultArgs`] argument allows the window UDF to customize the + /// The [`WindowUDFFieldArgs`] argument allows the window UDF to customize the /// field given the types of the input expressions. - fn field(&self, field_args: WindowUDFResultArgs) -> Result; + fn field(&self, field_args: WindowUDFFieldArgs) -> Result; /// Allows the window UDF to define a custom result ordering. /// @@ -456,7 +456,7 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { hasher.finish() } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { self.inner.field(field_args) } @@ -517,7 +517,7 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { (self.partition_evaluator_factory)() } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { let return_type = (self.return_type)(field_args.input_types())?; Ok(Field::new( diff --git a/datafusion/functions-window-common/src/result.rs b/datafusion/functions-window-common/src/result.rs index 00069fcd7f1c..ce364e4c87b1 100644 --- a/datafusion/functions-window-common/src/result.rs +++ b/datafusion/functions-window-common/src/result.rs @@ -19,7 +19,7 @@ use datafusion_common::arrow::datatypes::DataType; /// Contains metadata necessary for defining the field which represents /// the final result of evaluating a user-defined window function. -pub struct WindowUDFResultArgs<'a> { +pub struct WindowUDFFieldArgs<'a> { /// The data types of input expressions to the user-defined window /// function. input_types: &'a [DataType], @@ -27,8 +27,8 @@ pub struct WindowUDFResultArgs<'a> { function_name: &'a str, } -impl<'a> WindowUDFResultArgs<'a> { - /// Create an instance of [`WindowUDFResultArgs`]. +impl<'a> WindowUDFFieldArgs<'a> { + /// Create an instance of [`WindowUDFFieldArgs`]. /// /// # Arguments /// @@ -39,9 +39,9 @@ impl<'a> WindowUDFResultArgs<'a> { /// /// # Returns /// - /// A new [`WindowUDFResultArgs`] instance. + /// A new [`WindowUDFFieldArgs`] instance. pub fn new(input_types: &'a [DataType], function_name: &'a str) -> Self { - WindowUDFResultArgs { + WindowUDFFieldArgs { input_types, function_name, } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 012c8f9c56d8..54c785df0c08 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -30,7 +30,7 @@ use datafusion_common::{Result, ScalarValue}; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; use datafusion_functions_window_common::result; -use result::WindowUDFResultArgs; +use result::WindowUDFFieldArgs; /// Create a [`WindowFunction`](Expr::WindowFunction) expression for /// `row_number` user-defined window function. @@ -95,7 +95,7 @@ impl WindowUDFImpl for RowNumber { Ok(Box::::default()) } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::UInt64, false)) } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 7f4ea3caa3ed..b52ca28e0401 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1798,7 +1798,7 @@ mod tests { interval_arithmetic::Interval, *, }; - use datafusion_functions_window_common::result::WindowUDFResultArgs; + use datafusion_functions_window_common::result::WindowUDFFieldArgs; use std::{ collections::HashMap, ops::{BitAnd, BitOr, BitXor}, @@ -3918,7 +3918,7 @@ mod tests { unimplemented!("not needed for tests") } - fn field(&self, _field_args: WindowUDFResultArgs) -> Result { + fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { unimplemented!("not needed for tests") } } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 912a1a2fdbbc..3a81eb36479a 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -51,7 +51,7 @@ mod utils; mod window_agg_exec; pub use bounded_window_agg_exec::BoundedWindowAggExec; -use datafusion_functions_window_common::result::WindowUDFResultArgs; +use datafusion_functions_window_common::result::WindowUDFFieldArgs; use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, @@ -76,7 +76,7 @@ pub fn schema_add_window_field( .collect::>>()?; let window_expr_return_type = if let WindowFunctionDefinition::WindowUDF(udwf) = window_fn { - let field_args = WindowUDFResultArgs::new(&data_types, fn_name); + let field_args = WindowUDFFieldArgs::new(&data_types, fn_name); udwf.field(field_args) .map(|field| field.data_type().clone())? @@ -369,7 +369,7 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { fn field(&self) -> Result { self.fun - .field(WindowUDFResultArgs::new(&self.input_types, &self.name)) + .field(WindowUDFFieldArgs::new(&self.input_types, &self.name)) } fn expressions(&self) -> Vec> { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 0d972481a3a3..63d8f1952f0c 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -75,7 +75,7 @@ use datafusion_functions_aggregate::expr_fn::{ }; use datafusion_functions_aggregate::kurtosis_pop::kurtosis_pop; use datafusion_functions_aggregate::string_agg::string_agg; -use datafusion_functions_window_common::result::WindowUDFResultArgs; +use datafusion_functions_window_common::result::WindowUDFFieldArgs; use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, @@ -2421,7 +2421,7 @@ fn roundtrip_window() { make_partition_evaluator() } - fn field(&self, field_args: WindowUDFResultArgs) -> Result { + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { if let Some(return_type) = field_args.get_input_type(0) { Ok(Field::new(field_args.name(), return_type, true)) } else { From 7bceb9e684815439b6b89de7f5759ee9cfd745e7 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 15:59:59 +0530 Subject: [PATCH 31/48] Add comments for unreachable code --- datafusion/expr/src/expr.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0cf21c4cb3e2..a9e72b4380bd 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -714,7 +714,12 @@ impl WindowFunctionDefinition { WindowFunctionDefinition::AggregateUDF(fun) => { fun.return_type(input_expr_types) } - WindowFunctionDefinition::WindowUDF(_) => unreachable!(), + WindowFunctionDefinition::WindowUDF(_) => { + /// To get the return data type of the result from + /// evaluating the user-defined window function instead + /// use the `WindowUDF::field` trait method. + unreachable!() + } } } From 9e04fd8f4d6e4f8787d16f738f34aa6dd1c7c371 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 16:06:58 +0530 Subject: [PATCH 32/48] Copy edit for `WindowUDFImpl::field` trait method --- datafusion/expr/src/udwf.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 2c10e0d8e198..30a052a272ce 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -352,10 +352,7 @@ pub trait WindowUDFImpl: Debug + Send + Sync { hasher.finish() } - /// The field of the final result of evaluating this window function. - /// - /// The [`WindowUDFFieldArgs`] argument allows the window UDF to customize the - /// field given the types of the input expressions. + /// The [`Field`] of the final result of evaluating this window function. fn field(&self, field_args: WindowUDFFieldArgs) -> Result; /// Allows the window UDF to define a custom result ordering. From 1e8c300610af6d47b338ea1ccfd6c26adcdf8573 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 16:08:37 +0530 Subject: [PATCH 33/48] Renames module --- .../core/tests/user_defined/user_defined_window_functions.rs | 2 +- datafusion/expr/src/expr_fn.rs | 2 +- datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/function.rs | 2 +- datafusion/expr/src/udwf.rs | 4 ++-- .../functions-window-common/src/{result.rs => field.rs} | 0 datafusion/functions-window-common/src/lib.rs | 2 +- datafusion/functions-window/src/row_number.rs | 4 ++-- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 2 +- datafusion/physical-plan/src/windows/mod.rs | 2 +- datafusion/proto/tests/cases/roundtrip_logical_plan.rs | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) rename datafusion/functions-window-common/src/{result.rs => field.rs} (100%) diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 51afa6d44fdf..b563febbd40f 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -35,7 +35,7 @@ use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, }; -use datafusion_functions_window_common::result::WindowUDFFieldArgs; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// A query with a window function evaluated over the entire partition const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 716b9e547051..21fc51883082 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -38,7 +38,7 @@ use arrow::compute::kernels::cast_utils::{ }; use arrow::datatypes::{DataType, Field}; use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; -use datafusion_functions_window_common::result::WindowUDFFieldArgs; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 7b762a3ddd5d..22071533c60c 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -31,7 +31,7 @@ use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result, TableReference, }; -use datafusion_functions_window_common::result::WindowUDFFieldArgs; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; use std::collections::HashMap; use std::sync::Arc; diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 4a08fc67a55d..9814d16ddfa3 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -27,7 +27,7 @@ pub use datafusion_functions_aggregate_common::accumulator::{ AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs, }; -pub use datafusion_functions_window_common::result::WindowUDFFieldArgs; +pub use datafusion_functions_window_common::field::WindowUDFFieldArgs; #[derive(Debug, Clone, Copy)] pub enum Hint { diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 30a052a272ce..9511f446775b 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -28,7 +28,7 @@ use std::{ use arrow::datatypes::{DataType, Field}; use datafusion_common::{not_impl_err, Result}; -use datafusion_functions_window_common::result::WindowUDFFieldArgs; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; use crate::expr::WindowFunction; use crate::{ @@ -227,7 +227,7 @@ where /// # use datafusion_common::{DataFusionError, plan_err, Result}; /// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt}; /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; -/// use datafusion_functions_window_common::result::WindowUDFFieldArgs; +/// use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// #[derive(Debug, Clone)] /// struct SmoothIt { /// signature: Signature diff --git a/datafusion/functions-window-common/src/result.rs b/datafusion/functions-window-common/src/field.rs similarity index 100% rename from datafusion/functions-window-common/src/result.rs rename to datafusion/functions-window-common/src/field.rs diff --git a/datafusion/functions-window-common/src/lib.rs b/datafusion/functions-window-common/src/lib.rs index 5a8926cb2fd9..2e4bcbbc83b9 100644 --- a/datafusion/functions-window-common/src/lib.rs +++ b/datafusion/functions-window-common/src/lib.rs @@ -18,4 +18,4 @@ //! Common user-defined window functionality for [DataFusion] //! //! [DataFusion]: -pub mod result; +pub mod field; diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 54c785df0c08..8c375867ad78 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -29,8 +29,8 @@ use datafusion_common::arrow::datatypes::Field; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; -use datafusion_functions_window_common::result; -use result::WindowUDFFieldArgs; +use datafusion_functions_window_common::field; +use field::WindowUDFFieldArgs; /// Create a [`WindowFunction`](Expr::WindowFunction) expression for /// `row_number` user-defined window function. diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index b52ca28e0401..d24c9662fd64 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1798,7 +1798,7 @@ mod tests { interval_arithmetic::Interval, *, }; - use datafusion_functions_window_common::result::WindowUDFFieldArgs; + use datafusion_functions_window_common::field::WindowUDFFieldArgs; use std::{ collections::HashMap, ops::{BitAnd, BitOr, BitXor}, diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 3a81eb36479a..98b26630df37 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -51,7 +51,7 @@ mod utils; mod window_agg_exec; pub use bounded_window_agg_exec::BoundedWindowAggExec; -use datafusion_functions_window_common::result::WindowUDFFieldArgs; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 63d8f1952f0c..4ae7b09b1eae 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -75,7 +75,7 @@ use datafusion_functions_aggregate::expr_fn::{ }; use datafusion_functions_aggregate::kurtosis_pop::kurtosis_pop; use datafusion_functions_aggregate::string_agg::string_agg; -use datafusion_functions_window_common::result::WindowUDFFieldArgs; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, From efbad36743653762389ca5193b8555ddd6e201a8 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 16:09:43 +0530 Subject: [PATCH 34/48] Fix warning: unused doc comment --- datafusion/expr/src/expr.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index a9e72b4380bd..cecf44bfa7cf 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -715,9 +715,9 @@ impl WindowFunctionDefinition { fun.return_type(input_expr_types) } WindowFunctionDefinition::WindowUDF(_) => { - /// To get the return data type of the result from - /// evaluating the user-defined window function instead - /// use the `WindowUDF::field` trait method. + // To get the return data type of the result from + // evaluating the user-defined window function instead + // use the `WindowUDF::field` trait method. unreachable!() } } From 7c445e155a1a967277720152869f3d75cfd9898d Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 16:39:41 +0530 Subject: [PATCH 35/48] Minor: rename bindings --- datafusion/expr/src/expr_schema.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 22071533c60c..3ad1064f4aeb 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -205,9 +205,9 @@ impl ExprSchemable for Expr { ) ) })?; - let schema_name = self.schema_name().to_string(); + let function_name = self.schema_name().to_string(); let field_args = - WindowUDFFieldArgs::new(&new_types, &schema_name); + WindowUDFFieldArgs::new(&new_types, &function_name); udwf.field(field_args) .map(|field| field.data_type().clone()) @@ -364,8 +364,9 @@ impl ExprSchemable for Expr { .map(|e| e.get_type(input_schema)) .collect::>>()?; let input_types = data_types_with_window_udf(&data_types, udwf)?; - let schema_name = self.schema_name().to_string(); - let field_args = WindowUDFFieldArgs::new(&input_types, &schema_name); + let function_name = self.schema_name().to_string(); + let field_args = + WindowUDFFieldArgs::new(&input_types, &function_name); let field = udwf.field(field_args)?; Ok(field.is_nullable()) From 17da2045ac5af20f524950371bf249fe06528533 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Fri, 6 Sep 2024 17:56:17 +0530 Subject: [PATCH 36/48] Minor refactor --- datafusion/expr/src/expr_schema.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 27a50f840383..989c4478b816 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -367,9 +367,8 @@ impl ExprSchemable for Expr { let function_name = self.schema_name().to_string(); let field_args = WindowUDFFieldArgs::new(&input_types, &function_name); - let field = udwf.field(field_args)?; - Ok(field.is_nullable()) + udwf.field(field_args).map(|field| field.is_nullable()) } }, Expr::ScalarVariable(_, _) From 32bdf3661ee914f5a35954b0b99819d118a3de44 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Sat, 7 Sep 2024 17:42:18 +0530 Subject: [PATCH 37/48] Minor: copy edit --- datafusion/functions-window-common/src/field.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index ce364e4c87b1..5e5292bacce7 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -48,7 +48,7 @@ impl<'a> WindowUDFFieldArgs<'a> { } /// Returns the data type of input expressions passed as arguments - /// the user-defined window function. + /// to the user-defined window function. pub fn input_types(&self) -> &[DataType] { self.input_types } From a47b7a002d423bfbe6b480fc682d1a787a94250d Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Sun, 8 Sep 2024 00:07:32 +0530 Subject: [PATCH 38/48] Fixes: use `Expr::qualified_name` for window function name --- datafusion/expr/src/expr_schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 989c4478b816..7e6fcba43a37 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -205,7 +205,7 @@ impl ExprSchemable for Expr { ) ) })?; - let function_name = self.schema_name().to_string(); + let (_, function_name) = self.qualified_name(); let field_args = WindowUDFFieldArgs::new(&new_types, &function_name); From 8da1364a9c420eed77d9f4a654b332d11f5af3cd Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Sun, 8 Sep 2024 00:12:27 +0530 Subject: [PATCH 39/48] Fixes: apply previous fix to `Expr::nullable` --- datafusion/expr/src/expr_schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 7e6fcba43a37..464715b9727f 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -364,7 +364,7 @@ impl ExprSchemable for Expr { .map(|e| e.get_type(input_schema)) .collect::>>()?; let input_types = data_types_with_window_udf(&data_types, udwf)?; - let function_name = self.schema_name().to_string(); + let (_, function_name) = self.qualified_name(); let field_args = WindowUDFFieldArgs::new(&input_types, &function_name); From 275daea575f777a2fb253edea97512250f6caea2 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Sat, 14 Sep 2024 18:10:42 +0530 Subject: [PATCH 40/48] Refactor: reuse type coercion for window functions --- datafusion/expr/src/expr_schema.rs | 146 ++++++++++++++--------------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 464715b9727f..252084444544 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -167,54 +167,9 @@ impl ExprSchemable for Expr { // expressiveness of `TypeSignature`), then infer return type Ok(func.return_type_from_exprs(args, schema, &arg_data_types)?) } - Expr::WindowFunction(WindowFunction { fun, args, .. }) => { - let data_types = args - .iter() - .map(|e| e.get_type(schema)) - .collect::>>()?; - let nullability = args - .iter() - .map(|e| e.nullable(schema)) - .collect::>>()?; - match fun { - WindowFunctionDefinition::AggregateUDF(udf) => { - let new_types = data_types_with_aggregate_udf(&data_types, udf) - .map_err(|err| { - plan_datafusion_err!( - "{} {}", - err, - utils::generate_signature_error_msg( - fun.name(), - fun.signature(), - &data_types - ) - ) - })?; - Ok(fun.return_type(&new_types, &nullability)?) - } - WindowFunctionDefinition::WindowUDF(udwf) => { - let new_types = data_types_with_window_udf(&data_types, udwf) - .map_err(|err| { - plan_datafusion_err!( - "{} {}", - err, - utils::generate_signature_error_msg( - fun.name(), - fun.signature(), - &data_types - ) - ) - })?; - let (_, function_name) = self.qualified_name(); - let field_args = - WindowUDFFieldArgs::new(&new_types, &function_name); - - udwf.field(field_args) - .map(|field| field.data_type().clone()) - } - _ => fun.return_type(&data_types, &nullability), - } - } + Expr::WindowFunction(window_function) => self + .data_type_and_nullable_with_window_function(schema, window_function) + .map(|(return_type, _)| return_type), Expr::AggregateFunction(AggregateFunction { func, args, .. }) => { let data_types = args .iter() @@ -346,31 +301,12 @@ impl ExprSchemable for Expr { Expr::AggregateFunction(AggregateFunction { func, .. }) => { Ok(func.is_nullable()) } - Expr::WindowFunction(WindowFunction { fun, args, .. }) => match fun { - WindowFunctionDefinition::BuiltInWindowFunction(func) => { - if func.name() == "RANK" - || func.name() == "NTILE" - || func.name() == "CUME_DIST" - { - Ok(false) - } else { - Ok(true) - } - } - WindowFunctionDefinition::AggregateUDF(func) => Ok(func.is_nullable()), - WindowFunctionDefinition::WindowUDF(udwf) => { - let data_types = args - .iter() - .map(|e| e.get_type(input_schema)) - .collect::>>()?; - let input_types = data_types_with_window_udf(&data_types, udwf)?; - let (_, function_name) = self.qualified_name(); - let field_args = - WindowUDFFieldArgs::new(&input_types, &function_name); - - udwf.field(field_args).map(|field| field.is_nullable()) - } - }, + Expr::WindowFunction(window_function) => self + .data_type_and_nullable_with_window_function( + input_schema, + window_function, + ) + .map(|(_, nullable)| nullable), Expr::ScalarVariable(_, _) | Expr::TryCast { .. } | Expr::Unnest(_) @@ -467,6 +403,9 @@ impl ExprSchemable for Expr { let right = right.data_type_and_nullable(schema)?; Ok((get_result_type(&left.0, op, &right.0)?, left.1 || right.1)) } + Expr::WindowFunction(window_function) => { + self.data_type_and_nullable_with_window_function(schema, window_function) + } _ => Ok((self.get_type(schema)?, self.nullable(schema)?)), } } @@ -516,6 +455,67 @@ impl ExprSchemable for Expr { } } +impl Expr { + fn data_type_and_nullable_with_window_function( + &self, + schema: &dyn ExprSchema, + window_function: &WindowFunction, + ) -> Result<(DataType, bool)> { + let WindowFunction { fun, args, .. } = window_function; + + let data_types = args + .iter() + .map(|e| e.get_type(schema)) + .collect::>>()?; + match fun { + WindowFunctionDefinition::BuiltInWindowFunction(window_fun) => { + let return_type = window_fun.return_type(&data_types)?; + let nullable = + !["RANK", "NTILE", "CUME_DIST"].contains(&window_fun.name()); + Ok((return_type, nullable)) + } + WindowFunctionDefinition::AggregateUDF(udaf) => { + let new_types = data_types_with_aggregate_udf(&data_types, udaf) + .map_err(|err| { + plan_datafusion_err!( + "{} {}", + err, + utils::generate_signature_error_msg( + fun.name(), + fun.signature(), + &data_types + ) + ) + })?; + + let return_type = udaf.return_type(&new_types)?; + let nullable = udaf.is_nullable(); + + Ok((return_type, nullable)) + } + WindowFunctionDefinition::WindowUDF(udwf) => { + let new_types = + data_types_with_window_udf(&data_types, udwf).map_err(|err| { + plan_datafusion_err!( + "{} {}", + err, + utils::generate_signature_error_msg( + fun.name(), + fun.signature(), + &data_types + ) + ) + })?; + let (_, function_name) = self.qualified_name(); + let field_args = WindowUDFFieldArgs::new(&new_types, &function_name); + + udwf.field(field_args) + .map(|field| (field.data_type().clone(), field.is_nullable())) + } + } + } +} + /// cast subquery in InSubquery/ScalarSubquery to a given type. pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result { if subquery.subquery.schema().field(0).data_type() == cast_to_type { From 045d3524af21c610fc0eb5cc0685a23f00cc6c91 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Mon, 16 Sep 2024 22:15:57 +0530 Subject: [PATCH 41/48] Fixes: clippy errors --- datafusion-cli/Cargo.lock | 2 +- datafusion/expr/src/udwf.rs | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index dffcf6401634..2d216343a3da 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1451,7 +1451,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "41.0.0" +version = "42.0.0" dependencies = [ "datafusion-common", ] diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 874f09bd8a41..d1661e7812e1 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -18,7 +18,6 @@ //! [`WindowUDF`]: User Defined Window Functions use arrow::compute::SortOptions; -use arrow::datatypes::DataType; use std::cmp::Ordering; use std::hash::{DefaultHasher, Hash, Hasher}; use std::{ @@ -545,9 +544,10 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { #[cfg(test)] mod test { use crate::{PartitionEvaluator, WindowUDF, WindowUDFImpl}; - use arrow::datatypes::DataType; + use arrow::datatypes::{DataType, Field}; use datafusion_common::Result; use datafusion_expr_common::signature::{Signature, Volatility}; + use datafusion_functions_window_common::field::WindowUDFFieldArgs; use std::any::Any; use std::cmp::Ordering; @@ -585,6 +585,9 @@ mod test { fn partition_evaluator(&self) -> Result> { unimplemented!() } + fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { + unimplemented!() + } } #[derive(Debug, Clone)] @@ -621,6 +624,9 @@ mod test { fn partition_evaluator(&self) -> Result> { unimplemented!() } + fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { + unimplemented!() + } } #[test] From 5cc7d06a0dd95dace219c666535630c1c7de3c3c Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 17 Sep 2024 12:28:17 +0530 Subject: [PATCH 42/48] Adds name parameter to `WindowFunctionDefinition::return_type` --- datafusion/expr/src/expr.rs | 37 ++++++++++----------- datafusion/physical-plan/src/windows/mod.rs | 9 +---- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 639dfbc7830b..6bf70fb2b580 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -40,6 +40,7 @@ use datafusion_common::tree_node::{ use datafusion_common::{ plan_err, Column, DFSchema, Result, ScalarValue, TableReference, }; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; use sqlparser::ast::{ display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, NullTreatment, RenameSelectItem, ReplaceSelectElement, @@ -706,6 +707,7 @@ impl WindowFunctionDefinition { &self, input_expr_types: &[DataType], _input_expr_nullable: &[bool], + display_name: &str, ) -> Result { match self { WindowFunctionDefinition::BuiltInWindowFunction(fun) => { @@ -714,12 +716,9 @@ impl WindowFunctionDefinition { WindowFunctionDefinition::AggregateUDF(fun) => { fun.return_type(input_expr_types) } - WindowFunctionDefinition::WindowUDF(_) => { - // To get the return data type of the result from - // evaluating the user-defined window function instead - // use the `WindowUDF::field` trait method. - unreachable!() - } + WindowFunctionDefinition::WindowUDF(fun) => fun + .field(WindowUDFFieldArgs::new(input_expr_types, display_name)) + .map(|field| field.data_type().clone()), } } @@ -2558,10 +2557,10 @@ mod test { #[test] fn test_first_value_return_type() -> Result<()> { let fun = find_df_window_func("first_value").unwrap(); - let observed = fun.return_type(&[DataType::Utf8], &[true])?; + let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; assert_eq!(DataType::Utf8, observed); - let observed = fun.return_type(&[DataType::UInt64], &[true])?; + let observed = fun.return_type(&[DataType::UInt64], &[true], "")?; assert_eq!(DataType::UInt64, observed); Ok(()) @@ -2570,10 +2569,10 @@ mod test { #[test] fn test_last_value_return_type() -> Result<()> { let fun = find_df_window_func("last_value").unwrap(); - let observed = fun.return_type(&[DataType::Utf8], &[true])?; + let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; assert_eq!(DataType::Utf8, observed); - let observed = fun.return_type(&[DataType::Float64], &[true])?; + let observed = fun.return_type(&[DataType::Float64], &[true], "")?; assert_eq!(DataType::Float64, observed); Ok(()) @@ -2582,10 +2581,10 @@ mod test { #[test] fn test_lead_return_type() -> Result<()> { let fun = find_df_window_func("lead").unwrap(); - let observed = fun.return_type(&[DataType::Utf8], &[true])?; + let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; assert_eq!(DataType::Utf8, observed); - let observed = fun.return_type(&[DataType::Float64], &[true])?; + let observed = fun.return_type(&[DataType::Float64], &[true], "")?; assert_eq!(DataType::Float64, observed); Ok(()) @@ -2594,10 +2593,10 @@ mod test { #[test] fn test_lag_return_type() -> Result<()> { let fun = find_df_window_func("lag").unwrap(); - let observed = fun.return_type(&[DataType::Utf8], &[true])?; + let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; assert_eq!(DataType::Utf8, observed); - let observed = fun.return_type(&[DataType::Float64], &[true])?; + let observed = fun.return_type(&[DataType::Float64], &[true], "")?; assert_eq!(DataType::Float64, observed); Ok(()) @@ -2607,11 +2606,11 @@ mod test { fn test_nth_value_return_type() -> Result<()> { let fun = find_df_window_func("nth_value").unwrap(); let observed = - fun.return_type(&[DataType::Utf8, DataType::UInt64], &[true, true])?; + fun.return_type(&[DataType::Utf8, DataType::UInt64], &[true, true], "")?; assert_eq!(DataType::Utf8, observed); let observed = - fun.return_type(&[DataType::Float64, DataType::UInt64], &[true, true])?; + fun.return_type(&[DataType::Float64, DataType::UInt64], &[true, true], "")?; assert_eq!(DataType::Float64, observed); Ok(()) @@ -2620,7 +2619,7 @@ mod test { #[test] fn test_percent_rank_return_type() -> Result<()> { let fun = find_df_window_func("percent_rank").unwrap(); - let observed = fun.return_type(&[], &[])?; + let observed = fun.return_type(&[], &[], "")?; assert_eq!(DataType::Float64, observed); Ok(()) @@ -2629,7 +2628,7 @@ mod test { #[test] fn test_cume_dist_return_type() -> Result<()> { let fun = find_df_window_func("cume_dist").unwrap(); - let observed = fun.return_type(&[], &[])?; + let observed = fun.return_type(&[], &[], "")?; assert_eq!(DataType::Float64, observed); Ok(()) @@ -2638,7 +2637,7 @@ mod test { #[test] fn test_ntile_return_type() -> Result<()> { let fun = find_df_window_func("ntile").unwrap(); - let observed = fun.return_type(&[DataType::Int16], &[true])?; + let observed = fun.return_type(&[DataType::Int16], &[true], "")?; assert_eq!(DataType::UInt64, observed); Ok(()) diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index a6968ea63ba8..981a8e285166 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -75,14 +75,7 @@ pub fn schema_add_window_field( .map(|e| Arc::clone(e).as_ref().nullable(schema)) .collect::>>()?; let window_expr_return_type = - if let WindowFunctionDefinition::WindowUDF(udwf) = window_fn { - let field_args = WindowUDFFieldArgs::new(&data_types, fn_name); - - udwf.field(field_args) - .map(|field| field.data_type().clone())? - } else { - window_fn.return_type(&data_types, &nullability)? - }; + window_fn.return_type(&data_types, &nullability, fn_name)?; let mut window_fields = schema .fields() .iter() From 010233416e41c2506494dbca85b88fd442cb8714 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 17 Sep 2024 15:52:36 +0530 Subject: [PATCH 43/48] Removes `return_type` field from `SimpleWindowUDF` --- .../user_defined/user_defined_window_functions.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index b563febbd40f..2c4e4cda60de 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -523,7 +523,6 @@ impl OddCounter { #[derive(Debug, Clone)] struct SimpleWindowUDF { signature: Signature, - return_type: DataType, test_state: Arc, aliases: Vec, } @@ -532,10 +531,8 @@ impl OddCounter { fn new(test_state: Arc) -> Self { let signature = Signature::exact(vec![DataType::Float64], Volatility::Immutable); - let return_type = DataType::Int64; Self { signature, - return_type, test_state, aliases: vec!["odd_counter_alias".to_string()], } @@ -556,7 +553,7 @@ impl OddCounter { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(self.return_type.clone()) + Ok(DataType::Int64) } fn partition_evaluator(&self) -> Result> { @@ -568,11 +565,7 @@ impl OddCounter { } fn field(&self, field_args: WindowUDFFieldArgs) -> Result { - Ok(Field::new( - field_args.name(), - self.return_type.clone(), - true, - )) + Ok(Field::new(field_args.name(), DataType::Int64, true)) } } From 4bb799d307fd66872427bb55cc163181825c89fe Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 17 Sep 2024 17:06:02 +0530 Subject: [PATCH 44/48] Add doc comment for helper method --- datafusion/expr/src/expr_schema.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 252084444544..f40ac409dd43 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -456,6 +456,15 @@ impl ExprSchemable for Expr { } impl Expr { + /// Common method for window functions that applies type coercion + /// to all arguments of the window function to check if it matches + /// its signature. + /// + /// If successful, this method returns the data type and + /// nullability of the window function's result. + /// + /// Otherwise, returns an error if there's a type mismatch between + /// the window function's signature and the provided arguments. fn data_type_and_nullable_with_window_function( &self, schema: &dyn ExprSchema, From 25301695ffbbb22f06af9c83dba8bdbdce84fa36 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 17 Sep 2024 17:34:09 +0530 Subject: [PATCH 45/48] Rewrite doc comments --- .../functions-window-common/src/field.rs | 28 +++++++++---------- datafusion/sqllogictest/bin/sqllogictests.rs | 1 + 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index 5e5292bacce7..f17453a32696 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -17,14 +17,15 @@ use datafusion_common::arrow::datatypes::DataType; -/// Contains metadata necessary for defining the field which represents -/// the final result of evaluating a user-defined window function. +/// Metadata for defining the result field from evaluating a +/// user-defined window function. pub struct WindowUDFFieldArgs<'a> { - /// The data types of input expressions to the user-defined window - /// function. + /// The data types corresponding to the arguments to the + /// user-defined window function. input_types: &'a [DataType], + /// /// The display name of the user-defined window function. - function_name: &'a str, + display_name: &'a str, } impl<'a> WindowUDFFieldArgs<'a> { @@ -32,18 +33,15 @@ impl<'a> WindowUDFFieldArgs<'a> { /// /// # Arguments /// - /// * `input_types` - The data types derived from the input - /// expressions to the user-defined window function. - /// * `function_name` - The formatted display name for the - /// user-defined window function. - /// - /// # Returns + /// * `input_types` - The data types corresponding to the + /// arguments to the user-defined window function. + /// * `function_name` - The qualified schema name of the + /// user-defined window function expression. /// - /// A new [`WindowUDFFieldArgs`] instance. - pub fn new(input_types: &'a [DataType], function_name: &'a str) -> Self { + pub fn new(input_types: &'a [DataType], display_name: &'a str) -> Self { WindowUDFFieldArgs { input_types, - function_name, + display_name, } } @@ -56,7 +54,7 @@ impl<'a> WindowUDFFieldArgs<'a> { /// Returns the name for the field of the final result of evaluating /// the user-defined window function. pub fn name(&self) -> &str { - self.function_name + self.display_name } /// Returns `Some(DataType)` of input expression at index, otherwise diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index baa49057e1b9..f94b302f70a2 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -34,6 +34,7 @@ const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_"; pub fn main() -> Result<()> { tokio::runtime::Builder::new_multi_thread() .enable_all() + .thread_stack_size(8 * 1024 * 1024) .build() .unwrap() .block_on(run_tests()) From 25b299e35c6c2a3c9fb197ddda1718291d0724f7 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 17 Sep 2024 17:37:00 +0530 Subject: [PATCH 46/48] Minor: remove empty comment --- datafusion/functions-window-common/src/field.rs | 1 - datafusion/sqllogictest/bin/sqllogictests.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index f17453a32696..8011b7b0f05f 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -23,7 +23,6 @@ pub struct WindowUDFFieldArgs<'a> { /// The data types corresponding to the arguments to the /// user-defined window function. input_types: &'a [DataType], - /// /// The display name of the user-defined window function. display_name: &'a str, } diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index f94b302f70a2..baa49057e1b9 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -34,7 +34,6 @@ const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_"; pub fn main() -> Result<()> { tokio::runtime::Builder::new_multi_thread() .enable_all() - .thread_stack_size(8 * 1024 * 1024) .build() .unwrap() .block_on(run_tests()) From ce7df3aa52ea718f899e1c9dd3265f2c3ab01cc2 Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 17 Sep 2024 18:11:11 +0530 Subject: [PATCH 47/48] Remove `WindowUDFImpl::return_type` --- datafusion-examples/examples/advanced_udwf.rs | 5 --- .../examples/simplify_udwf_expression.rs | 4 --- .../user_defined_window_functions.rs | 4 --- datafusion/expr/src/expr_fn.rs | 4 --- datafusion/expr/src/udwf.rs | 33 ------------------- datafusion/functions-window/src/row_number.rs | 4 --- .../simplify_expressions/expr_simplifier.rs | 4 --- .../tests/cases/roundtrip_logical_plan.rs | 11 ------- 8 files changed, 69 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 90341f578a7b..fd1b84070cf6 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -72,11 +72,6 @@ impl WindowUDFImpl for SmoothItUdf { &self.signature } - /// What is the type of value that will be returned by this function. - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(DataType::Float64) - } - /// Create a `PartitionEvaluator` to evaluate this function on a new /// partition. fn partition_evaluator(&self) -> Result> { diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 08a9cc90615f..1ff629eef196 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -60,10 +60,6 @@ impl WindowUDFImpl for SimplifySmoothItUdf { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(DataType::Float64) - } - fn partition_evaluator(&self) -> Result> { todo!() } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 2c4e4cda60de..d96bb23953ae 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -552,10 +552,6 @@ impl OddCounter { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(DataType::Int64) - } - fn partition_evaluator(&self) -> Result> { Ok(Box::new(OddCounter::new(Arc::clone(&self.test_state)))) } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 21fc51883082..a9e3f369d0fd 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -659,10 +659,6 @@ impl WindowUDFImpl for SimpleWindowUDF { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(self.return_type.clone()) - } - fn partition_evaluator(&self) -> Result> { (self.partition_evaluator_factory)() } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index d1661e7812e1..c5b8f7a4a13e 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -161,14 +161,6 @@ impl WindowUDF { self.inner.signature() } - /// Return the type of the function given its input types - /// - /// See [`WindowUDFImpl::return_type`] for more details. - #[allow(deprecated)] - pub fn return_type(&self, args: &[DataType]) -> Result { - self.inner.return_type(args) - } - /// Do the function rewrite /// /// See [`WindowUDFImpl::simplify`] for more details. @@ -284,14 +276,6 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// types are accepted and the function's Volatility. fn signature(&self) -> &Signature; - /// What [`DataType`] will be returned by this function, given the types of - /// the arguments - #[deprecated( - since = "41.0.0", - note = "Use `field` instead to define the final result of evaluating this user-defined window function." - )] - fn return_type(&self, arg_types: &[DataType]) -> Result; - /// Invoke the function, returning the [`PartitionEvaluator`] instance fn partition_evaluator(&self) -> Result>; @@ -437,11 +421,6 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { self.inner.signature() } - #[allow(deprecated)] - fn return_type(&self, arg_types: &[DataType]) -> Result { - self.inner.return_type(arg_types) - } - fn partition_evaluator(&self) -> Result> { self.inner.partition_evaluator() } @@ -520,12 +499,6 @@ impl WindowUDFImpl for WindowUDFLegacyWrapper { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - // Old API returns an Arc of the datatype for some reason - let res = (self.return_type)(arg_types)?; - Ok(res.as_ref().clone()) - } - fn partition_evaluator(&self) -> Result> { (self.partition_evaluator_factory)() } @@ -579,9 +552,6 @@ mod test { fn signature(&self) -> &Signature { &self.signature } - fn return_type(&self, _args: &[DataType]) -> Result { - unimplemented!() - } fn partition_evaluator(&self) -> Result> { unimplemented!() } @@ -618,9 +588,6 @@ mod test { fn signature(&self) -> &Signature { &self.signature } - fn return_type(&self, _args: &[DataType]) -> Result { - unimplemented!() - } fn partition_evaluator(&self) -> Result> { unimplemented!() } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 8c375867ad78..7f348bf9d2a0 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -87,10 +87,6 @@ impl WindowUDFImpl for RowNumber { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(DataType::UInt64) - } - fn partition_evaluator(&self) -> Result> { Ok(Box::::default()) } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index d24c9662fd64..a78a54a57123 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3902,10 +3902,6 @@ mod tests { unimplemented!() } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - unimplemented!("not needed for tests") - } - fn simplify(&self) -> Option { if self.simplify { Some(Box::new(|_, _| Ok(col("result_column")))) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index d025f439d3dd..3be5e0c89595 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -2431,17 +2431,6 @@ fn roundtrip_window() { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - if arg_types.len() != 1 { - return plan_err!( - "dummy_udwf expects 1 argument, got {}: {:?}", - arg_types.len(), - arg_types - ); - } - Ok(arg_types[0].clone()) - } - fn partition_evaluator(&self) -> Result> { make_partition_evaluator() } From bb57f8f49fde7a523b864e75ec6de537cc2fb8da Mon Sep 17 00:00:00 2001 From: Jacob Sherin Date: Tue, 17 Sep 2024 18:37:23 +0530 Subject: [PATCH 48/48] Fixes doc test --- datafusion/expr/src/udwf.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index c5b8f7a4a13e..9078366e7c91 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -239,15 +239,15 @@ where /// fn as_any(&self) -> &dyn Any { self } /// fn name(&self) -> &str { "smooth_it" } /// fn signature(&self) -> &Signature { &self.signature } -/// fn return_type(&self, args: &[DataType]) -> Result { -/// if !matches!(args.get(0), Some(&DataType::Int32)) { -/// return plan_err!("smooth_it only accepts Int32 arguments"); -/// } -/// Ok(DataType::Int32) -/// } /// // The actual implementation would add one to the argument /// fn partition_evaluator(&self) -> Result> { unimplemented!() } -/// fn field(&self, field_args: WindowUDFFieldArgs) -> Result { unimplemented!() } +/// fn field(&self, field_args: WindowUDFFieldArgs) -> Result { +/// if let Some(DataType::Int32) = field_args.get_input_type(0) { +/// Ok(Field::new(field_args.name(), DataType::Int32, false)) +/// } else { +/// plan_err!("smooth_it only accepts Int32 arguments") +/// } +/// } /// } /// /// // Create a new WindowUDF from the implementation