From 7659a00a70a3b3e9bb95847669a632301113f34e Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 23 Feb 2023 12:56:34 -0500 Subject: [PATCH 1/3] Add bindings for case, cast, and trycast --- Cargo.lock | 39 ++++++++-------- datafusion/__init__.py | 6 +++ datafusion/tests/test_imports.py | 6 +++ src/expr.rs | 5 ++ src/expr/case.rs | 62 +++++++++++++++++++++++++ src/expr/cast.rs | 79 ++++++++++++++++++++++++++++++++ 6 files changed, 177 insertions(+), 20 deletions(-) create mode 100644 src/expr/case.rs create mode 100644 src/expr/cast.rs diff --git a/Cargo.lock b/Cargo.lock index 04a2ea8d..9e337e20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -844,9 +844,9 @@ dependencies = [ "datafusion", "itertools", "object_store", - "prost 0.11.6", + "prost 0.11.7", "prost-build 0.9.0", - "prost-types 0.11.6", + "prost-types 0.11.7", "substrait", "tokio", ] @@ -1813,12 +1813,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21dc42e00223fc37204bd4aa177e69420c604ca4a183209a8f9de30c6d934698" +checksum = "3933d3ac2717077b3d5f42b40f59edfb1fb6a8c14e1c7de0f38075c4bac8e314" dependencies = [ "bytes", - "prost-derive 0.11.6", + "prost-derive 0.11.7", ] [[package]] @@ -1843,9 +1843,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" +checksum = "a24be1d23b4552a012093e1b93697b73d644ae9590e3253d878d0e77d411b614" dependencies = [ "bytes", "heck 0.4.1", @@ -1854,8 +1854,8 @@ dependencies = [ "log", "multimap", "petgraph", - "prost 0.11.6", - "prost-types 0.11.6", + "prost 0.11.7", + "prost-types 0.11.7", "regex", "tempfile", "which", @@ -1876,9 +1876,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda8c0881ea9f722eb9629376db3d0b903b462477c1aafcb0566610ac28ac5d" +checksum = "8e9935362e8369bc3acd874caeeae814295c504c2bdbcde5c024089cf8b4dc12" dependencies = [ "anyhow", "itertools", @@ -1899,12 +1899,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e0526209433e96d83d750dd81a99118edbc55739e7e61a46764fd2ad537788" +checksum = "7de56acd5cc9642cac2a9518d4c8c53818905398fe42d33235859e0d542a7695" dependencies = [ - "bytes", - "prost 0.11.6", + "prost 0.11.7", ] [[package]] @@ -2469,9 +2468,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2feb96a6a106e21161551af32dc4e0fdab3aceb926b940d7e92a086b640fc7c" dependencies = [ "heck 0.4.1", - "prost 0.11.6", - "prost-build 0.11.6", - "prost-types 0.11.6", + "prost 0.11.7", + "prost-build 0.11.7", + "prost-types 0.11.7", "schemars", "serde", "serde_json", @@ -2488,9 +2487,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.107" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "d56e159d99e6c2b93995d171050271edb50ecc5288fbc7cc17de8fdce4e58c14" dependencies = [ "proc-macro2", "quote", diff --git a/datafusion/__init__.py b/datafusion/__init__.py index c695bfbd..a707ff87 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -62,6 +62,9 @@ IsNotFalse, IsNotUnknown, Negative, + Case, + Cast, + TryCast, ) __version__ = importlib_metadata.version(__name__) @@ -99,6 +102,9 @@ "IsNotFalse", "IsNotUnknown", "Negative", + "Case", + "Cast", + "TryCast", ] diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py index b154f562..f5beaf1a 100644 --- a/datafusion/tests/test_imports.py +++ b/datafusion/tests/test_imports.py @@ -58,6 +58,9 @@ IsNotFalse, IsNotUnknown, Negative, + Case, + Cast, + TryCast, ) @@ -105,6 +108,9 @@ def test_class_module_is_datafusion(): IsNotFalse, IsNotUnknown, Negative, + Case, + Cast, + TryCast, ]: assert klass.__module__ == "datafusion.expr" diff --git a/src/expr.rs b/src/expr.rs index f411068d..40272147 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -54,6 +54,8 @@ pub mod projection; pub mod scalar_variable; pub mod sort; pub mod table_scan; +pub mod case; +pub mod cast; /// A PyExpr that can be used on a DataFrame #[pyclass(name = "Expr", module = "datafusion.expr", subclass)] @@ -230,6 +232,9 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; // operators m.add_class::()?; m.add_class::()?; diff --git a/src/expr/case.rs b/src/expr/case.rs new file mode 100644 index 00000000..a24fe860 --- /dev/null +++ b/src/expr/case.rs @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::expr::PyExpr; +use datafusion_expr::Case; +use pyo3::prelude::*; + +#[pyclass(name = "Case", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCase { + case: Case, +} + +impl From for Case { + fn from(case: PyCase) -> Self { + case.case + } +} + +impl From for PyCase { + fn from(case: Case) -> PyCase { + PyCase { case } + } +} + +#[pymethods] +impl PyCase { + fn expr(&self) -> Option { + match &self.case.expr { + Some(e) => Some((**e).clone().into()), + None => None, + } + } + + fn when_then_expr(&self) -> Vec<(PyExpr, PyExpr)> { + self.case.when_then_expr + .iter() + .map(|e| ((*e.0).clone().into(), (*e.1).clone().into())) + .collect() + } + + fn else_expr(&self) -> Option { + match &self.case.else_expr { + Some(e) => Some((**e).clone().into()), + None => None, + } + } +} diff --git a/src/expr/cast.rs b/src/expr/cast.rs new file mode 100644 index 00000000..aafef38c --- /dev/null +++ b/src/expr/cast.rs @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::{expr::PyExpr, common::data_type::PyDataType}; +use datafusion_expr::{Cast, TryCast}; +use pyo3::prelude::*; + +#[pyclass(name = "Cast", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCast { + cast: Cast, +} + +impl From for Cast { + fn from(cast: PyCast) -> Self { + cast.cast + } +} + +impl From for PyCast { + fn from(cast: Cast) -> PyCast { + PyCast { cast } + } +} + +#[pymethods] +impl PyCast { + fn expr(&self) -> PyResult { + Ok((*self.cast.expr).clone().into()) + } + + fn data_type(&self) -> PyResult { + Ok(self.cast.data_type.clone().into()) + } +} + + +#[pyclass(name = "TryCast", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyTryCast { + try_cast: TryCast, +} + +impl From for TryCast { + fn from(try_cast: PyTryCast) -> Self { + try_cast.try_cast + } +} + +impl From for PyTryCast { + fn from(try_cast: TryCast) -> PyTryCast { + PyTryCast { try_cast } + } +} + +#[pymethods] +impl PyTryCast { + fn expr(&self) -> PyResult { + Ok((*self.try_cast.expr).clone().into()) + } + + fn data_type(&self) -> PyResult { + Ok(self.try_cast.data_type.clone().into()) + } +} From 87fc7e4132918d4ef6eb5ea87773c314d412569b Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 23 Feb 2023 15:26:24 -0500 Subject: [PATCH 2/3] cargo clippy --- src/expr/case.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/expr/case.rs b/src/expr/case.rs index 24c7f0b9..60527537 100644 --- a/src/expr/case.rs +++ b/src/expr/case.rs @@ -40,10 +40,7 @@ impl From for PyCase { #[pymethods] impl PyCase { fn expr(&self) -> Option { - match &self.case.expr { - Some(e) => Some((**e).clone().into()), - None => None, - } + self.case.expr.as_ref().map(|e| (**e).clone().into()) } fn when_then_expr(&self) -> Vec<(PyExpr, PyExpr)> { @@ -55,9 +52,6 @@ impl PyCase { } fn else_expr(&self) -> Option { - match &self.case.else_expr { - Some(e) => Some((**e).clone().into()), - None => None, - } + self.case.else_expr.as_ref().map(|e| (**e).clone().into()) } } From 69e8903637f69b1df3d62b0748c8f6b6a0d28f83 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 23 Feb 2023 15:35:13 -0500 Subject: [PATCH 3/3] Python linters --- datafusion/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datafusion/__init__.py b/datafusion/__init__.py index d57ea351..6c299dfa 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -77,7 +77,7 @@ "SessionConfig", "RuntimeConfig", "Expr", - "AggregateUDF",Subquery + "AggregateUDF", "ScalarUDF", "column", "literal", @@ -103,13 +103,10 @@ "IsNotFalse", "IsNotUnknown", "Negative", -<<<<<<< HEAD "Case", "Cast", "TryCast", -======= "Between", ->>>>>>> upstream/main ]