From 15cde56ce463809ef998e20fb2a5e751d7a469e9 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Mar 2023 15:47:39 -0500 Subject: [PATCH 01/11] Introduce to_variant trait function to LogicalNode and create Explain LogicalNode bindings --- datafusion/__init__.py | 2 + src/errors.rs | 4 ++ src/expr.rs | 6 +- src/expr/aggregate.rs | 6 +- src/expr/analyze.rs | 6 +- src/expr/cross_join.rs | 6 +- src/expr/empty_relation.rs | 15 ++++- src/expr/explain.rs | 110 +++++++++++++++++++++++++++++++++++++ src/expr/filter.rs | 8 ++- src/expr/join.rs | 20 ++++--- src/expr/limit.rs | 10 +++- src/expr/logical_node.rs | 4 ++ src/expr/projection.rs | 4 ++ src/expr/sort.rs | 4 ++ src/expr/table_scan.rs | 10 +++- src/expr/union.rs | 8 ++- src/sql/logical.rs | 24 ++++---- 17 files changed, 211 insertions(+), 36 deletions(-) create mode 100644 src/expr/explain.rs diff --git a/datafusion/__init__.py b/datafusion/__init__.py index a7878e1bf..4956d9d30 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -76,6 +76,7 @@ Cast, TryCast, Between, + Explain, ) __version__ = importlib_metadata.version(__name__) @@ -127,6 +128,7 @@ "Cast", "TryCast", "Between", + "Explain", ] diff --git a/src/errors.rs b/src/errors.rs index c9abf06bc..e739fe31f 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -82,3 +82,7 @@ pub fn py_runtime_err(e: impl Debug) -> PyErr { pub fn py_datafusion_err(e: impl Debug) -> PyErr { PyErr::new::(format!("{e:?}")) } + +pub fn py_unsupported_variant_err(e: impl Debug) -> PyErr { + PyErr::new::(format!("{e:?}")) +} diff --git a/src/expr.rs b/src/expr.rs index cf1bc5d71..68e841b38 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -50,6 +50,7 @@ pub mod column; pub mod cross_join; pub mod empty_relation; pub mod exists; +pub mod explain; pub mod filter; pub mod grouping_set; pub mod in_list; @@ -260,10 +261,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - // operators - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 0449d16b7..6cdb66811 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -21,7 +21,7 @@ use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; -use crate::expr::logical_node::LogicalNode; +use super::logical_node::LogicalNode; use crate::expr::PyExpr; use crate::sql::logical::PyLogicalPlan; @@ -103,4 +103,8 @@ impl LogicalNode for PyAggregate { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.aggregate.input).clone())] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs index 5e195f7d3..09b0e35b8 100644 --- a/src/expr/analyze.rs +++ b/src/expr/analyze.rs @@ -20,7 +20,7 @@ use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; -use crate::expr::logical_node::LogicalNode; +use super::logical_node::LogicalNode; use crate::sql::logical::PyLogicalPlan; #[pyclass(name = "Analyze", module = "datafusion.expr", subclass)] @@ -77,4 +77,8 @@ impl LogicalNode for PyAnalyze { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.analyze.input).clone())] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/cross_join.rs b/src/expr/cross_join.rs index 4f5952c2b..e43d3045e 100644 --- a/src/expr/cross_join.rs +++ b/src/expr/cross_join.rs @@ -20,7 +20,7 @@ use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; -use crate::expr::logical_node::LogicalNode; +use super::logical_node::LogicalNode; use crate::sql::logical::PyLogicalPlan; #[pyclass(name = "CrossJoin", module = "datafusion.expr", subclass)] @@ -87,4 +87,8 @@ impl LogicalNode for PyCrossJoin { PyLogicalPlan::from((*self.cross_join.right).clone()), ] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs index 8b2621da5..0bc222e59 100644 --- a/src/expr/empty_relation.rs +++ b/src/expr/empty_relation.rs @@ -15,11 +15,13 @@ // specific language governing permissions and limitations // under the License. -use crate::common::df_schema::PyDFSchema; +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; use datafusion_expr::EmptyRelation; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; +use super::logical_node::LogicalNode; + #[pyclass(name = "EmptyRelation", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyEmptyRelation { @@ -70,3 +72,14 @@ impl PyEmptyRelation { Ok("EmptyRelation".to_string()) } } + +impl LogicalNode for PyEmptyRelation { + fn inputs(&self) -> Vec { + // table scans are leaf nodes and do not have inputs + vec![] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} diff --git a/src/expr/explain.rs b/src/expr/explain.rs new file mode 100644 index 000000000..4f855de11 --- /dev/null +++ b/src/expr/explain.rs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{self, Display, Formatter}; + +use datafusion_expr::{logical_plan::Explain, LogicalPlan}; +use pyo3::prelude::*; + +use crate::{sql::logical::PyLogicalPlan, errors::py_type_err, common::df_schema::PyDFSchema}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "Explain", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyExplain { + explain: Explain, +} + +impl From for Explain { + fn from(explain: PyExplain) -> Self { + explain.explain + } +} + +impl From for PyExplain { + fn from(explain: Explain) -> PyExplain { + PyExplain { explain } + } +} + +impl Display for PyExplain { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Explain + verbose: {:?} + plan: {:?} + stringified_plans: {:?} + schema: {:?} + logical_optimization_succeeded: {:?}", + &self.explain.verbose, + &self.explain.plan, + &self.explain.stringified_plans, + &self.explain.schema, + &self.explain.logical_optimization_succeeded + ) + } +} + +#[pymethods] +impl PyExplain { + fn explain_string(&self) -> PyResult> { + let mut string_plans: Vec = Vec::new(); + for stringified_plan in &self.explain.stringified_plans { + string_plans.push((*stringified_plan.plan).clone()); + } + Ok(string_plans) + } + + fn verbose(&self) -> bool { + self.explain.verbose + } + + fn plan(&self) -> PyResult { + Ok(PyLogicalPlan::from((*self.explain.plan).clone())) + } + + fn schema(&self) -> PyDFSchema { + (*self.explain.schema).clone().into() + } + + fn logical_optimization_succceeded(&self) -> bool { + self.explain.logical_optimization_succeeded + } +} + +impl TryFrom for PyExplain { + type Error = PyErr; + + fn try_from(logical_plan: LogicalPlan) -> Result { + match logical_plan { + LogicalPlan::Explain(explain) => Ok(PyExplain { explain }), + _ => Err(py_type_err("unexpected plan")), + } + } +} + +impl LogicalNode for PyExplain { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} diff --git a/src/expr/filter.rs b/src/expr/filter.rs index 0994b4e88..2def2f7d6 100644 --- a/src/expr/filter.rs +++ b/src/expr/filter.rs @@ -47,8 +47,8 @@ impl Display for PyFilter { write!( f, "Filter - \nPredicate: {:?} - \nInput: {:?}", + Predicate: {:?} + Input: {:?}", &self.filter.predicate, &self.filter.input ) } @@ -80,4 +80,8 @@ impl LogicalNode for PyFilter { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.filter.input).clone())] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/join.rs b/src/expr/join.rs index 428fb6703..801662962 100644 --- a/src/expr/join.rs +++ b/src/expr/join.rs @@ -95,14 +95,14 @@ impl Display for PyJoin { write!( f, "Join - \nLeft: {:?} - \nRight: {:?} - \nOn: {:?} - \nFilter: {:?} - \nJoinType: {:?} - \nJoinConstraint: {:?} - \nSchema: {:?} - \nNullEqualsNull: {:?}", + Left: {:?} + Right: {:?} + On: {:?} + Filter: {:?} + JoinType: {:?} + JoinConstraint: {:?} + Schema: {:?} + NullEqualsNull: {:?}", &self.join.left, &self.join.right, &self.join.on, @@ -178,4 +178,8 @@ impl LogicalNode for PyJoin { PyLogicalPlan::from((*self.join.right).clone()), ] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/limit.rs b/src/expr/limit.rs index 2366be637..d7b3f4ca5 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -46,9 +46,9 @@ impl Display for PyLimit { write!( f, "Limit - \nSkip: {} - \nFetch: {:?} - \nInput: {:?}", + Skip: {} + Fetch: {:?} + Input: {:?}", &self.limit.skip, &self.limit.fetch, &self.limit.input ) } @@ -85,4 +85,8 @@ impl LogicalNode for PyLimit { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.limit.input).clone())] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/logical_node.rs b/src/expr/logical_node.rs index 7d4fe54de..f935d0495 100644 --- a/src/expr/logical_node.rs +++ b/src/expr/logical_node.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use pyo3::{PyResult, Python, PyObject}; + use crate::sql::logical::PyLogicalPlan; /// Representation of a `LogicalNode` in the in overall `LogicalPlan` @@ -22,4 +24,6 @@ use crate::sql::logical::PyLogicalPlan; pub trait LogicalNode { /// The input plan to the current logical node instance. fn inputs(&self) -> Vec; + + fn to_variant(&self, py: Python) -> PyResult; } diff --git a/src/expr/projection.rs b/src/expr/projection.rs index 2551803bf..f5ba12db2 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -96,4 +96,8 @@ impl LogicalNode for PyProjection { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.projection.input).clone())] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/sort.rs b/src/expr/sort.rs index 5037b6d2d..8843c638d 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ -91,4 +91,8 @@ impl LogicalNode for PySort { fn inputs(&self) -> Vec { vec![PyLogicalPlan::from((*self.sort.input).clone())] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index bd9e7dbd2..63684fe7f 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -52,9 +52,9 @@ impl Display for PyTableScan { write!( f, "TableScan\nTable Name: {} - \nProjections: {:?} - \nProjected Schema: {:?} - \nFilters: {:?}", + Projections: {:?} + Projected Schema: {:?} + Filters: {:?}", &self.table_scan.table_name, &self.py_projections(), &self.py_schema(), @@ -131,4 +131,8 @@ impl LogicalNode for PyTableScan { // table scans are leaf nodes and do not have inputs vec![] } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/expr/union.rs b/src/expr/union.rs index 186fbed1d..98e8eaae6 100644 --- a/src/expr/union.rs +++ b/src/expr/union.rs @@ -46,8 +46,8 @@ impl Display for PyUnion { write!( f, "Union - \nInputs: {:?} - \nSchema: {:?}", + Inputs: {:?} + Schema: {:?}", &self.union_.inputs, &self.union_.schema, ) } @@ -82,4 +82,8 @@ impl LogicalNode for PyUnion { .map(|x| x.as_ref().clone().into()) .collect() } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } } diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 5a21ca8d9..95ff3a29c 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -17,10 +17,11 @@ use std::sync::Arc; -use crate::errors::py_runtime_err; +use crate::errors::py_unsupported_variant_err; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; use crate::expr::empty_relation::PyEmptyRelation; +use crate::expr::explain::PyExplain; use crate::expr::filter::PyFilter; use crate::expr::limit::PyLimit; use crate::expr::projection::PyProjection; @@ -29,6 +30,8 @@ use crate::expr::table_scan::PyTableScan; use datafusion_expr::LogicalPlan; use pyo3::prelude::*; +use crate::expr::logical_node::LogicalNode; + #[pyclass(name = "LogicalPlan", module = "datafusion", subclass)] #[derive(Debug, Clone)] pub struct PyLogicalPlan { @@ -53,15 +56,16 @@ impl PyLogicalPlan { /// Return the specific logical operator fn to_variant(&self, py: Python) -> PyResult { Python::with_gil(|_| match self.plan.as_ref() { - LogicalPlan::Aggregate(plan) => Ok(PyAggregate::from(plan.clone()).into_py(py)), - LogicalPlan::Analyze(plan) => Ok(PyAnalyze::from(plan.clone()).into_py(py)), - LogicalPlan::EmptyRelation(plan) => Ok(PyEmptyRelation::from(plan.clone()).into_py(py)), - LogicalPlan::Filter(plan) => Ok(PyFilter::from(plan.clone()).into_py(py)), - LogicalPlan::Limit(plan) => Ok(PyLimit::from(plan.clone()).into_py(py)), - LogicalPlan::Projection(plan) => Ok(PyProjection::from(plan.clone()).into_py(py)), - LogicalPlan::Sort(plan) => Ok(PySort::from(plan.clone()).into_py(py)), - LogicalPlan::TableScan(plan) => Ok(PyTableScan::from(plan.clone()).into_py(py)), - other => Err(py_runtime_err(format!( + LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), + LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), + LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), + LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), + LogicalPlan::Filter(plan) => PyFilter::from(plan.clone()).to_variant(py), + LogicalPlan::Limit(plan) => PyLimit::from(plan.clone()).to_variant(py), + LogicalPlan::Projection(plan) => PyProjection::from(plan.clone()).to_variant(py), + LogicalPlan::Sort(plan) => PySort::from(plan.clone()).to_variant(py), + LogicalPlan::TableScan(plan) => PyTableScan::from(plan.clone()).to_variant(py), + other => Err(py_unsupported_variant_err(format!( "Cannot convert this plan to a LogicalNode: {:?}", other ))), From e03bc86d8f34a5798b3227ff5c79887503900edc Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Mar 2023 15:50:40 -0500 Subject: [PATCH 02/11] Cargo fmt --- src/expr/aggregate.rs | 2 +- src/expr/analyze.rs | 2 +- src/expr/cross_join.rs | 2 +- src/expr/explain.rs | 2 +- src/expr/logical_node.rs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 6cdb66811..c3de9673a 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -20,8 +20,8 @@ use datafusion_expr::logical_plan::Aggregate; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; -use crate::common::df_schema::PyDFSchema; use super::logical_node::LogicalNode; +use crate::common::df_schema::PyDFSchema; use crate::expr::PyExpr; use crate::sql::logical::PyLogicalPlan; diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs index 09b0e35b8..bbec3a808 100644 --- a/src/expr/analyze.rs +++ b/src/expr/analyze.rs @@ -19,8 +19,8 @@ use datafusion_expr::logical_plan::Analyze; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; -use crate::common::df_schema::PyDFSchema; use super::logical_node::LogicalNode; +use crate::common::df_schema::PyDFSchema; use crate::sql::logical::PyLogicalPlan; #[pyclass(name = "Analyze", module = "datafusion.expr", subclass)] diff --git a/src/expr/cross_join.rs b/src/expr/cross_join.rs index e43d3045e..68793f249 100644 --- a/src/expr/cross_join.rs +++ b/src/expr/cross_join.rs @@ -19,8 +19,8 @@ use datafusion_expr::logical_plan::CrossJoin; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; -use crate::common::df_schema::PyDFSchema; use super::logical_node::LogicalNode; +use crate::common::df_schema::PyDFSchema; use crate::sql::logical::PyLogicalPlan; #[pyclass(name = "CrossJoin", module = "datafusion.expr", subclass)] diff --git a/src/expr/explain.rs b/src/expr/explain.rs index 4f855de11..d5d6a7bbd 100644 --- a/src/expr/explain.rs +++ b/src/expr/explain.rs @@ -20,7 +20,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion_expr::{logical_plan::Explain, LogicalPlan}; use pyo3::prelude::*; -use crate::{sql::logical::PyLogicalPlan, errors::py_type_err, common::df_schema::PyDFSchema}; +use crate::{common::df_schema::PyDFSchema, errors::py_type_err, sql::logical::PyLogicalPlan}; use super::logical_node::LogicalNode; diff --git a/src/expr/logical_node.rs b/src/expr/logical_node.rs index f935d0495..757e4f94b 100644 --- a/src/expr/logical_node.rs +++ b/src/expr/logical_node.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use pyo3::{PyResult, Python, PyObject}; +use pyo3::{PyObject, PyResult, Python}; use crate::sql::logical::PyLogicalPlan; From 1e35f309c3645f3f3d0ae0c5fdb65d42b8a1eb58 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Mar 2023 16:12:52 -0500 Subject: [PATCH 03/11] bindings for Extension LogicalNode --- Cargo.lock | 127 +++++++++++++++++++------------ Cargo.toml | 12 +-- datafusion/__init__.py | 2 + datafusion/tests/test_imports.py | 4 + src/expr.rs | 2 + src/expr/extension.rs | 52 +++++++++++++ src/sql/logical.rs | 2 + 7 files changed, 147 insertions(+), 54 deletions(-) create mode 100644 src/expr/extension.rs diff --git a/Cargo.lock b/Cargo.lock index 4d5941b73..ff62f726f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -107,9 +107,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3724c874f1517cf898cd1c3ad18ab5071edf893c48e73139ab1e16cf0f2affe" +checksum = "f410d3907b6b3647b9e7bca4551274b2e3d716aa940afb67b7287257401da921" dependencies = [ "ahash", "arrow-arith", @@ -131,9 +131,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e958823b8383ca14d0a2e973de478dd7674cd9f72837f8c41c132a0fda6a4e5e" +checksum = "f87391cf46473c9bc53dab68cb8872c3a81d4dfd1703f1c8aa397dba9880a043" dependencies = [ "arrow-array", "arrow-buffer", @@ -146,9 +146,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db670eab50e76654065b5aed930f4367101fcddcb2223802007d1e0b4d5a2579" +checksum = "d35d5475e65c57cffba06d0022e3006b677515f99b54af33a7cd54f6cdd4a5b5" dependencies = [ "ahash", "arrow-buffer", @@ -162,9 +162,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f0e01c931882448c0407bd32311a624b9f099739e94e786af68adc97016b5f2" +checksum = "68b4ec72eda7c0207727df96cf200f539749d736b21f3e782ece113e18c1a0a7" dependencies = [ "half", "num", @@ -172,9 +172,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bf35d78836c93f80d9362f3ccb47ff5e2c5ecfc270ff42cdf1ef80334961d44" +checksum = "0a7285272c9897321dfdba59de29f5b05aeafd3cdedf104a941256d155f6d304" dependencies = [ "arrow-array", "arrow-buffer", @@ -188,9 +188,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6aa7c2531d89d01fed8c469a9b1bf97132a0bdf70b4724fe4bbb4537a50880" +checksum = "981ee4e7f6a120da04e00d0b39182e1eeacccb59c8da74511de753c56b7fddf7" dependencies = [ "arrow-array", "arrow-buffer", @@ -207,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea50db4d1e1e4c2da2bfdea7b6d2722eef64267d5ab680d815f7ae42428057f5" +checksum = "27cc673ee6989ea6e4b4e8c7d461f7e06026a096c8f0b1a7288885ff71ae1e56" dependencies = [ "arrow-buffer", "arrow-schema", @@ -219,9 +219,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4042fe6585155d1ec28a8e4937ec901a3ca7a19a22b9f6cd3f551b935cd84f5" +checksum = "e37b8b69d9e59116b6b538e8514e0ec63a30f08b617ce800d31cb44e3ef64c1a" dependencies = [ "arrow-array", "arrow-buffer", @@ -233,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c907c4ab4f26970a3719dc06e78e8054a01d0c96da3664d23b941e201b33d2b" +checksum = "80c3fa0bed7cfebf6d18e46b733f9cb8a1cb43ce8e6539055ca3e1e48a426266" dependencies = [ "arrow-array", "arrow-buffer", @@ -252,9 +252,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e131b447242a32129efc7932f58ed8931b42f35d8701c1a08f9f524da13b1d3c" +checksum = "d247dce7bed6a8d6a3c6debfa707a3a2f694383f0c692a39d736a593eae5ef94" dependencies = [ "arrow-array", "arrow-buffer", @@ -266,9 +266,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b591ef70d76f4ac28dd7666093295fece0e5f9298f49af51ea49c001e1635bb6" +checksum = "8d609c0181f963cea5c70fddf9a388595b5be441f3aa1d1cdbf728ca834bbd3a" dependencies = [ "ahash", "arrow-array", @@ -281,18 +281,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb327717d87eb94be5eff3b0cb8987f54059d343ee5235abf7f143c85f54cfc8" +checksum = "64951898473bfb8e22293e83a44f02874d2257514d49cd95f9aa4afcff183fbc" dependencies = [ "bitflags", ] [[package]] name = "arrow-select" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79d3c389d1cea86793934f31594f914c8547d82e91e3411d4833ad0aac3266a7" +checksum = "2a513d89c2e1ac22b28380900036cf1f3992c6443efc5e079de631dcf83c6888" dependencies = [ "arrow-array", "arrow-buffer", @@ -303,9 +303,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ee67790496dd310ddbf5096870324431e89aa76453e010020ac29b1184d356" +checksum = "5288979b2705dae1114c864d73150629add9153b9b8f1d7ee3963db94c372ba5" dependencies = [ "arrow-array", "arrow-buffer", @@ -330,6 +330,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", + "zstd 0.11.2+zstd.1.5.2", + "zstd-safe 5.0.2+zstd.1.5.2", ] [[package]] @@ -665,8 +667,7 @@ dependencies = [ [[package]] name = "datafusion" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12d462c103bd1cfd24f8e8a199986d89582af6280528e085c393c4be2ff25da7" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "ahash", "apache-avro", @@ -678,6 +679,7 @@ dependencies = [ "chrono", "dashmap", "datafusion-common", + "datafusion-execution", "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", @@ -709,13 +711,13 @@ dependencies = [ "url", "uuid", "xz2", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] name = "datafusion-common" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5babdbcf102862b1f1828c1ab41094e39ba881d5ece4cee2d481d528148f592" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "apache-avro", "arrow", @@ -727,11 +729,24 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion-execution" +version = "19.0.0" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "hashbrown 0.13.2", + "log", + "parking_lot", + "rand", + "tempfile", +] + [[package]] name = "datafusion-expr" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f0c34e87fa541a59d378dc7ee7c9c3dd1fcfa793eab09561b8b4cb35e1827a" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "ahash", "arrow", @@ -743,8 +758,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d0c6d912b7b7e4637d85947222455cd948ea193ca454ebf649e7265fd10b048" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "arrow", "async-trait", @@ -753,6 +767,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.13.2", + "itertools", "log", "regex-syntax", ] @@ -760,8 +775,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8000e8f8efafb810ff2943323bb48bd722ac5bb919fe302a66b832ed9c25245f" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "ahash", "arrow", @@ -781,6 +795,7 @@ dependencies = [ "md-5", "num-traits", "paste", + "petgraph", "rand", "regex", "sha2", @@ -813,8 +828,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e900f05d7e5666e8ab714a96a28cb6f143e62aa1d501ba1199024f8635c726c" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "arrow", "datafusion-common", @@ -825,8 +839,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "096f293799e8ae883e0f79f8ebaa51e4292e690ba45e0269b48ca9bd79f57094" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "arrow-schema", "datafusion-common", @@ -838,8 +851,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "19.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5a1d24d6db620a9d18c69289bcf8ed413feb6e33b4f69902dbc415bce05d28e" +source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" dependencies = [ "async-recursion", "chrono", @@ -1738,9 +1750,9 @@ dependencies = [ [[package]] name = "parquet" -version = "33.0.0" +version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1b076829801167d889795cd1957989055543430fa1469cb1f6e32b789bfc764" +checksum = "7ac135ecf63ebb5f53dda0921b0b76d6048b3ef631a5f4760b9e8f863ff00cfa" dependencies = [ "ahash", "arrow-array", @@ -1766,7 +1778,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] @@ -3225,13 +3237,32 @@ dependencies = [ "syn", ] +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe 5.0.2+zstd.1.5.2", +] + [[package]] name = "zstd" version = "0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe", + "zstd-safe 6.0.4+zstd.1.5.4", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d84b2b6c3..7e1767dc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,12 +34,12 @@ default = ["mimalloc"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.0", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { version = "19.0.0", features = ["pyarrow", "avro"] } -datafusion-expr = "19.0.0" -datafusion-optimizer = "19.0.0" -datafusion-common = { version = "19.0.0", features = ["pyarrow"] } -datafusion-sql = "19.0.0" -datafusion-substrait = "19.0.0" +datafusion = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78", features = ["pyarrow", "avro"]} +datafusion-expr = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } +datafusion-optimizer = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } +datafusion-common = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78", features = ["pyarrow"]} +datafusion-sql = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } +datafusion-substrait = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "*", optional = true, default-features = false } async-trait = "0.1" diff --git a/datafusion/__init__.py b/datafusion/__init__.py index 4956d9d30..578e7dc05 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -77,6 +77,7 @@ TryCast, Between, Explain, + Extension, ) __version__ = importlib_metadata.version(__name__) @@ -129,6 +130,7 @@ "TryCast", "Between", "Explain", + "Extension", ] diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py index 77309e2bc..907544627 100644 --- a/datafusion/tests/test_imports.py +++ b/datafusion/tests/test_imports.py @@ -77,6 +77,8 @@ Cast, TryCast, Between, + Explain, + Extension, ) @@ -143,6 +145,8 @@ def test_class_module_is_datafusion(): Cast, TryCast, Between, + Explain, + Extension, ]: assert klass.__module__ == "datafusion.expr" diff --git a/src/expr.rs b/src/expr.rs index 68e841b38..b52c2a6f2 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -51,6 +51,7 @@ pub mod cross_join; pub mod empty_relation; pub mod exists; pub mod explain; +pub mod extension; pub mod filter; pub mod grouping_set; pub mod in_list; @@ -272,5 +273,6 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/expr/extension.rs b/src/expr/extension.rs new file mode 100644 index 000000000..81a435c23 --- /dev/null +++ b/src/expr/extension.rs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::Extension; +use pyo3::prelude::*; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "Extension", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyExtension { + pub node: Extension, +} + +impl From for PyExtension { + fn from(node: Extension) -> PyExtension { + PyExtension { node } + } +} + +#[pymethods] +impl PyExtension { + fn name(&self) -> PyResult { + Ok(self.node.node.name().to_string()) + } +} + +impl LogicalNode for PyExtension { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 95ff3a29c..a22f269f1 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -22,6 +22,7 @@ use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; +use crate::expr::extension::PyExtension; use crate::expr::filter::PyFilter; use crate::expr::limit::PyLimit; use crate::expr::projection::PyProjection; @@ -60,6 +61,7 @@ impl PyLogicalPlan { LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), + LogicalPlan::Extension(plan) => PyExtension::from(plan.clone()).to_variant(py), LogicalPlan::Filter(plan) => PyFilter::from(plan.clone()).to_variant(py), LogicalPlan::Limit(plan) => PyLimit::from(plan.clone()).to_variant(py), LogicalPlan::Projection(plan) => PyProjection::from(plan.clone()).to_variant(py), From 1dd7246029e26d3678a9c414aa7d9e28eed5b556 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Mar 2023 16:21:29 -0500 Subject: [PATCH 04/11] Add missing classes to list of exports so test_imports will pass --- src/expr.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/expr.rs b/src/expr.rs index 68e841b38..33f36f599 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -272,5 +272,8 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } From 72baea1a8ce6d4c96ed37a530f61f0642661e46d Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 11 Mar 2023 11:39:00 -0500 Subject: [PATCH 05/11] Update to point to proper repo --- Cargo.lock | 71 ++++++++++++++++++++++++++------------------------ Cargo.toml | 12 ++++----- src/context.rs | 8 +++--- 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff62f726f..0fe0dbd74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -399,9 +399,9 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ "generic-array", ] @@ -666,8 +666,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "ahash", "apache-avro", @@ -716,8 +716,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "apache-avro", "arrow", @@ -731,22 +731,25 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ + "dashmap", "datafusion-common", "datafusion-expr", "hashbrown 0.13.2", "log", + "object_store", "parking_lot", "rand", "tempfile", + "url", ] [[package]] name = "datafusion-expr" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "ahash", "arrow", @@ -757,8 +760,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "arrow", "async-trait", @@ -774,8 +777,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "ahash", "arrow", @@ -827,8 +830,8 @@ dependencies = [ [[package]] name = "datafusion-row" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "arrow", "datafusion-common", @@ -838,8 +841,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "arrow-schema", "datafusion-common", @@ -850,8 +853,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "19.0.0" -source = "git+https://github.com/jdye64/arrow-datafusion.git?rev=50e9d78#50e9d78da179fcee28d7fbd30e32de08f1a73d3d" +version = "20.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=dd98aab#dd98aabdaebcfc30ec4c370be93f6663de50e02f" dependencies = [ "async-recursion", "chrono", @@ -1203,9 +1206,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" [[package]] name = "hyper" -version = "0.14.24" +version = "0.14.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e011372fa0b68db8350aa7a248930ecc7839bf46d8485577d69f117a75f164c" +checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" dependencies = [ "bytes", "futures-channel", @@ -1424,9 +1427,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.139" +version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] name = "libflate" @@ -2325,18 +2328,18 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.153" +version = "1.0.154" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a382c72b4ba118526e187430bb4963cd6d55051ebf13d9b25574d379cc98d20" +checksum = "8cdd151213925e7f1ab45a9bbfb129316bd00799784b174b7cc7bcd16961c49e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.153" +version = "1.0.154" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ef476a5790f0f6decbc66726b6e5d63680ed518283e64c7df415989d880954f" +checksum = "4fc80d722935453bcafdc2c9a73cd6fac4dc1938f0346035d84bf99fa9e33217" dependencies = [ "proc-macro2", "quote", @@ -2473,9 +2476,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.30.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db67dc6ef36edb658196c3fef0464a80b53dbbc194a904e81f9bd4190f9ecc5b" +checksum = "0366f270dbabb5cc2e4c88427dc4c08bba144f81e32fbd459a013f26a4d16aa0" dependencies = [ "log", "sqlparser_derive", @@ -2519,9 +2522,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2feb96a6a106e21161551af32dc4e0fdab3aceb926b940d7e92a086b640fc7c" +checksum = "3108bf99c703e39728847cce9becff451f8a94cbc72fb5918b4e7f0543d7b06a" dependencies = [ "heck 0.4.1", "prost 0.11.8", @@ -2858,9 +2861,9 @@ checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" [[package]] name = "unicode-bidi" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +checksum = "524b68aca1d05e03fdf03fcdce2c6c94b6daf6d16861ddaa7e4f2b6638a9052c" [[package]] name = "unicode-ident" diff --git a/Cargo.toml b/Cargo.toml index 7e1767dc7..f7c19e9bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,12 +34,12 @@ default = ["mimalloc"] tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync"] } rand = "0.8" pyo3 = { version = "0.18.0", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78", features = ["pyarrow", "avro"]} -datafusion-expr = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } -datafusion-optimizer = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } -datafusion-common = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78", features = ["pyarrow"]} -datafusion-sql = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } -datafusion-substrait = { git = "https://github.com/jdye64/arrow-datafusion.git", rev = "50e9d78" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd98aab", features = ["pyarrow", "avro"]} +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd98aab" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd98aab" } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd98aab", features = ["pyarrow"]} +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd98aab" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "dd98aab" } uuid = { version = "1.2", features = ["v4"] } mimalloc = { version = "*", optional = true, default-features = false } async-trait = "0.1" diff --git a/src/context.rs b/src/context.rs index e77a3b394..e2c509abe 100644 --- a/src/context.rs +++ b/src/context.rs @@ -49,6 +49,7 @@ use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; +use datafusion_common::config::Extensions; use datafusion_common::ScalarValue; use pyo3::types::PyTuple; use tokio::runtime::Runtime; @@ -698,19 +699,20 @@ impl PySessionContext { part: usize, py: Python, ) -> PyResult { - let ctx = Arc::new(TaskContext::new( + let ctx = TaskContext::try_new( "task_id".to_string(), "session_id".to_string(), HashMap::new(), HashMap::new(), HashMap::new(), Arc::new(RuntimeEnv::default()), - )); + Extensions::default(), + ); // create a Tokio runtime to run the async code let rt = Runtime::new().unwrap(); let plan = plan.plan.clone(); let fut: JoinHandle> = - rt.spawn(async move { plan.execute(part, ctx) }); + rt.spawn(async move { plan.execute(part, Arc::new(ctx?)) }); let stream = wait_for_future(py, fut).map_err(py_datafusion_err)?; Ok(PyRecordBatchStream::new(stream?)) } From ae9187371e6ac1eae9a806d544450e08b4bb1be6 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 13 Mar 2023 11:55:24 -0400 Subject: [PATCH 06/11] Update pytest to adhere to aggregate calls being wrapped in projections --- Cargo.lock | 98 +++++++++++++++--------------- datafusion/tests/test_dataframe.py | 13 ++-- 2 files changed, 54 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0fe0dbd74..33156d6ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -483,9 +483,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", "js-sys", @@ -542,9 +542,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3ad85c1f65dc7b37604eb0e89748faf0b9653065f2a8ef69f96a687ec1e9279" +checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" [[package]] name = "core-foundation-sys" @@ -979,9 +979,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "531ac96c6ff5fd7c62263c5e3c67a603af4fcaee2e1a0ae5565ba3a11e69e549" dependencies = [ "futures-channel", "futures-core", @@ -994,9 +994,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "164713a5a0dcc3e7b4b1ed7d3b433cabc18025386f9339346e8daf15963cf7ac" dependencies = [ "futures-core", "futures-sink", @@ -1004,15 +1004,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "1997dd9df74cdac935c76252744c1ed5794fac083242ea4fe77ef3ed60ba0f83" dependencies = [ "futures-core", "futures-task", @@ -1021,15 +1021,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +checksum = "3eb14ed937631bd8b8b8977f2c198443447a8355b6e3ca599f38c975e5a963b6" dependencies = [ "proc-macro2", "quote", @@ -1038,21 +1038,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" [[package]] name = "futures-task" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" dependencies = [ "futures-channel", "futures-core", @@ -1848,9 +1848,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" dependencies = [ "unicode-ident", ] @@ -2038,9 +2038,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "50686e0021c4136d1d453b2dfe059902278681512a34d4248435dc34b6b5c8ec" dependencies = [ "proc-macro2", ] @@ -2178,7 +2178,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.16", + "semver 1.0.17", ] [[package]] @@ -2307,9 +2307,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "semver-parser" @@ -2328,18 +2328,18 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.154" +version = "1.0.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cdd151213925e7f1ab45a9bbfb129316bd00799784b174b7cc7bcd16961c49e" +checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.154" +version = "1.0.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc80d722935453bcafdc2c9a73cd6fac4dc1938f0346035d84bf99fa9e33217" +checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630" dependencies = [ "proc-macro2", "quote", @@ -3146,9 +3146,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -3161,45 +3161,45 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "winreg" diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index b87b1f14a..5260c47aa 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -350,14 +350,13 @@ def test_logical_plan(aggregate_df): def test_optimized_logical_plan(aggregate_df): plan = aggregate_df.optimized_logical_plan() - expected = "Projection: test.c1, SUM(test.c2)" + expected = "Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]" assert expected == plan.display() expected = ( - "Projection: test.c1, SUM(test.c2)\n" - " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" - " TableScan: test projection=[c1, c2]" + "Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" + " TableScan: test projection=[c1, c2]" ) assert expected == plan.display_indent() @@ -366,9 +365,8 @@ def test_optimized_logical_plan(aggregate_df): def test_execution_plan(aggregate_df): plan = aggregate_df.execution_plan() - expected = ( - "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" - ) + expected = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1],\ + aggr=[SUM(test.c2)]\n" assert expected == plan.display() @@ -382,7 +380,6 @@ def test_execution_plan(aggregate_df): # indent plan will be different for everyone due to absolute path # to filename, so we just check for some expected content - assert "ProjectionExec:" in indent assert "AggregateExec:" in indent assert "CoalesceBatchesExec:" in indent assert "RepartitionExec:" in indent From bf67ce6deffc4d4d2fd100c2f24ce28858ed091f Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 13 Mar 2023 12:43:01 -0400 Subject: [PATCH 07/11] Address linter change which causes a pytest to fail --- datafusion/tests/test_dataframe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 5260c47aa..611bcabe4 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -365,8 +365,7 @@ def test_optimized_logical_plan(aggregate_df): def test_execution_plan(aggregate_df): plan = aggregate_df.execution_plan() - expected = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1],\ - aggr=[SUM(test.c2)]\n" + expected = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[SUM(test.c2)]\n" # noqa: E501 assert expected == plan.display() From 75956faf03d1a853ba5991ad8ae34ce73daeb265 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 13 Mar 2023 12:51:37 -0400 Subject: [PATCH 08/11] Add bindings for LogicalPlan::CreateMemoryTable --- datafusion/__init__.py | 2 + datafusion/tests/test_imports.py | 2 + src/expr.rs | 2 + src/expr/create_memory_table.rs | 97 ++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 src/expr/create_memory_table.rs diff --git a/datafusion/__init__.py b/datafusion/__init__.py index 578e7dc05..42687796f 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -78,6 +78,7 @@ Between, Explain, Extension, + CreateMemoryTable, ) __version__ = importlib_metadata.version(__name__) @@ -131,6 +132,7 @@ "Between", "Explain", "Extension", + "CreateMemoryTable", ] diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py index 907544627..490a866d2 100644 --- a/datafusion/tests/test_imports.py +++ b/datafusion/tests/test_imports.py @@ -79,6 +79,7 @@ Between, Explain, Extension, + CreateMemoryTable, ) @@ -147,6 +148,7 @@ def test_class_module_is_datafusion(): Between, Explain, Extension, + CreateMemoryTable, ]: assert klass.__module__ == "datafusion.expr" diff --git a/src/expr.rs b/src/expr.rs index 1d6c99932..de2f75cda 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -47,6 +47,7 @@ pub mod bool_expr; pub mod case; pub mod cast; pub mod column; +pub mod create_memory_table; pub mod cross_join; pub mod empty_relation; pub mod exists; @@ -277,5 +278,6 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/expr/create_memory_table.rs b/src/expr/create_memory_table.rs new file mode 100644 index 000000000..509bf2168 --- /dev/null +++ b/src/expr/create_memory_table.rs @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{self, Display, Formatter}; + +use datafusion_expr::CreateMemoryTable; +use pyo3::prelude::*; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CreateMemoryTable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateMemoryTable { + create: CreateMemoryTable, +} + +impl From for CreateMemoryTable { + fn from(create: PyCreateMemoryTable) -> Self { + create.create + } +} + +impl From for PyCreateMemoryTable { + fn from(create: CreateMemoryTable) -> PyCreateMemoryTable { + PyCreateMemoryTable { create } + } +} + +impl Display for PyCreateMemoryTable { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "CreateMemoryTable + Name: {:?} + Input: {:?} + if_not_exists: {:?} + or_replace: {:?}", + &self.create.name, + &self.create.input, + &self.create.if_not_exists, + &self.create.or_replace, + ) + } +} + +#[pymethods] +impl PyCreateMemoryTable { + fn name(&self) -> PyResult { + Ok(self.create.name.to_string()) + } + + fn input(&self) -> PyResult> { + Ok(Self::inputs(self)) + } + + fn if_not_exists(&self) -> bool { + self.create.if_not_exists + } + + fn or_replace(&self) -> bool { + self.create.or_replace + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateMemoryTable({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateMemoryTable".to_string()) + } +} + +impl LogicalNode for PyCreateMemoryTable { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.create.input).clone())] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} From 798f5bae209d210685455a578e0a92d105f84788 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 13 Mar 2023 12:56:59 -0400 Subject: [PATCH 09/11] Add bindings for LogicalPlan::CreateView --- datafusion/__init__.py | 2 + datafusion/tests/test_imports.py | 2 + src/expr.rs | 2 + src/expr/create_view.rs | 94 ++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 src/expr/create_view.rs diff --git a/datafusion/__init__.py b/datafusion/__init__.py index 42687796f..91fdadcbd 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -79,6 +79,7 @@ Explain, Extension, CreateMemoryTable, + CreateView, ) __version__ = importlib_metadata.version(__name__) @@ -133,6 +134,7 @@ "Explain", "Extension", "CreateMemoryTable", + "CreateView", ] diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py index 490a866d2..5d706e2c1 100644 --- a/datafusion/tests/test_imports.py +++ b/datafusion/tests/test_imports.py @@ -80,6 +80,7 @@ Explain, Extension, CreateMemoryTable, + CreateView, ) @@ -149,6 +150,7 @@ def test_class_module_is_datafusion(): Explain, Extension, CreateMemoryTable, + CreateView, ]: assert klass.__module__ == "datafusion.expr" diff --git a/src/expr.rs b/src/expr.rs index de2f75cda..a28dfe586 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -48,6 +48,7 @@ pub mod case; pub mod cast; pub mod column; pub mod create_memory_table; +pub mod create_view; pub mod cross_join; pub mod empty_relation; pub mod exists; @@ -279,5 +280,6 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs new file mode 100644 index 000000000..9d06239ea --- /dev/null +++ b/src/expr/create_view.rs @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{self, Display, Formatter}; + +use datafusion_expr::CreateView; +use pyo3::prelude::*; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CreateView", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateView { + create: CreateView, +} + +impl From for CreateView { + fn from(create: PyCreateView) -> Self { + create.create + } +} + +impl From for PyCreateView { + fn from(create: CreateView) -> PyCreateView { + PyCreateView { create } + } +} + +impl Display for PyCreateView { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "CreateView + name: {:?} + input: {:?} + or_replace: {:?} + definition: {:?}", + &self.create.name, &self.create.input, &self.create.or_replace, &self.create.definition, + ) + } +} + +#[pymethods] +impl PyCreateView { + fn name(&self) -> PyResult { + Ok(self.create.name.to_string()) + } + + fn input(&self) -> PyResult> { + Ok(Self::inputs(self)) + } + + fn or_replace(&self) -> bool { + self.create.or_replace + } + + fn definition(&self) -> PyResult> { + Ok(self.create.definition.clone()) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateView({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateView".to_string()) + } +} + +impl LogicalNode for PyCreateView { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.create.input).clone())] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} From 4b8d6065cf2e33091f48aeca7ffd8a67a123cb19 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 13 Mar 2023 13:04:33 -0400 Subject: [PATCH 10/11] Add bindings for LogicalPlan::Distinct --- datafusion/__init__.py | 2 + datafusion/tests/test_imports.py | 2 + src/expr.rs | 2 + src/expr/distinct.rs | 80 ++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+) create mode 100644 src/expr/distinct.rs diff --git a/datafusion/__init__.py b/datafusion/__init__.py index 91fdadcbd..eb7646dc2 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -80,6 +80,7 @@ Extension, CreateMemoryTable, CreateView, + Distinct, ) __version__ = importlib_metadata.version(__name__) @@ -135,6 +136,7 @@ "Extension", "CreateMemoryTable", "CreateView", + "Distinct", ] diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py index 5d706e2c1..c3ea19449 100644 --- a/datafusion/tests/test_imports.py +++ b/datafusion/tests/test_imports.py @@ -81,6 +81,7 @@ Extension, CreateMemoryTable, CreateView, + Distinct, ) @@ -151,6 +152,7 @@ def test_class_module_is_datafusion(): Extension, CreateMemoryTable, CreateView, + Distinct, ]: assert klass.__module__ == "datafusion.expr" diff --git a/src/expr.rs b/src/expr.rs index a28dfe586..c50507def 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -50,6 +50,7 @@ pub mod column; pub mod create_memory_table; pub mod create_view; pub mod cross_join; +pub mod distinct; pub mod empty_relation; pub mod exists; pub mod explain; @@ -281,5 +282,6 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs new file mode 100644 index 000000000..681ae953b --- /dev/null +++ b/src/expr/distinct.rs @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{self, Display, Formatter}; + +use datafusion_expr::Distinct; +use pyo3::prelude::*; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "Distinct", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDistinct { + distinct: Distinct, +} + +impl From for Distinct { + fn from(distinct: PyDistinct) -> Self { + distinct.distinct + } +} + +impl From for PyDistinct { + fn from(distinct: Distinct) -> PyDistinct { + PyDistinct { distinct } + } +} + +impl Display for PyDistinct { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Distinct + \nInput: {:?}", + self.distinct.input, + ) + } +} + +#[pymethods] +impl PyDistinct { + /// Retrieves the input `LogicalPlan` to this `Projection` node + fn input(&self) -> PyResult> { + Ok(Self::inputs(self)) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("Distinct({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("Distinct".to_string()) + } +} + +impl LogicalNode for PyDistinct { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.distinct.input).clone())] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} From ae513e49e5371ff9f0e89e86e4df8eebd953f384 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 13 Mar 2023 17:00:21 -0400 Subject: [PATCH 11/11] Remove accidential duplicate CreateMemoryTable import that crept in from merge conflict resolution --- datafusion/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/__init__.py b/datafusion/__init__.py index a211722d0..c9e58ec7a 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -80,7 +80,6 @@ CreateMemoryTable, SubqueryAlias, Extension, - CreateMemoryTable, CreateView, Distinct, )