From 0d4738c0e179358415a39d9a2a1da5151bd8ca39 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 24 Feb 2023 10:52:36 -0700 Subject: [PATCH 1/3] Split out datafusion-execution more disk_manager move registry --- Cargo.toml | 1 + datafusion/core/Cargo.toml | 1 + datafusion/core/src/execution/mod.rs | 8 +++--- datafusion/execution/Cargo.toml | 26 +++++++++++++++++++ .../src}/disk_manager.rs | 3 +-- datafusion/execution/src/lib.rs | 20 ++++++++++++++ .../src}/memory_pool/mod.rs | 2 +- .../src}/memory_pool/pool.rs | 2 +- .../src}/memory_pool/proxy.rs | 0 .../execution => execution/src}/registry.rs | 2 +- dev/release/README.md | 1 + 11 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 datafusion/execution/Cargo.toml rename datafusion/{core/src/execution => execution/src}/disk_manager.rs (99%) create mode 100644 datafusion/execution/src/lib.rs rename datafusion/{core/src/execution => execution/src}/memory_pool/mod.rs (99%) rename datafusion/{core/src/execution => execution/src}/memory_pool/pool.rs (99%) rename datafusion/{core/src/execution => execution/src}/memory_pool/proxy.rs (100%) rename datafusion/{core/src/execution => execution/src}/registry.rs (97%) diff --git a/Cargo.toml b/Cargo.toml index 96bbc377eeee..c1f068eb335e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ members = [ "datafusion/common", "datafusion/core", "datafusion/expr", + "datafusion/execution", "datafusion/jit", "datafusion/optimizer", "datafusion/physical-expr", diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index ecc7945f3829..c0f6163d6358 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -68,6 +68,7 @@ bzip2 = { version = "0.4.3", optional = true } chrono = { version = "0.4.23", default-features = false } dashmap = "5.4.0" datafusion-common = { path = "../common", version = "19.0.0", features = ["parquet", "object_store"] } +datafusion-execution = { path = "../execution", version = "19.0.0" } datafusion-expr = { path = "../expr", version = "19.0.0" } datafusion-jit = { path = "../jit", version = "19.0.0", optional = true } datafusion-optimizer = { path = "../optimizer", version = "19.0.0" } diff --git a/datafusion/core/src/execution/mod.rs b/datafusion/core/src/execution/mod.rs index 8761237bd014..5586c2ce3ce7 100644 --- a/datafusion/core/src/execution/mod.rs +++ b/datafusion/core/src/execution/mod.rs @@ -41,12 +41,14 @@ //! pub mod context; -pub mod disk_manager; -pub mod memory_pool; // backwards compatibility pub use crate::datasource::file_format::options; -pub mod registry; pub mod runtime_env; +// backwards compatibility +pub use datafusion_execution::disk_manager; +pub use datafusion_execution::memory_pool; +pub use datafusion_execution::registry; + pub use disk_manager::DiskManager; pub use registry::FunctionRegistry; diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml new file mode 100644 index 000000000000..1dc776a37166 --- /dev/null +++ b/datafusion/execution/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "datafusion-execution" +description = "Execution configuration support for DataFusion query engine" +version = "19.0.0" +homepage = "https://github.com/apache/arrow-datafusion" +repository = "https://github.com/apache/arrow-datafusion" +readme = "README.md" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = [ "arrow", "query", "sql" ] +edition = "2021" +rust-version = "1.62" + +[lib] +name = "datafusion_execution" +path = "src/lib.rs" + + +[dependencies] +datafusion-common = { path = "../common", version = "19.0.0" } +datafusion-expr = { path = "../expr", version = "19.0.0" } +hashbrown = { version = "0.13", features = ["raw"] } +log = "^0.4" +parking_lot = "0.12" +rand = "0.8" +tempfile = "3" \ No newline at end of file diff --git a/datafusion/core/src/execution/disk_manager.rs b/datafusion/execution/src/disk_manager.rs similarity index 99% rename from datafusion/core/src/execution/disk_manager.rs rename to datafusion/execution/src/disk_manager.rs index 9ff6f1e556a6..60a8da44dff0 100644 --- a/datafusion/core/src/execution/disk_manager.rs +++ b/datafusion/execution/src/disk_manager.rs @@ -18,7 +18,7 @@ //! Manages files generated during query execution, files are //! hashed among the directories listed in RuntimeConfig::local_dirs. -use crate::error::{DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result}; use log::debug; use parking_lot::Mutex; use rand::{thread_rng, Rng}; @@ -155,7 +155,6 @@ mod tests { use std::path::Path; use super::*; - use crate::error::Result; use tempfile::TempDir; #[test] diff --git a/datafusion/execution/src/lib.rs b/datafusion/execution/src/lib.rs new file mode 100644 index 000000000000..b8e6debb923a --- /dev/null +++ b/datafusion/execution/src/lib.rs @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod disk_manager; +pub mod memory_pool; +pub mod registry; diff --git a/datafusion/core/src/execution/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs similarity index 99% rename from datafusion/core/src/execution/memory_pool/mod.rs rename to datafusion/execution/src/memory_pool/mod.rs index 8b519d52954b..f68a2565004d 100644 --- a/datafusion/core/src/execution/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -17,7 +17,7 @@ //! Manages all available memory during query execution -use crate::error::Result; +use datafusion_common::Result; use std::sync::Arc; mod pool; diff --git a/datafusion/core/src/execution/memory_pool/pool.rs b/datafusion/execution/src/memory_pool/pool.rs similarity index 99% rename from datafusion/core/src/execution/memory_pool/pool.rs rename to datafusion/execution/src/memory_pool/pool.rs index 97dc7c2e5936..7bb9fa4253d2 100644 --- a/datafusion/core/src/execution/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::execution::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; +use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; use datafusion_common::{DataFusionError, Result}; use parking_lot::Mutex; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/datafusion/core/src/execution/memory_pool/proxy.rs b/datafusion/execution/src/memory_pool/proxy.rs similarity index 100% rename from datafusion/core/src/execution/memory_pool/proxy.rs rename to datafusion/execution/src/memory_pool/proxy.rs diff --git a/datafusion/core/src/execution/registry.rs b/datafusion/execution/src/registry.rs similarity index 97% rename from datafusion/core/src/execution/registry.rs rename to datafusion/execution/src/registry.rs index 5bfa306e35ec..365f0529fdd9 100644 --- a/datafusion/core/src/execution/registry.rs +++ b/datafusion/execution/src/registry.rs @@ -17,7 +17,7 @@ //! FunctionRegistry trait -use crate::error::Result; +use datafusion_common::Result; use datafusion_expr::{AggregateUDF, ScalarUDF}; use std::{collections::HashSet, sync::Arc}; diff --git a/dev/release/README.md b/dev/release/README.md index 92dc7e5bbed0..fbbde87b3f3a 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -310,6 +310,7 @@ dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg (cd datafusion/row && cargo publish) (cd datafusion/physical-expr && cargo publish) (cd datafusion/optimizer && cargo publish) +(cd datafusion/execution && cargo publish) (cd datafusion/core && cargo publish) (cd datafusion/proto && cargo publish) (cd datafusion/substrait && cargo publish) From aea988c7ce3bf2507b320055cb73ff725ee4ef01 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 28 Feb 2023 17:46:39 -0500 Subject: [PATCH 2/3] Update datafusion-cli dependencies --- datafusion-cli/Cargo.lock | 40 ++++++++++++++++++--------------- datafusion/execution/Cargo.toml | 2 +- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e83c77d7a15d..e2f461d7b64c 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -681,6 +681,7 @@ dependencies = [ "chrono", "dashmap", "datafusion-common", + "datafusion-execution", "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", @@ -744,6 +745,19 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion-execution" +version = "19.0.0" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "hashbrown 0.13.2", + "log", + "parking_lot", + "rand", + "tempfile", +] + [[package]] name = "datafusion-expr" version = "19.0.0" @@ -1117,9 +1131,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.15" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" +checksum = "5be7b54589b581f624f566bf5d8eb2bab1db736c51528720b6bd36b96b55924d" dependencies = [ "bytes", "fnv", @@ -1352,9 +1366,9 @@ checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" [[package]] name = "jobserver" -version = "0.1.25" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b" +checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" dependencies = [ "libc", ] @@ -1960,15 +1974,6 @@ version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - [[package]] name = "reqwest" version = "0.11.14" @@ -2337,16 +2342,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" dependencies = [ "cfg-if", "fastrand", - "libc", "redox_syscall", - "remove_dir_all", - "winapi", + "rustix", + "windows-sys 0.42.0", ] [[package]] diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 1dc776a37166..82a55cfba905 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -23,4 +23,4 @@ hashbrown = { version = "0.13", features = ["raw"] } log = "^0.4" parking_lot = "0.12" rand = "0.8" -tempfile = "3" \ No newline at end of file +tempfile = "3" From 8fbf3b83237e0fbd74e14386945bb376c31250ac Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 28 Feb 2023 17:50:57 -0500 Subject: [PATCH 3/3] rat --- datafusion/execution/Cargo.toml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 82a55cfba905..3753d40df5fc 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + [package] name = "datafusion-execution" description = "Execution configuration support for DataFusion query engine"