From d2c43c77599298ed0ae5ee15f0be7fed66301bc0 Mon Sep 17 00:00:00 2001 From: Sam Kleinman Date: Mon, 12 Aug 2024 20:22:11 -0400 Subject: [PATCH] feat: sdk should export its dependency (#3135) --- Cargo.lock | 6 ++---- bindings/nodejs/Cargo.toml | 4 ---- bindings/nodejs/index.d.ts | 2 +- bindings/nodejs/src/execution.rs | 16 ++++++++++------ bindings/python/Cargo.toml | 6 ++---- bindings/python/src/environment.rs | 7 +++---- bindings/python/src/execution.rs | 30 +++++++++++++++--------------- crates/glaredb/Cargo.toml | 2 ++ crates/glaredb/src/lib.rs | 26 ++++++++++++++++++++------ 9 files changed, 55 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da9ec185d..dff7f07d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3943,12 +3943,14 @@ name = "glaredb" version = "0.9.4" dependencies = [ "anyhow", + "arrow_util", "datafusion", "derive_builder", "futures", "indexmap 2.3.0", "metastore", "sqlexec", + "terminal_util", "thiserror", "url", ] @@ -4589,10 +4591,7 @@ dependencies = [ name = "js-glaredb" version = "0.0.0" dependencies = [ - "arrow_util", "async-once-cell", - "async-trait", - "datafusion", "futures", "glaredb", "lzma-sys", @@ -4600,7 +4599,6 @@ dependencies = [ "napi-build", "napi-derive", "once_cell", - "terminal_util", "thiserror", "url", ] diff --git a/bindings/nodejs/Cargo.toml b/bindings/nodejs/Cargo.toml index 76571a2ae..1d559af05 100644 --- a/bindings/nodejs/Cargo.toml +++ b/bindings/nodejs/Cargo.toml @@ -10,14 +10,10 @@ crate-type = ["cdylib"] workspace = true [dependencies] -arrow_util = { path = "../../crates/arrow_util" } glaredb = { path = "../../crates/glaredb" } -terminal_util = { path = "../../crates/terminal_util" } futures = { workspace = true } -datafusion = { workspace = true } thiserror = { workspace = true } url = { workspace = true } -async-trait = { workspace = true } lzma-sys = { version = "*", features = ["static"] } # Prevent dynamic linking of lzma, which comes from datafusion napi = { version = "2.16.8", 
default-features = false, features = ["full"] } napi-derive = "2.16.10" diff --git a/bindings/nodejs/index.d.ts b/bindings/nodejs/index.d.ts index af1e75c0f..09c755656 100644 --- a/bindings/nodejs/index.d.ts +++ b/bindings/nodejs/index.d.ts @@ -11,7 +11,7 @@ export interface ConnectOptions { storageOptions?: Record } /** Connect to a GlareDB database. */ -export function connect(dataDirOrCloudUrl?: string | undefined | null, options?: ConnectOptions | undefined | null): Promise +export declare function connect(dataDirOrCloudUrl?: string | undefined | null, options?: ConnectOptions | undefined | null): Promise /** A connected session to a GlareDB database. */ export class Connection { /** diff --git a/bindings/nodejs/src/execution.rs b/bindings/nodejs/src/execution.rs index 6a61035a7..f15c83531 100644 --- a/bindings/nodejs/src/execution.rs +++ b/bindings/nodejs/src/execution.rs @@ -1,9 +1,9 @@ use std::sync::{Arc, Mutex}; -use arrow_util::pretty; -use datafusion::arrow::ipc::writer::FileWriter; use futures::stream::StreamExt; -use glaredb::{DatabaseError, RecordStream, SendableRecordBatchStream}; +use glaredb::ext::datafusion::arrow::ipc::writer::FileWriter; +use glaredb::ext::SendableRecordBatchStream; +use glaredb::{DatabaseError, RecordStream}; use crate::error::JsDatabaseError; @@ -96,9 +96,13 @@ async fn print_record_batches(stream: SendableRecordBatchStream) -> Result<(), J let mut stream: RecordStream = stream.into(); let batches = stream.to_vec().await?; - let disp = - pretty::pretty_format_batches(&schema, &batches, Some(terminal_util::term_width()), None) - .map_err(DatabaseError::from)?; + let disp = glaredb::ext::tools::pretty_format_batches( + &schema, + &batches, + Some(glaredb::ext::tools::term_width()), + None, + ) + .map_err(DatabaseError::from)?; println!("{}", disp); Ok(()) diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index f9b36e6fd..5b8c41cc0 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ 
-17,14 +17,12 @@ telemetry = { path = "../../crates/telemetry" } arrow_util = { path = "../../crates/arrow_util" } terminal_util = { path = "../../crates/terminal_util" } glaredb = { path = "../../crates/glaredb" } -datafusion = { workspace = true, features = ["pyarrow"] } tokio = { workspace = true } thiserror = { workspace = true } url = { workspace = true } futures = { workspace = true } async-trait = { workspace = true } +datafusion = { workspace = true, features = ["pyarrow"] } # override workspace features +lzma-sys = { version = "*", features = ["static"] } # prevent dynamic linking of lzma, which comes from datafusion pyo3 = { version = "0.20.3", features = ["abi3-py37", "extension-module"] } once_cell = "1.19.0" - -# Prevent dynamic linking of lzma, which comes from datafusion -lzma-sys = { version = "*", features = ["static"] } diff --git a/bindings/python/src/environment.rs b/bindings/python/src/environment.rs index 456b8a4a5..45e8f54d2 100644 --- a/bindings/python/src/environment.rs +++ b/bindings/python/src/environment.rs @@ -1,9 +1,8 @@ use std::sync::Arc; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::pyarrow::PyArrowType; -use datafusion::datasource::{MemTable, TableProvider}; -use glaredb::EnvironmentReader; +use glaredb::ext::datafusion::arrow::pyarrow::PyArrowType; +use glaredb::ext::datafusion::datasource::{MemTable, TableProvider}; +use glaredb::ext::{EnvironmentReader, RecordBatch}; use pyo3::prelude::*; use pyo3::types::{IntoPyDict, PyTuple, PyType}; diff --git a/bindings/python/src/execution.rs b/bindings/python/src/execution.rs index 4a10d263f..7f8084d70 100644 --- a/bindings/python/src/execution.rs +++ b/bindings/python/src/execution.rs @@ -2,21 +2,21 @@ use std::any::Any; use std::fmt::Debug; use std::sync::{Arc, Mutex}; -use arrow_util::pretty; use async_trait::async_trait; -use datafusion::arrow::datatypes::{Schema, SchemaRef}; -use datafusion::arrow::pyarrow::ToPyArrow; -use datafusion::datasource::TableProvider; 
-use datafusion::error::DataFusionError; -use datafusion::execution::context::SessionState; -use datafusion::execution::TaskContext; -use datafusion::logical_expr::{TableProviderFilterPushDown, TableType}; -use datafusion::physical_plan::stream::RecordBatchStreamAdapter; -use datafusion::physical_plan::streaming::{PartitionStream, StreamingTableExec}; -use datafusion::physical_plan::ExecutionPlan; -use datafusion::prelude::Expr; use futures::StreamExt; -use glaredb::{DatabaseError, Operation, RecordBatch, SendableRecordBatchStream}; +use glaredb::ext::datafusion::arrow::datatypes::{Schema, SchemaRef}; +use glaredb::ext::datafusion::arrow::pyarrow::ToPyArrow; +use glaredb::ext::datafusion::datasource::TableProvider; +use glaredb::ext::datafusion::error::DataFusionError; +use glaredb::ext::datafusion::execution::context::SessionState; +use glaredb::ext::datafusion::execution::TaskContext; +use glaredb::ext::datafusion::logical_expr::{TableProviderFilterPushDown, TableType}; +use glaredb::ext::datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use glaredb::ext::datafusion::physical_plan::streaming::{PartitionStream, StreamingTableExec}; +use glaredb::ext::datafusion::physical_plan::ExecutionPlan; +use glaredb::ext::datafusion::prelude::Expr; +use glaredb::ext::{RecordBatch, SendableRecordBatchStream}; +use glaredb::{DatabaseError, Operation}; use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; use pyo3::types::PyTuple; @@ -101,10 +101,10 @@ impl PyExecutionOutput { pub fn show(&mut self, py: Python) -> PyResult<()> { let (schema, batches) = self.resolve_operation(py)?; - let disp = pretty::pretty_format_batches( + let disp = glaredb::ext::tools::pretty_format_batches( &schema, &batches, - Some(terminal_util::term_width()), + Some(glaredb::ext::tools::term_width()), None, ) .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; diff --git a/crates/glaredb/Cargo.toml b/crates/glaredb/Cargo.toml index a1a68eabd..bf0fcd202 100644 --- 
a/crates/glaredb/Cargo.toml +++ b/crates/glaredb/Cargo.toml @@ -14,6 +14,8 @@ test = false [dependencies] sqlexec = { path = "../sqlexec" } metastore = { path = "../metastore" } +arrow_util = { path = "../arrow_util" } +terminal_util = { path = "../terminal_util" } url = { workspace = true } datafusion = { workspace = true } futures = { workspace = true } diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index afe6d8224..6d201f115 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -19,27 +19,41 @@ use std::task::{Context, Poll}; use datafusion::arrow::array::{StringArray, UInt64Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::error::ArrowError; -// public re-export so downstream users of this package don't have to -// directly depend on DF (and our version no-less) to use our interfaces. -pub use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::DataFusionError; use datafusion::logical_expr::LogicalPlan; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; -pub use datafusion::physical_plan::SendableRecordBatchStream; -pub use datafusion::scalar::ScalarValue; use derive_builder::Builder; use futures::lock::Mutex; use futures::stream::{self, Stream, StreamExt}; use futures::TryStreamExt; use metastore::errors::MetastoreError; use sqlexec::engine::{Engine, EngineStorage, TrackedSession}; -pub use sqlexec::environment::EnvironmentReader; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; use sqlexec::session::ExecutionResult; use sqlexec::OperationInfo; use url::Url; +// public re-exports so downstream users of this package don't have to +// depend directly on DataFusion (and on our version of it, no less) to use our interfaces.
+pub mod ext { + pub use datafusion; + pub use datafusion::arrow; + pub use datafusion::arrow::record_batch::RecordBatch; + pub use datafusion::physical_plan::SendableRecordBatchStream; + pub use datafusion::scalar::ScalarValue; + pub use sqlexec::environment::EnvironmentReader; + + // public exports of some quasi-internal tools used by external and + // downstream dependents, to reduce friction and extra dependencies. + pub mod tools { + pub use arrow_util::pretty::pretty_format_batches; + pub use terminal_util::term_width; + } +} + +use crate::ext::{EnvironmentReader, RecordBatch, ScalarValue, SendableRecordBatchStream}; + /// ConnectOptions are the set of options to configure a GlareDB /// instance, and are an analogue to the commandline arguments to /// produce a "running database". The ConnectOptionsBuilder provides a