From 408d942771419e273747292924a227f76d11c5bf Mon Sep 17 00:00:00 2001 From: Micah Wylde Date: Thu, 12 Dec 2024 15:40:46 -0800 Subject: [PATCH] Finish renaming arroyo-df crate to arroyo-planner (#810) --- Cargo.lock | 106 +++++++++--------- crates/arroyo-api/Cargo.toml | 2 +- crates/arroyo-api/src/pipelines.rs | 4 +- crates/arroyo-planner/Cargo.toml | 2 +- crates/arroyo-sql-testing/Cargo.toml | 2 +- crates/arroyo-sql-testing/src/smoke_tests.rs | 2 +- crates/arroyo-worker/Cargo.toml | 2 +- crates/arroyo-worker/src/arrow/async_udf.rs | 2 +- .../arroyo-worker/src/arrow/instant_join.rs | 2 +- .../src/arrow/join_with_expiration.rs | 2 +- crates/arroyo-worker/src/arrow/mod.rs | 4 +- .../src/arrow/session_aggregating_window.rs | 4 +- .../src/arrow/sliding_aggregating_window.rs | 2 +- .../src/arrow/tumbling_aggregating_window.rs | 4 +- .../src/arrow/updating_aggregator.rs | 2 +- crates/arroyo-worker/src/arrow/window_fn.rs | 2 +- crates/arroyo-worker/src/engine.rs | 2 +- crates/arroyo-worker/src/lib.rs | 2 +- crates/arroyo-worker/src/utils.rs | 2 +- crates/arroyo/Cargo.toml | 2 +- crates/arroyo/src/main.rs | 9 +- 21 files changed, 81 insertions(+), 80 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 472ae2715..a8b3ad351 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -432,9 +432,9 @@ dependencies = [ "arroyo-compiler-service", "arroyo-connectors", "arroyo-controller", - "arroyo-df", "arroyo-node", "arroyo-openapi", + "arroyo-planner", "arroyo-rpc", "arroyo-server-common", "arroyo-storage", @@ -474,9 +474,9 @@ dependencies = [ "arrow-schema", "arroyo-connectors", "arroyo-datastream", - "arroyo-df", "arroyo-formats", "arroyo-operator", + "arroyo-planner", "arroyo-rpc", "arroyo-server-common", "arroyo-state", @@ -706,55 +706,6 @@ dependencies = [ "tonic 0.13.0", ] -[[package]] -name = "arroyo-df" -version = "0.13.0-dev" -dependencies = [ - "anyhow", - "apache-avro", - "arrow", - "arrow-array", - "arrow-schema", - "arroyo-connectors", - "arroyo-datastream", - "arroyo-formats", - "arroyo-operator", - "arroyo-rpc", - "arroyo-storage", - "arroyo-types", - "arroyo-udf-host", - "arroyo-udf-python", - "async-trait", - "bincode", - "datafusion", - "datafusion-functions", - "datafusion-functions-json", - "datafusion-functions-window", - "datafusion-proto", - "futures", - "glob", - "petgraph", - "prettyplease 0.2.25", - "proc-macro2", - "prost 0.13.3", - "quote", - "regex", - "rstest 0.23.0", - "serde", - "serde_json", - "serde_json_path", - "syn 2.0.87", - "test-log", - "tokio", - "tokio-stream", - "toml", - "tracing", - "tracing-subscriber", - "unicase", - "xxhash-rust", - "xz2", -] - [[package]] name = "arroyo-formats" version = "0.13.0-dev" @@ -868,6 +819,55 @@ dependencies = [ "tracing", ] +[[package]] +name = "arroyo-planner" +version = "0.13.0-dev" +dependencies = [ + "anyhow", + "apache-avro", + "arrow", + "arrow-array", + "arrow-schema", + "arroyo-connectors", + "arroyo-datastream", + "arroyo-formats", + "arroyo-operator", + "arroyo-rpc", + "arroyo-storage", + "arroyo-types", + "arroyo-udf-host", + "arroyo-udf-python", + "async-trait", + "bincode", + "datafusion", + "datafusion-functions", + "datafusion-functions-json", + "datafusion-functions-window", + "datafusion-proto", + "futures", + "glob", + "petgraph", + "prettyplease 0.2.25", + "proc-macro2", + "prost 0.13.3", + "quote", + "regex", + "rstest 0.23.0", + "serde", + "serde_json", + "serde_json_path", + "syn 2.0.87", + "test-log", + "tokio", + "tokio-stream", + "toml", + "tracing", + "tracing-subscriber", + "unicase", + "xxhash-rust", + "xz2", +] + [[package]] name = "arroyo-rpc" version = "0.13.0-dev" @@ -955,8 +955,8 @@ dependencies = [ "arrow-array", "arrow-schema", "arroyo-datastream", - "arroyo-df", "arroyo-formats", + "arroyo-planner", "arroyo-rpc", "arroyo-state", "arroyo-types", @@ -1132,10 +1132,10 @@ dependencies = [ "arrow-schema", "arroyo-connectors", "arroyo-datastream", - "arroyo-df", "arroyo-formats", "arroyo-metrics", "arroyo-operator", + "arroyo-planner", "arroyo-rpc", "arroyo-server-common", "arroyo-state", diff --git a/crates/arroyo-api/Cargo.toml b/crates/arroyo-api/Cargo.toml index 970219cb5..0a462c3bf 100644 --- a/crates/arroyo-api/Cargo.toml +++ b/crates/arroyo-api/Cargo.toml @@ -9,7 +9,7 @@ default = [] [dependencies] arroyo-types = { path = "../arroyo-types" } -arroyo-df = { path = "../arroyo-planner" } +arroyo-planner = { path = "../arroyo-planner" } arroyo-rpc = { path = "../arroyo-rpc" } arroyo-server-common = { path = "../arroyo-server-common" } arroyo-operator = { path = "../arroyo-operator" } diff --git a/crates/arroyo-api/src/pipelines.rs b/crates/arroyo-api/src/pipelines.rs index 7e829c5db..9f3389e33 100644 --- a/crates/arroyo-api/src/pipelines.rs +++ b/crates/arroyo-api/src/pipelines.rs @@ -27,8 +27,8 @@ use arroyo_connectors::kafka::{KafkaConfig, KafkaTable, SchemaRegistry}; use arroyo_datastream::logical::{ ChainedLogicalOperator, LogicalNode, LogicalProgram, OperatorChain, OperatorName, }; -use arroyo_df::{ArroyoSchemaProvider, CompiledSql, SqlConfig}; use arroyo_formats::ser::ArrowSerializer; +use arroyo_planner::{ArroyoSchemaProvider, CompiledSql, SqlConfig}; use arroyo_rpc::formats::Format; use arroyo_rpc::grpc::rpc::compiler_grpc_client::CompilerGrpcClient; use arroyo_rpc::public_ids::{generate_id, IdTypes}; @@ -174,7 +174,7 @@ async fn compile_sql<'a>( schema_provider.add_connection_profile(profile); } - arroyo_df::parse_and_get_program( + arroyo_planner::parse_and_get_program( &query, schema_provider, SqlConfig { diff --git a/crates/arroyo-planner/Cargo.toml b/crates/arroyo-planner/Cargo.toml index ad15d40a8..795e41ac9 100644 --- a/crates/arroyo-planner/Cargo.toml +++ b/crates/arroyo-planner/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "arroyo-df" +name = "arroyo-planner" version = "0.13.0-dev" edition = "2021" diff --git a/crates/arroyo-sql-testing/Cargo.toml b/crates/arroyo-sql-testing/Cargo.toml index c82d7d78f..027dc1fda 100644 --- a/crates/arroyo-sql-testing/Cargo.toml +++ b/crates/arroyo-sql-testing/Cargo.toml @@ -21,7 +21,7 @@ serde_json = "1.0" anyhow = "1.0" arroyo-types = { path = "../arroyo-types" } -arroyo-df = { path = "../arroyo-planner" } +arroyo-planner = { path = "../arroyo-planner" } arroyo-datastream = { path = "../arroyo-datastream" } arroyo-rpc = { path = "../arroyo-rpc" } arroyo-worker = { path = "../arroyo-worker" } diff --git a/crates/arroyo-sql-testing/src/smoke_tests.rs b/crates/arroyo-sql-testing/src/smoke_tests.rs index 82fdc2f5e..352bef30b 100644 --- a/crates/arroyo-sql-testing/src/smoke_tests.rs +++ b/crates/arroyo-sql-testing/src/smoke_tests.rs @@ -2,7 +2,7 @@ use anyhow::Result; use arroyo_datastream::logical::{ LogicalEdge, LogicalEdgeType, LogicalGraph, LogicalNode, LogicalProgram, OperatorName, }; -use arroyo_df::{parse_and_get_arrow_program, ArroyoSchemaProvider, SqlConfig}; +use arroyo_planner::{parse_and_get_arrow_program, ArroyoSchemaProvider, SqlConfig}; use arroyo_state::parquet::ParquetBackend; use petgraph::algo::has_path_connecting; use petgraph::visit::EdgeRef; diff --git a/crates/arroyo-worker/Cargo.toml b/crates/arroyo-worker/Cargo.toml index 705e7be77..1e22acaee 100644 --- a/crates/arroyo-worker/Cargo.toml +++ b/crates/arroyo-worker/Cargo.toml @@ -15,7 +15,7 @@ arroyo-storage = { path = "../arroyo-storage" } arroyo-metrics = { path = "../arroyo-metrics" } arroyo-formats = { path = "../arroyo-formats" } arroyo-datastream = { path = "../arroyo-datastream" } -arroyo-df = { path = "../arroyo-planner" } +arroyo-planner = { path = "../arroyo-planner" } arroyo-operator = { path = "../arroyo-operator" } arroyo-connectors = { path = "../arroyo-connectors" } arroyo-udf-host = { path = "../arroyo-udf/arroyo-udf-host" } diff --git a/crates/arroyo-worker/src/arrow/async_udf.rs b/crates/arroyo-worker/src/arrow/async_udf.rs index 6ad69f1c7..4cc98fbf9 100644 --- a/crates/arroyo-worker/src/arrow/async_udf.rs +++ b/crates/arroyo-worker/src/arrow/async_udf.rs @@ -3,12 +3,12 @@ use arrow::row::{OwnedRow, RowConverter, SortField}; use arrow_array::{make_array, Array, RecordBatch, UInt64Array}; use arrow_schema::{Field, Schema}; use arroyo_datastream::logical::DylibUdfConfig; -use arroyo_df::ASYNC_RESULT_FIELD; use arroyo_operator::context::{Collector, OperatorContext}; use arroyo_operator::operator::{ ArrowOperator, AsDisplayable, ConstructedOperator, DisplayableOperator, OperatorConstructor, Registry, }; +use arroyo_planner::ASYNC_RESULT_FIELD; use arroyo_rpc::grpc::api; use arroyo_rpc::grpc::rpc::TableConfig; use arroyo_state::global_table_config; diff --git a/crates/arroyo-worker/src/arrow/instant_join.rs b/crates/arroyo-worker/src/arrow/instant_join.rs index 45dc474b7..0346d788c 100644 --- a/crates/arroyo-worker/src/arrow/instant_join.rs +++ b/crates/arroyo-worker/src/arrow/instant_join.rs @@ -2,11 +2,11 @@ use super::sync::streams::KeyedCloneableStreamFuture; use anyhow::Result; use arrow::compute::{max, min, partition, sort_to_indices, take}; use arrow_array::{RecordBatch, TimestampNanosecondArray}; -use arroyo_df::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_operator::context::{Collector, OperatorContext}; use arroyo_operator::operator::{ ArrowOperator, ConstructedOperator, DisplayableOperator, OperatorConstructor, Registry, }; +use arroyo_planner::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_rpc::{ df::{ArroyoSchema, ArroyoSchemaRef}, grpc::{api, rpc::TableConfig}, diff --git a/crates/arroyo-worker/src/arrow/join_with_expiration.rs b/crates/arroyo-worker/src/arrow/join_with_expiration.rs index 14e4b338a..b596e6823 100644 --- a/crates/arroyo-worker/src/arrow/join_with_expiration.rs +++ b/crates/arroyo-worker/src/arrow/join_with_expiration.rs @@ -1,12 +1,12 @@ use anyhow::Result; use arrow::compute::concat_batches; use arrow_array::RecordBatch; -use arroyo_df::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_operator::context::{Collector, OperatorContext}; use arroyo_operator::operator::{ ArrowOperator, AsDisplayable, ConstructedOperator, DisplayableOperator, OperatorConstructor, Registry, }; +use arroyo_planner::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_rpc::{ df::ArroyoSchema, grpc::{api, rpc::TableConfig}, diff --git a/crates/arroyo-worker/src/arrow/mod.rs b/crates/arroyo-worker/src/arrow/mod.rs index 0ea7f57e6..70353877a 100644 --- a/crates/arroyo-worker/src/arrow/mod.rs +++ b/crates/arroyo-worker/src/arrow/mod.rs @@ -1,12 +1,12 @@ use arrow::datatypes::SchemaRef; use arrow_array::RecordBatch; -use arroyo_df::physical::ArroyoPhysicalExtensionCodec; -use arroyo_df::physical::DecodingContext; use arroyo_operator::context::{Collector, OperatorContext}; use arroyo_operator::operator::{ ArrowOperator, AsDisplayable, ConstructedOperator, DisplayableOperator, OperatorConstructor, Registry, }; +use arroyo_planner::physical::ArroyoPhysicalExtensionCodec; +use arroyo_planner::physical::DecodingContext; use arroyo_rpc::grpc::api; use datafusion::common::DataFusionError; use datafusion::common::Result as DFResult; diff --git a/crates/arroyo-worker/src/arrow/session_aggregating_window.rs b/crates/arroyo-worker/src/arrow/session_aggregating_window.rs index 5d14387f5..c847febb6 100644 --- a/crates/arroyo-worker/src/arrow/session_aggregating_window.rs +++ b/crates/arroyo-worker/src/arrow/session_aggregating_window.rs @@ -17,11 +17,11 @@ use arrow_array::{ TimestampNanosecondArray, }; use arrow_schema::{DataType, Field, FieldRef}; -use arroyo_df::schemas::window_arrow_struct; use arroyo_operator::{ context::OperatorContext, operator::{ArrowOperator, ConstructedOperator, OperatorConstructor}, }; +use arroyo_planner::schemas::window_arrow_struct; use arroyo_rpc::{ grpc::{api, rpc::TableConfig}, Converter, @@ -32,9 +32,9 @@ use arroyo_state::{ use arroyo_types::{from_nanos, print_time, to_nanos, CheckpointBarrier, Watermark}; use datafusion::{execution::context::SessionContext, physical_plan::ExecutionPlan}; -use arroyo_df::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_operator::context::Collector; use arroyo_operator::operator::Registry; +use arroyo_planner::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_rpc::df::{ArroyoSchema, ArroyoSchemaRef}; use datafusion::execution::{ runtime_env::{RuntimeConfig, RuntimeEnv}, diff --git a/crates/arroyo-worker/src/arrow/sliding_aggregating_window.rs b/crates/arroyo-worker/src/arrow/sliding_aggregating_window.rs index 0c97fdfbb..39b2706c2 100644 --- a/crates/arroyo-worker/src/arrow/sliding_aggregating_window.rs +++ b/crates/arroyo-worker/src/arrow/sliding_aggregating_window.rs @@ -22,9 +22,9 @@ use std::{ use futures::stream::FuturesUnordered; use super::sync::streams::KeyedCloneableStreamFuture; -use arroyo_df::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_operator::context::Collector; use arroyo_operator::operator::{AsDisplayable, DisplayableOperator, Registry}; +use arroyo_planner::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_rpc::df::ArroyoSchema; use datafusion::execution::{ runtime_env::{RuntimeConfig, RuntimeEnv}, diff --git a/crates/arroyo-worker/src/arrow/tumbling_aggregating_window.rs b/crates/arroyo-worker/src/arrow/tumbling_aggregating_window.rs index 366adde3f..8a2a89052 100644 --- a/crates/arroyo-worker/src/arrow/tumbling_aggregating_window.rs +++ b/crates/arroyo-worker/src/arrow/tumbling_aggregating_window.rs @@ -2,12 +2,12 @@ use anyhow::{anyhow, Result}; use arrow::compute::{partition, sort_to_indices, take}; use arrow_array::{types::TimestampNanosecondType, Array, PrimitiveArray, RecordBatch}; use arrow_schema::SchemaRef; -use arroyo_df::schemas::add_timestamp_field_arrow; use arroyo_operator::context::{Collector, OperatorContext}; use arroyo_operator::operator::{ ArrowOperator, AsDisplayable, ConstructedOperator, DisplayableOperator, OperatorConstructor, Registry, }; +use arroyo_planner::schemas::add_timestamp_field_arrow; use arroyo_rpc::grpc::{api, rpc::TableConfig}; use arroyo_state::timestamp_table_config; use arroyo_types::{from_nanos, print_time, to_nanos, CheckpointBarrier, Watermark}; @@ -25,7 +25,7 @@ use std::{ time::SystemTime, }; -use arroyo_df::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; +use arroyo_planner::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_rpc::df::ArroyoSchema; use datafusion::execution::{ runtime_env::{RuntimeConfig, RuntimeEnv}, diff --git a/crates/arroyo-worker/src/arrow/updating_aggregator.rs b/crates/arroyo-worker/src/arrow/updating_aggregator.rs index b51e1d85c..f40583e23 100644 --- a/crates/arroyo-worker/src/arrow/updating_aggregator.rs +++ b/crates/arroyo-worker/src/arrow/updating_aggregator.rs @@ -12,7 +12,6 @@ use arrow_array::{Array, BooleanArray, RecordBatch, StructArray}; use arrow_array::cast::AsArray; use arrow_schema::SchemaRef; -use arroyo_df::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_operator::context::Collector; use arroyo_operator::{ context::OperatorContext, @@ -21,6 +20,7 @@ use arroyo_operator::{ OperatorConstructor, Registry, }, }; +use arroyo_planner::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_rpc::df::ArroyoSchemaRef; use arroyo_rpc::grpc::{api::UpdatingAggregateOperator, rpc::TableConfig}; use arroyo_rpc::{updating_meta_fields, UPDATING_META_FIELD}; diff --git a/crates/arroyo-worker/src/arrow/window_fn.rs b/crates/arroyo-worker/src/arrow/window_fn.rs index 0462634b3..3697a21cf 100644 --- a/crates/arroyo-worker/src/arrow/window_fn.rs +++ b/crates/arroyo-worker/src/arrow/window_fn.rs @@ -6,11 +6,11 @@ use std::{collections::BTreeMap, sync::RwLock, time::SystemTime}; use anyhow::{anyhow, Result}; use arrow::compute::{max, min}; use arrow_array::RecordBatch; -use arroyo_df::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_operator::context::{Collector, OperatorContext}; use arroyo_operator::operator::{ ArrowOperator, ConstructedOperator, OperatorConstructor, Registry, }; +use arroyo_planner::physical::{ArroyoPhysicalExtensionCodec, DecodingContext}; use arroyo_rpc::df::ArroyoSchema; use arroyo_rpc::grpc::rpc::TableConfig; use arroyo_rpc::{df::ArroyoSchemaRef, grpc::api}; diff --git a/crates/arroyo-worker/src/engine.rs b/crates/arroyo-worker/src/engine.rs index 5bc60d3a4..879bab37f 100644 --- a/crates/arroyo-worker/src/engine.rs +++ b/crates/arroyo-worker/src/engine.rs @@ -13,11 +13,11 @@ use arroyo_connectors::connectors; use arroyo_datastream::logical::{ LogicalEdge, LogicalEdgeType, LogicalGraph, LogicalNode, OperatorChain, OperatorName, }; -use arroyo_df::physical::new_registry; use arroyo_operator::context::{batch_bounded, BatchReceiver, BatchSender, OperatorContext}; use arroyo_operator::operator::Registry; use arroyo_operator::operator::{ChainedOperator, ConstructedOperator, OperatorNode, SourceNode}; use arroyo_operator::ErasedConstructor; +use arroyo_planner::physical::new_registry; use arroyo_rpc::config::config; use arroyo_rpc::df::ArroyoSchema; use arroyo_rpc::grpc::{ diff --git a/crates/arroyo-worker/src/lib.rs b/crates/arroyo-worker/src/lib.rs index 87c013663..1bb8827b6 100644 --- a/crates/arroyo-worker/src/lib.rs +++ b/crates/arroyo-worker/src/lib.rs @@ -40,7 +40,7 @@ use prost::Message; use crate::utils::to_d2; use arroyo_datastream::logical::LogicalProgram; -use arroyo_df::physical::new_registry; +use arroyo_planner::physical::new_registry; use arroyo_rpc::config::config; use arroyo_server_common::shutdown::ShutdownGuard; use arroyo_server_common::wrap_start; diff --git a/crates/arroyo-worker/src/utils.rs b/crates/arroyo-worker/src/utils.rs index 357400553..79d9af6f2 100644 --- a/crates/arroyo-worker/src/utils.rs +++ b/crates/arroyo-worker/src/utils.rs @@ -1,8 +1,8 @@ use crate::engine::construct_operator; use arrow_schema::Schema; use arroyo_datastream::logical::{ChainedLogicalOperator, LogicalEdgeType, LogicalProgram}; -use arroyo_df::physical::new_registry; use arroyo_operator::operator::Registry; +use arroyo_planner::physical::new_registry; use std::fmt::Write; use std::sync::Arc; diff --git a/crates/arroyo/Cargo.toml b/crates/arroyo/Cargo.toml index 4fa290d37..55c7ba6ce 100644 --- a/crates/arroyo/Cargo.toml +++ b/crates/arroyo/Cargo.toml @@ -20,7 +20,7 @@ arroyo-rpc = { path = "../arroyo-rpc" } arroyo-openapi = { path ="../arroyo-openapi" } arroyo-storage = { path = "../arroyo-storage" } arroyo-udf-python = { path = "../arroyo-udf/arroyo-udf-python" } -arroyo-df = { path = "../arroyo-planner" } +arroyo-planner = { path = "../arroyo-planner" } clap = { version = "4", features = ["derive"] } tokio = { version = "1", features = ["full"] } diff --git a/crates/arroyo/src/main.rs b/crates/arroyo/src/main.rs index ac7798a0f..3433aba1b 100644 --- a/crates/arroyo/src/main.rs +++ b/crates/arroyo/src/main.rs @@ -1,7 +1,7 @@ mod run; use anyhow::{anyhow, bail}; -use arroyo_df::{ArroyoSchemaProvider, SqlConfig}; +use arroyo_planner::{ArroyoSchemaProvider, SqlConfig}; use arroyo_rpc::config; use arroyo_rpc::config::{config, DatabaseType}; use arroyo_server_common::shutdown::{Shutdown, SignalBehavior}; @@ -493,9 +493,10 @@ async fn visualize(query: Input, open: bool) { let query = std::io::read_to_string(query).expect("Failed to read query"); let schema_provider = ArroyoSchemaProvider::new(); - let compiled = arroyo_df::parse_and_get_program(&query, schema_provider, SqlConfig::default()) - .await - .expect("Failed while planning query"); + let compiled = + arroyo_planner::parse_and_get_program(&query, schema_provider, SqlConfig::default()) + .await + .expect("Failed while planning query"); let d2 = utils::to_d2(&compiled.program).await.unwrap();