From 015d108dc38dcaccfbf4d304d1ddd128f405f411 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Mon, 5 Feb 2024 13:18:57 -0600 Subject: [PATCH 1/6] df 35 upgrade --- Cargo.lock | 537 ++++++++++++------ Cargo.toml | 14 +- .../src/planner/expr/function.rs | 41 +- crates/datafusion_ext/src/planner/mod.rs | 2 +- crates/datafusion_ext/src/planner/query.rs | 9 +- .../datafusion_ext/src/planner/statement.rs | 1 + crates/datasources/Cargo.toml | 2 +- crates/datasources/src/bigquery/mod.rs | 12 +- crates/datasources/src/cassandra/exec.rs | 13 +- crates/datasources/src/clickhouse/mod.rs | 12 +- crates/datasources/src/common/mod.rs | 3 +- crates/datasources/src/debug/mod.rs | 12 +- crates/datasources/src/lake/iceberg/table.rs | 8 +- crates/datasources/src/mongodb/exec.rs | 12 +- crates/datasources/src/mysql/mod.rs | 12 +- crates/datasources/src/object_store/mod.rs | 1 - crates/datasources/src/postgres/mod.rs | 12 +- crates/datasources/src/postgres/query_exec.rs | 12 +- crates/datasources/src/snowflake/mod.rs | 12 +- crates/datasources/src/sqlserver/mod.rs | 12 +- crates/distexec/src/adapter.rs | 8 +- .../protogen/src/metastore/types/catalog.rs | 1 + .../sqlbuiltins/src/functions/aggregates.rs | 9 + .../src/functions/scalars/df_scalars.rs | 9 + .../table/system/cache_external_tables.rs | 12 +- .../src/functions/table/system/mod.rs | 12 +- crates/sqlexec/src/dispatch/system.rs | 1 + .../sqlexec/src/planner/logical_plan/mod.rs | 2 +- .../planner/physical_plan/alter_database.rs | 12 +- .../src/planner/physical_plan/alter_table.rs | 12 +- .../physical_plan/alter_tunnel_rotate_keys.rs | 12 +- .../src/planner/physical_plan/client_recv.rs | 12 +- .../src/planner/physical_plan/client_send.rs | 12 +- .../physical_plan/create_credential.rs | 21 +- .../physical_plan/create_credentials.rs | 12 +- .../physical_plan/create_external_database.rs | 12 +- .../physical_plan/create_external_table.rs | 12 +- .../planner/physical_plan/create_schema.rs | 12 +- .../planner/physical_plan/create_tunnel.rs | 12 +- .../src/planner/physical_plan/create_view.rs | 12 +- .../src/planner/physical_plan/delete.rs | 12 +- .../planner/physical_plan/describe_table.rs | 12 +- .../planner/physical_plan/drop_credentials.rs | 12 +- .../planner/physical_plan/drop_database.rs | 12 +- .../src/planner/physical_plan/drop_schemas.rs | 12 +- .../src/planner/physical_plan/drop_tables.rs | 12 +- .../planner/physical_plan/drop_temp_tables.rs | 12 +- .../src/planner/physical_plan/drop_tunnel.rs | 12 +- .../src/planner/physical_plan/drop_views.rs | 12 +- .../src/planner/physical_plan/remote_exec.rs | 2 +- .../src/planner/physical_plan/remote_scan.rs | 12 +- .../src/planner/physical_plan/set_var.rs | 12 +- .../src/planner/physical_plan/show_var.rs | 12 +- .../src/planner/physical_plan/update.rs | 12 +- .../src/planner/physical_plan/values.rs | 12 +- crates/sqlexec/src/remote/batch_stream.rs | 2 +- crates/sqlexec/src/remote/planner.rs | 9 +- 57 files changed, 761 insertions(+), 366 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 20e62daff..06e49216c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,11 +219,10 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" dependencies = [ - "ahash 0.8.6", "arrow-arith", "arrow-array", "arrow-buffer", @@ -242,9 +241,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies = [ "arrow-array", "arrow-buffer", @@ -257,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" dependencies = [ "ahash 0.8.6", "arrow-buffer", @@ -274,9 +273,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", "half", @@ -285,9 +284,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ "arrow-array", "arrow-buffer", @@ -304,9 +303,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" dependencies = [ "arrow-array", "arrow-buffer", @@ -323,9 +322,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ "arrow-buffer", "arrow-schema", @@ -335,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624e0dcb6b5a7a06222bfd2be3f7e905ce849a6b714ec989f18cdba330c77d38" +checksum = "1d7f215461ad6346f2e4cc853e377d4e076d533e1ed78d327debe83023e3601f" dependencies = [ "arrow-arith", "arrow-array", @@ -362,9 +361,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" dependencies = [ "arrow-array", "arrow-buffer", @@ -372,14 +371,15 @@ dependencies = [ "arrow-data", "arrow-schema", "flatbuffers", + "lz4_flex", "zstd 0.13.0", ] [[package]] name = "arrow-json" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -397,9 +397,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ "arrow-array", "arrow-buffer", @@ -412,9 +412,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ "ahash 0.8.6", "arrow-array", @@ -427,9 +427,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" dependencies = [ "bitflags 2.4.1", "serde", @@ -437,9 +437,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ "ahash 0.8.6", "arrow-array", @@ -451,9 +451,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -1467,7 +1467,7 @@ dependencies = [ "datafusion", "logutil", "metastore", - "object_store 0.8.0", + "object_store", "parking_lot", "protogen", "thiserror", @@ -2199,14 +2199,15 @@ dependencies = [ [[package]] name = "datafusion" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0" +checksum = "4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49" dependencies = [ "ahash 0.8.6", "apache-avro", "arrow", "arrow-array", + "arrow-ipc", "arrow-schema", "async-compression", "async-trait", @@ -2231,12 +2232,12 @@ dependencies = [ "log", "num-traits", "num_cpus", - "object_store 0.8.0", + "object_store", "parking_lot", "parquet", "pin-project-lite", "rand", - "sqlparser 0.40.0", + "sqlparser", "tempfile", "tokio", "tokio-util", @@ -2248,9 +2249,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613" +checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e" dependencies = [ "ahash 0.8.6", "apache-avro", @@ -2262,17 +2263,17 @@ dependencies = [ "half", "libc", "num_cpus", - "object_store 0.8.0", + "object_store", "parquet", "pyo3", - "sqlparser 0.40.0", + "sqlparser", ] [[package]] name = "datafusion-execution" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d" +checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496" dependencies = [ "arrow", "chrono", @@ -2282,7 +2283,7 @@ dependencies = [ "futures", "hashbrown 0.14.2", "log", - "object_store 0.8.0", + "object_store", "parking_lot", "rand", "tempfile", @@ -2291,25 +2292,25 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e" +checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b" dependencies = [ "ahash 0.8.6", "arrow", "arrow-array", "datafusion-common", "paste", - "sqlparser 0.40.0", + "sqlparser", "strum 0.25.0", "strum_macros 0.25.3", ] [[package]] name = "datafusion-optimizer" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618" +checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010" dependencies = [ "arrow", "async-trait", @@ -2325,9 +2326,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735" +checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0" dependencies = [ "ahash 0.8.6", "arrow", @@ -2359,9 +2360,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631" +checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3" dependencies = [ "ahash 0.8.6", "arrow", @@ -2390,31 +2391,47 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "344700ad4505ffcab6ba0715188ce317ad44e024fe1c5dc7267762113a9dcdf8" +checksum = "5742f993d1812d6bb3cdc4ce2a0aa99e10f6dc0daa11dd69b0ff57f2d8e7518c" dependencies = [ "arrow", "chrono", "datafusion", "datafusion-common", "datafusion-expr", - "object_store 0.8.0", + "object_store", "prost", ] [[package]] name = "datafusion-sql" -version = "34.0.0" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5" +checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768" dependencies = [ "arrow", "arrow-schema", "datafusion-common", "datafusion-expr", "log", - "sqlparser 0.40.0", + "sqlparser", +] + +[[package]] +name = "datafusion-substrait" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dad6bef77af3d8a697ae63ffbcb5aa66b74cd08ea93a31e2e757da75b2f1452f" +dependencies = [ + "async-recursion", + "chrono", + "datafusion", + "itertools 0.12.0", + "object_store", + "prost", + "prost-types", + "substrait", ] [[package]] @@ -2477,7 +2494,7 @@ dependencies = [ "mongodb", "mysql_async", "mysql_common", - "object_store 0.8.0", + "object_store", "object_store_util", "once_cell", "openssh", @@ -2521,7 +2538,7 @@ dependencies = [ [[package]] name = "deltalake" version = "0.17.0" -source = "git+https://github.com/GlareDB/delta-rs.git?rev=4c4bd7d5d4ce47f47f348c2a7d52c009048f841e#4c4bd7d5d4ce47f47f348c2a7d52c009048f841e" +source = "git+https://github.com/delta-io/delta-rs.git?rev=993e2c202936719855f8831513bcbab1b9930b94#993e2c202936719855f8831513bcbab1b9930b94" dependencies = [ "deltalake-aws", "deltalake-azure", @@ -2532,7 +2549,7 @@ dependencies = [ [[package]] name = "deltalake-aws" version = "0.1.0" -source = "git+https://github.com/GlareDB/delta-rs.git?rev=4c4bd7d5d4ce47f47f348c2a7d52c009048f841e#4c4bd7d5d4ce47f47f348c2a7d52c009048f841e" +source = "git+https://github.com/delta-io/delta-rs.git?rev=993e2c202936719855f8831513bcbab1b9930b94#993e2c202936719855f8831513bcbab1b9930b94" dependencies = [ "async-trait", "backoff", @@ -2541,7 +2558,7 @@ dependencies = [ "futures", "lazy_static", "maplit", - "object_store 0.8.0", + "object_store", "regex", "rusoto_core", "rusoto_credential", @@ -2557,14 +2574,14 @@ dependencies = [ [[package]] name = "deltalake-azure" version = "0.1.0" -source = "git+https://github.com/GlareDB/delta-rs.git?rev=4c4bd7d5d4ce47f47f348c2a7d52c009048f841e#4c4bd7d5d4ce47f47f348c2a7d52c009048f841e" +source = "git+https://github.com/delta-io/delta-rs.git?rev=993e2c202936719855f8831513bcbab1b9930b94#993e2c202936719855f8831513bcbab1b9930b94" dependencies = [ "async-trait", "bytes", "deltalake-core", "futures", "lazy_static", - "object_store 0.8.0", + "object_store", "regex", "thiserror", "tokio", @@ -2575,7 +2592,7 @@ dependencies = [ [[package]] name = "deltalake-core" version = "0.17.0" -source = "git+https://github.com/GlareDB/delta-rs.git?rev=4c4bd7d5d4ce47f47f348c2a7d52c009048f841e#4c4bd7d5d4ce47f47f348c2a7d52c009048f841e" +source = "git+https://github.com/delta-io/delta-rs.git?rev=993e2c202936719855f8831513bcbab1b9930b94#993e2c202936719855f8831513bcbab1b9930b94" dependencies = [ "arrow", "arrow-arith", @@ -2603,7 +2620,8 @@ dependencies = [ "errno", "fix-hidden-lifetime-bug", "futures", - "hashbrown 0.14.2", + "hashbrown 0.12.3", + "indexmap 2.2.2", "itertools 0.12.0", "lazy_static", "libc", @@ -2611,7 +2629,7 @@ dependencies = [ "num-bigint 0.4.4", "num-traits", "num_cpus", - "object_store 0.8.0", + "object_store", "once_cell", "parking_lot", "parquet", @@ -2622,7 +2640,7 @@ dependencies = [ "roaring", "serde", "serde_json", - "sqlparser 0.40.0", + "sqlparser", "thiserror", "tokio", "tracing", @@ -2634,14 +2652,14 @@ dependencies = [ [[package]] name = "deltalake-gcp" version = "0.1.0" -source = "git+https://github.com/GlareDB/delta-rs.git?rev=4c4bd7d5d4ce47f47f348c2a7d52c009048f841e#4c4bd7d5d4ce47f47f348c2a7d52c009048f841e" +source = "git+https://github.com/delta-io/delta-rs.git?rev=993e2c202936719855f8831513bcbab1b9930b94#993e2c202936719855f8831513bcbab1b9930b94" dependencies = [ "async-trait", "bytes", "deltalake-core", "futures", "lazy_static", - "object_store 0.8.0", + "object_store", "regex", "thiserror", "tokio", @@ -3462,7 +3480,7 @@ dependencies = [ "metastore", "nu-ansi-term 0.49.0", "num_cpus", - "object_store 0.8.0", + "object_store", "object_store_util", "pgrepr", "pgsrv", @@ -4044,16 +4062,13 @@ dependencies = [ [[package]] name = "lance" -version = "0.9.6" -source = "git+https://github.com/universalmind303/lance?rev=ffd4ac6ee2c61b3792a904b2e12152b246e837e6#ffd4ac6ee2c61b3792a904b2e12152b246e837e6" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" dependencies = [ "arrow", "arrow-arith", "arrow-array", "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", "arrow-ord", "arrow-row", "arrow-schema", @@ -4061,9 +4076,7 @@ dependencies = [ "async-recursion", "async-trait", "async_cell", - "aws-config", "aws-credential-types", - "base64 0.21.7", "byteorder", "bytes", "chrono", @@ -4072,30 +4085,30 @@ dependencies = [ "datafusion-physical-expr", "futures", "half", - "http", + "itertools 0.12.0", "lance-arrow", "lance-core", "lance-datafusion", "lance-datagen", + "lance-file", "lance-index", + "lance-io", "lance-linalg", + "lance-table", "lazy_static", "log", "lru_time_cache", "moka 0.11.3", - "num-traits", "num_cpus", - "object_store 0.9.0", + "object_store", "ordered-float 3.9.2", "pin-project", "prost", "prost-build", - "prost-types", "rand", "roaring", "serde", "serde_json", - "shellexpand", "snafu", "tempfile", "tokio", @@ -4106,8 +4119,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.9.6" -source = "git+https://github.com/universalmind303/lance?rev=ffd4ac6ee2c61b3792a904b2e12152b246e837e6#ffd4ac6ee2c61b3792a904b2e12152b246e837e6" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" dependencies = [ "arrow-array", "arrow-buffer", @@ -4123,56 +4136,39 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.9.6" -source = "git+https://github.com/universalmind303/lance?rev=ffd4ac6ee2c61b3792a904b2e12152b246e837e6#ffd4ac6ee2c61b3792a904b2e12152b246e837e6" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" dependencies = [ - "arrow-arith", "arrow-array", "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", "arrow-schema", - "arrow-select", - "async-recursion", "async-trait", - "aws-config", - "aws-credential-types", "byteorder", "bytes", "chrono", "datafusion-common", "datafusion-sql", "futures", - "http", "lance-arrow", "lazy_static", - "log", "mock_instant", "moka 0.11.3", - "num-traits", - "num_cpus", - "object_store 0.9.0", + "object_store", "pin-project", "prost", - "prost-build", - "prost-types", "rand", "roaring", - "serde", "serde_json", - "shellexpand", "snafu", "tokio", "tracing", "url", - "uuid", ] [[package]] name = "lance-datafusion" -version = "0.9.6" -source = "git+https://github.com/universalmind303/lance?rev=ffd4ac6ee2c61b3792a904b2e12152b246e837e6#ffd4ac6ee2c61b3792a904b2e12152b246e837e6" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" dependencies = [ "arrow", "arrow-array", @@ -4181,18 +4177,21 @@ dependencies = [ "async-trait", "datafusion", "datafusion-common", - "datafusion-expr", "datafusion-physical-expr", + "datafusion-substrait", "futures", "lance-arrow", "lance-core", + "prost", + "snafu", + "substrait", "tokio", ] [[package]] name = "lance-datagen" -version = "0.9.6" -source = "git+https://github.com/universalmind303/lance?rev=ffd4ac6ee2c61b3792a904b2e12152b246e837e6#ffd4ac6ee2c61b3792a904b2e12152b246e837e6" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" dependencies = [ "arrow", "arrow-array", @@ -4203,13 +4202,40 @@ dependencies = [ "rand_xoshiro", ] +[[package]] +name = "lance-file" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "datafusion-common", + "futures", + "lance-arrow", + "lance-core", + "lance-io", + "num-traits", + "num_cpus", + "object_store", + "prost", + "prost-build", + "roaring", + "snafu", + "tokio", + "tracing", +] + [[package]] name = "lance-index" -version = "0.9.6" -source = "git+https://github.com/universalmind303/lance?rev=ffd4ac6ee2c61b3792a904b2e12152b246e837e6#ffd4ac6ee2c61b3792a904b2e12152b246e837e6" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" dependencies = [ "arrow", - "arrow-arith", "arrow-array", "arrow-ord", "arrow-schema", @@ -4220,17 +4246,20 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", + "datafusion-sql", "futures", "half", "lance-arrow", "lance-core", "lance-datafusion", + "lance-file", + "lance-io", "lance-linalg", + "lance-table", "log", "num-traits", "num_cpus", - "object_store 0.9.0", - "pin-project", + "object_store", "prost", "prost-build", "rand", @@ -4244,19 +4273,54 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-io" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "aws-config", + "aws-credential-types", + "byteorder", + "bytes", + "chrono", + "futures", + "lance-arrow", + "lance-core", + "lazy_static", + "num_cpus", + "object_store", + "pin-project", + "prost", + "prost-build", + "shellexpand", + "snafu", + "tokio", + "tracing", + "url", +] + [[package]] name = "lance-linalg" -version = "0.9.6" -source = "git+https://github.com/universalmind303/lance?rev=ffd4ac6ee2c61b3792a904b2e12152b246e837e6#ffd4ac6ee2c61b3792a904b2e12152b246e837e6" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" dependencies = [ "arrow-array", "arrow-ord", "arrow-schema", - "arrow-select", "cc", "futures", "half", "lance-arrow", + "lance-core", "log", "num-traits", "num_cpus", @@ -4265,6 +4329,41 @@ dependencies = [ "tracing", ] +[[package]] +name = "lance-table" +version = "0.9.12" +source = "git+https://github.com/lancedb/lance?rev=310d79eccf93f3c6a48c162c575918cdba13faec#310d79eccf93f3c6a48c162c575918cdba13faec" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-schema", + "async-trait", + "aws-credential-types", + "byteorder", + "bytes", + "chrono", + "futures", + "lance-arrow", + "lance-core", + "lance-file", + "lance-io", + "log", + "object_store", + "prost", + "prost-build", + "prost-types", + "rand", + "roaring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "uuid", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -4658,7 +4757,7 @@ dependencies = [ "futures", "ioutil", "logutil", - "object_store 0.8.0", + "object_store", "object_store_util", "once_cell", "pgrepr", @@ -5196,36 +5295,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" -dependencies = [ - "async-trait", - "base64 0.21.7", - "bytes", - "chrono", - "futures", - "humantime", - "hyper", - "itertools 0.11.0", - "parking_lot", - "percent-encoding", - "quick-xml", - "rand", - "reqwest", - "ring 0.17.7", - "rustls-pemfile 1.0.4", - "serde", - "serde_json", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - [[package]] name = "object_store" version = "0.9.0" @@ -5265,7 +5334,7 @@ dependencies = [ "futures", "logutil", "moka 0.12.5", - "object_store 0.8.0", + "object_store", "once_cell", "tempfile", "thiserror", @@ -5418,9 +5487,9 @@ dependencies = [ [[package]] name = "parquet" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" +checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" dependencies = [ "ahash 0.8.6", "arrow-array", @@ -5436,11 +5505,12 @@ dependencies = [ "chrono", "flate2", "futures", + "half", "hashbrown 0.14.2", "lz4_flex", "num", "num-bigint 0.4.4", - "object_store 0.8.0", + "object_store", "paste", "seq-macro", "snap", @@ -6009,7 +6079,7 @@ dependencies = [ "serde", "serde_json", "sqlformat", - "sqlparser 0.41.0", + "sqlparser", "strum 0.25.0", "strum_macros 0.25.3", ] @@ -6409,6 +6479,16 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "regress" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ed9969cad8051328011596bf549629f1b800cf1731e7964b1eef8dfc480d2c2" +dependencies = [ + "hashbrown 0.13.2", + "memchr", +] + [[package]] name = "relative-path" version = "1.9.0" @@ -7004,6 +7084,30 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "schemars" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45a28f4c49489add4ce10783f7911893516f15afe45d015608d41faca6bc4d29" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c767fd6fa65d9ccf9cf026122c1b555f2ef9a4f0cea69da4d7dbc3e258d30967" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 1.0.109", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -7240,6 +7344,17 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "serde_derive_internals" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "serde_json" version = "1.0.113" @@ -7252,6 +7367,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_tokenstream" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a00ffd23fd882d096f09fcaae2a9de8329a328628e86027e049ee051dc1621f" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn 2.0.48", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -7315,6 +7442,19 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "serde_yaml" +version = "0.9.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adf8a49373e98a4c5f0ceb5d05aa7c648d75f63774981ed95b7c7443bbd50c6e" +dependencies = [ + "indexmap 2.2.2", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha-1" version = "0.10.1" @@ -7494,7 +7634,7 @@ dependencies = [ "ioutil", "logutil", "metastore", - "object_store 0.8.0", + "object_store", "pgrepr", "pgsrv", "regex", @@ -7626,7 +7766,7 @@ dependencies = [ "logutil", "memoize", "num-traits", - "object_store 0.8.0", + "object_store", "once_cell", "pgrepr", "protogen", @@ -7659,7 +7799,7 @@ dependencies = [ "logutil", "metastore", "num_cpus", - "object_store 0.8.0", + "object_store", "object_store_util", "once_cell", "parking_lot", @@ -7718,16 +7858,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "sqlparser" -version = "0.40.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5" -dependencies = [ - "log", - "sqlparser_derive", -] - [[package]] name = "sqlparser" version = "0.41.0" @@ -7736,6 +7866,7 @@ checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" dependencies = [ "log", "serde", + "sqlparser_derive", ] [[package]] @@ -7897,6 +8028,28 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "substrait" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5478fbd0313a9b0915a1c0e7ebf15b5fed7d7c6dd7229b4f5e32ce75b10f256a" +dependencies = [ + "git2", + "heck 0.4.1", + "prettyplease", + "prost", + "prost-build", + "prost-types", + "schemars", + "semver 1.0.20", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.48", + "typify", + "walkdir", +] + [[package]] name = "subtle" version = "2.4.1" @@ -8595,6 +8748,50 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "typify" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63ed4d717aa95e598e2f9183376b060e95669ef8f444701ea6afb990fde1cf69" +dependencies = [ + "typify-impl", + "typify-macro", +] + +[[package]] +name = "typify-impl" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89057244dfade7c58af9e62beccbcbeb7a7e7701697a33b06dbe0b7331fb79cf" +dependencies = [ + "heck 0.4.1", + "log", + "proc-macro2", + "quote", + "regress", + "schemars", + "serde_json", + "syn 2.0.48", + "thiserror", + "unicode-ident", +] + +[[package]] +name = "typify-macro" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ddade397f5957d2cd7fb27f905a9a569db20e8e1e3ea589edce40be07b92825" +dependencies = [ + "proc-macro2", + "quote", + "schemars", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.48", + "typify-impl", +] + [[package]] name = "ucd-trie" version = "0.1.6" @@ -8667,6 +8864,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +[[package]] +name = "unsafe-libyaml" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b" + [[package]] name = "untrusted" version = "0.7.1" diff --git a/Cargo.toml b/Cargo.toml index fdbf59d34..c6c5fa468 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,9 +18,9 @@ wildcard_imports = "deny" [workspace.dependencies] clap = { version = "4.4.18", features = ["derive"] } -datafusion = { version = "34.0", features = ["avro"] } -arrow-flight = { version = "49.0.0", features = ["flight-sql-experimental"] } -datafusion-proto = { version = "34.0" } +datafusion = { version = "35.0.0", features = ["avro"] } +arrow-flight = { version = "50.0.0", features = ["flight-sql-experimental"] } +datafusion-proto = { version = "35.0.0" } reqwest = { version = "0.11.24", default-features = false, features = [ "json", "rustls-tls", @@ -32,7 +32,7 @@ anyhow = "1.0.79" async-trait = "0.1.77" chrono = "0.4.33" futures = "0.3.30" -object_store = "0.8" +object_store = "0.9" prost = "0.12" prost-build = "0.12" prost-types = "0.12" @@ -44,8 +44,6 @@ url = "2.5.0" [workspace.dependencies.deltalake] -git = "https://github.com/GlareDB/delta-rs.git" -rev = "4c4bd7d5d4ce47f47f348c2a7d52c009048f841e" +git = "https://github.com/delta-io/delta-rs.git" +rev = "993e2c202936719855f8831513bcbab1b9930b94" features = ["s3", "gcs", "azure", "datafusion"] - - diff --git a/crates/datafusion_ext/src/planner/expr/function.rs b/crates/datafusion_ext/src/planner/expr/function.rs index 5ca07d7d9..9005ab368 100644 --- a/crates/datafusion_ext/src/planner/expr/function.rs +++ b/crates/datafusion_ext/src/planner/expr/function.rs @@ -23,19 +23,19 @@ use datafusion::common::{ plan_err, DFSchema, DataFusionError, + Dependency, Result, }; -use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::expr::{find_df_window_func, ScalarFunction}; use datafusion::logical_expr::function::suggest_valid_function; use datafusion::logical_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion::logical_expr::{ expr, - window_function, AggregateFunction, BuiltinScalarFunction, Expr, WindowFrame, - WindowFunction, + WindowFunctionDefinition, }; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{ @@ -119,6 +119,7 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { } partition_by }; + let mut order_by = self .order_by_to_sort_expr( &window.order_by, @@ -128,6 +129,21 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { false, ) .await?; + + let func_deps = schema.functional_dependencies(); + // Find whether ties are possible in the given ordering: + let is_ordering_strict = order_by.iter().any(|orderby_expr| { + if let Expr::Sort(sort_expr) = orderby_expr { + if let Expr::Column(col) = sort_expr.expr.as_ref() { + let idx = schema.index_of_column(col).unwrap(); + return func_deps.iter().any(|dep| { + dep.source_indices == vec![idx] && dep.mode == Dependency::Single + }); + } + } + false + }); + let window_frame = window .window_frame .as_ref() @@ -140,15 +156,17 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { let window_frame = if let Some(window_frame) = window_frame { regularize_window_order_by(&window_frame, &mut order_by)?; window_frame + } else if is_ordering_strict { + WindowFrame::new(Some(true)) } else { - WindowFrame::new(!order_by.is_empty()) + WindowFrame::new((!order_by.is_empty()).then_some(false)) }; if let Ok(fun) = self.find_window_func(&name).await { let expr = match fun { - WindowFunction::AggregateFunction(aggregate_fun) => { + WindowFunctionDefinition::AggregateFunction(aggregate_fun) => { Expr::WindowFunction(expr::WindowFunction::new( - WindowFunction::AggregateFunction(aggregate_fun), + WindowFunctionDefinition::AggregateFunction(aggregate_fun), args, partition_by, order_by, @@ -220,17 +238,20 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { Ok(Expr::ScalarFunction(ScalarFunction::new(fun, args))) } - pub(super) async fn find_window_func(&mut self, name: &str) -> Result { - if let Some(func) = window_function::find_df_window_func(name) { + pub(super) async fn find_window_func( + &mut self, + name: &str, + ) -> Result { + if let Some(func) = find_df_window_func(name) { return Ok(func); } if let Some(agg) = self.context_provider.get_aggregate_meta(name).await { - return Ok(WindowFunction::AggregateUDF(agg)); + return Ok(expr::WindowFunctionDefinition::AggregateUDF(agg)); } if let Some(win) = self.context_provider.get_window_meta(name).await { - return Ok(WindowFunction::WindowUDF(win)); + return Ok(WindowFunctionDefinition::WindowUDF(win)); } Err(plan_datafusion_err!( diff --git a/crates/datafusion_ext/src/planner/mod.rs b/crates/datafusion_ext/src/planner/mod.rs index a90ce596c..cc70333cc 100644 --- a/crates/datafusion_ext/src/planner/mod.rs +++ b/crates/datafusion_ext/src/planner/mod.rs @@ -144,7 +144,7 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { alias: TableAlias, ) -> Result { let apply_name_plan = LogicalPlan::SubqueryAlias(SubqueryAlias::try_new( - plan, + Arc::new(plan), self.normalizer.normalize(alias.name), )?); diff --git a/crates/datafusion_ext/src/planner/query.rs b/crates/datafusion_ext/src/planner/query.rs index 6d9d784f8..d613dd956 100644 --- a/crates/datafusion_ext/src/planner/query.rs +++ b/crates/datafusion_ext/src/planner/query.rs @@ -68,9 +68,12 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { // A `WITH` block can't use the same name more than once let cte_name = self.normalizer.normalize(cte.alias.name.clone()); if planner_context.contains_cte(&cte_name) { - return Err(DataFusionError::SQL(ParserError(format!( - "WITH query name {cte_name:?} specified more than once" - )))); + return Err(DataFusionError::SQL( + ParserError(format!( + "WITH query name {cte_name:?} specified more than once" + )), + None, + )); } // create logical plan & pass backreferencing CTEs // CTE expr don't need extend outer_query_schema diff --git a/crates/datafusion_ext/src/planner/statement.rs b/crates/datafusion_ext/src/planner/statement.rs index 952acead5..458005069 100644 --- a/crates/datafusion_ext/src/planner/statement.rs +++ b/crates/datafusion_ext/src/planner/statement.rs @@ -109,6 +109,7 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { datafusion::common::SchemaError::DuplicateUnqualifiedField { name: c.clone(), }, + Box::new(None), )); } else { value_indices[column_index] = Some(i); diff --git a/crates/datasources/Cargo.toml b/crates/datasources/Cargo.toml index d160aca85..93c2e13ce 100644 --- a/crates/datasources/Cargo.toml +++ b/crates/datasources/Cargo.toml @@ -69,7 +69,7 @@ tiberius = { version = "0.12.2", default-features = false, features = [ "rustls", "chrono", ] } -lance = { git = "https://github.com/universalmind303/lance", rev = "ffd4ac6ee2c61b3792a904b2e12152b246e837e6" } +lance = { git = "https://github.com/lancedb/lance", rev = "310d79eccf93f3c6a48c162c575918cdba13faec" } bson = "2.9.0" scylla = { version = "0.11.1" } glob = "0.3.1" diff --git a/crates/datasources/src/bigquery/mod.rs b/crates/datasources/src/bigquery/mod.rs index ac92c962e..bbc6f7200 100644 --- a/crates/datasources/src/bigquery/mod.rs +++ b/crates/datasources/src/bigquery/mod.rs @@ -400,11 +400,15 @@ impl ExecutionPlan for BigQueryExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for BigQueryExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for BigQueryExec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/cassandra/exec.rs b/crates/datasources/src/cassandra/exec.rs index 5fa913210..c093e9fde 100644 --- a/crates/datasources/src/cassandra/exec.rs +++ b/crates/datasources/src/cassandra/exec.rs @@ -72,13 +72,18 @@ impl ExecutionPlan for CassandraExec { fn children(&self) -> Vec> { vec![] } + fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for ScyllaExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for ScyllaExec".to_string(), + )) + } } fn execute( &self, diff --git a/crates/datasources/src/clickhouse/mod.rs b/crates/datasources/src/clickhouse/mod.rs index b194ecbfb..d863d9fea 100644 --- a/crates/datasources/src/clickhouse/mod.rs +++ b/crates/datasources/src/clickhouse/mod.rs @@ -426,11 +426,15 @@ impl ExecutionPlan for ClickhouseExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for ClickhouseExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for ClickhouseExec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/common/mod.rs b/crates/datasources/src/common/mod.rs index 4b53b6272..bc7cea14f 100644 --- a/crates/datasources/src/common/mod.rs +++ b/crates/datasources/src/common/mod.rs @@ -24,8 +24,7 @@ pub(crate) fn exprs_to_phys_exprs( ) -> Result>> { if let Some(expr) = conjunction(exprs.to_vec()) { let table_df_schema = schema.clone().to_dfschema()?; - let filters = - create_physical_expr(&expr, &table_df_schema, schema, state.execution_props())?; + let filters = create_physical_expr(&expr, &table_df_schema, state.execution_props())?; Ok(Some(filters)) } else { Ok(None) diff --git a/crates/datasources/src/debug/mod.rs b/crates/datasources/src/debug/mod.rs index ddbe50dc8..2288aa8bd 100644 --- a/crates/datasources/src/debug/mod.rs +++ b/crates/datasources/src/debug/mod.rs @@ -263,11 +263,15 @@ impl ExecutionPlan for DebugTableExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for DebugTableExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for DebugTableExec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/lake/iceberg/table.rs b/crates/datasources/src/lake/iceberg/table.rs index 052b8ab39..5e428e9d2 100644 --- a/crates/datasources/src/lake/iceberg/table.rs +++ b/crates/datasources/src/lake/iceberg/table.rs @@ -280,9 +280,10 @@ impl TableProvider for IcebergTableReader { ) -> DataFusionResult> { // Create the datafusion specific url, and register the object store. let object_url = datasource_url_to_unique_url(&self.state.location); - ctx.runtime_env() - .object_store_registry - .register_store(object_url.as_ref(), self.state.store.clone()); + + // ctx.runtime_env() + // .object_store_registry + // .register_store(object_url.as_ref(), self.state.store.clone()); // TODO: Properly prune based on partition values. This currently skips // any partition processing, and shoves everything into a single file @@ -348,7 +349,6 @@ impl TableProvider for IcebergTableReader { limit, table_partition_cols: Vec::new(), output_ordering: Vec::new(), - infinite_source: false, }; let plan = ParquetFormat::new() diff --git a/crates/datasources/src/mongodb/exec.rs b/crates/datasources/src/mongodb/exec.rs index c043deb26..41aeccb15 100644 --- a/crates/datasources/src/mongodb/exec.rs +++ b/crates/datasources/src/mongodb/exec.rs @@ -79,11 +79,15 @@ impl ExecutionPlan for MongoDbBsonExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for MongoDB Exec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for MongoDB Exec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/mysql/mod.rs b/crates/datasources/src/mysql/mod.rs index 2c6defd13..e5b83d84a 100644 --- a/crates/datasources/src/mysql/mod.rs +++ b/crates/datasources/src/mysql/mod.rs @@ -495,11 +495,15 @@ impl ExecutionPlan for MysqlExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for MysqlExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for MysqlExec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/object_store/mod.rs b/crates/datasources/src/object_store/mod.rs index bf313da91..f43b440ec 100644 --- a/crates/datasources/src/object_store/mod.rs +++ b/crates/datasources/src/object_store/mod.rs @@ -335,7 +335,6 @@ impl TableProvider for ObjStoreTableProvider { limit, table_partition_cols: Vec::new(), output_ordering: Vec::new(), - infinite_source: false, }; let filters = exprs_to_phys_exprs(filters, ctx, &self.arrow_schema)?; diff --git a/crates/datasources/src/postgres/mod.rs b/crates/datasources/src/postgres/mod.rs index 4f17f8dd8..0bbdfe480 100644 --- a/crates/datasources/src/postgres/mod.rs +++ b/crates/datasources/src/postgres/mod.rs @@ -864,11 +864,15 @@ impl ExecutionPlan for PostgresBinaryCopyExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for PostgresBinaryCopyExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for PostgresBinaryCopyExec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/postgres/query_exec.rs b/crates/datasources/src/postgres/query_exec.rs index 695709437..4d07ec72b 100644 --- a/crates/datasources/src/postgres/query_exec.rs +++ b/crates/datasources/src/postgres/query_exec.rs @@ -66,11 +66,15 @@ impl ExecutionPlan for PostgresQueryExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for PostgresQueryExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for PostgresQueryExec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/snowflake/mod.rs b/crates/datasources/src/snowflake/mod.rs index e2ff741fe..af2cae73c 100644 --- a/crates/datasources/src/snowflake/mod.rs +++ b/crates/datasources/src/snowflake/mod.rs @@ -425,11 +425,15 @@ impl ExecutionPlan for SnowflakeExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for Snowflake exec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for Snowflake exec".to_string(), + )) + } } fn execute( diff --git a/crates/datasources/src/sqlserver/mod.rs b/crates/datasources/src/sqlserver/mod.rs index ebccd7e02..75831ec5b 100644 --- a/crates/datasources/src/sqlserver/mod.rs +++ b/crates/datasources/src/sqlserver/mod.rs @@ -515,11 +515,15 @@ impl ExecutionPlan for SqlServerExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DatafusionResult> { - Err(DataFusionError::Execution( - "cannot replace children for SqlServerExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Execution( + "cannot replace children for SqlServerExec".to_string(), + )) + } } fn execute( diff --git a/crates/distexec/src/adapter.rs b/crates/distexec/src/adapter.rs index 711f23071..ef3363a31 100644 --- a/crates/distexec/src/adapter.rs +++ b/crates/distexec/src/adapter.rs @@ -287,9 +287,13 @@ impl ExecutionPlan for ProxyExecutionPlan { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - unimplemented!("Should not be referenced during optimization") + if children.is_empty() { + Ok(self) + } else { + unimplemented!("Should not be referenced during optimization") + } } fn execute( diff --git a/crates/protogen/src/metastore/types/catalog.rs b/crates/protogen/src/metastore/types/catalog.rs index 904d59a0e..6e636f293 100644 --- a/crates/protogen/src/metastore/types/catalog.rs +++ b/crates/protogen/src/metastore/types/catalog.rs @@ -720,6 +720,7 @@ impl From for catalog::TypeSignature { sigs.into_iter().map(|s| s.into()).collect(); ProtoSignature::OneOf(catalog::OneOfSignature { args: sigs }) } + _ => unimplemented!(), }; catalog::TypeSignature { diff --git a/crates/sqlbuiltins/src/functions/aggregates.rs b/crates/sqlbuiltins/src/functions/aggregates.rs index 56af3e5cb..29f085f9e 100644 --- a/crates/sqlbuiltins/src/functions/aggregates.rs +++ b/crates/sqlbuiltins/src/functions/aggregates.rs @@ -208,6 +208,12 @@ document! { name => variance_pop } +document! { + doc => "Returns the nth value in a column", + example => "nth_value(a, 2)", + name => nth_value +} + impl BuiltinFunction for AggregateFunction { fn function_type(&self) -> FunctionType { FunctionType::Aggregate @@ -252,6 +258,7 @@ impl BuiltinFunction for AggregateFunction { Sum => sum::NAME, Variance => variance::NAME, VariancePop => variance_pop::NAME, + NthValue => nth_value::NAME, } } @@ -298,6 +305,7 @@ impl BuiltinFunction for AggregateFunction { Sum => sum::EXAMPLE, Variance => variance::EXAMPLE, VariancePop => variance_pop::EXAMPLE, + NthValue => nth_value::EXAMPLE, }) } @@ -340,6 +348,7 @@ impl BuiltinFunction for AggregateFunction { Sum => sum::DESCRIPTION, Variance => variance::DESCRIPTION, VariancePop => variance_pop::DESCRIPTION, + NthValue => nth_value::DESCRIPTION, }) } } diff --git a/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs b/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs index 77a20cbde..df75c43f8 100644 --- a/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs +++ b/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs @@ -668,6 +668,12 @@ document! { "struct" => struct_ } +document! { + doc => "Resize an array to the specified length", + example => "array_resize([1, 2, 3], 5, 0)", + name => array_resize + +} impl BuiltinFunction for BuiltinScalarFunction { fn function_type(&self) -> FunctionType { FunctionType::Scalar @@ -806,6 +812,7 @@ impl BuiltinFunction for BuiltinScalarFunction { SubstrIndex => substr_index::NAME, FindInSet => find_in_set::NAME, Struct => struct_::NAME, + ArrayResize => array_resize::NAME, } } @@ -942,6 +949,7 @@ impl BuiltinFunction for BuiltinScalarFunction { SubstrIndex => substr_index::EXAMPLE, FindInSet => find_in_set::EXAMPLE, Struct => struct_::EXAMPLE, + ArrayResize => array_resize::EXAMPLE, }) } @@ -1078,6 +1086,7 @@ impl BuiltinFunction for BuiltinScalarFunction { SubstrIndex => substr_index::DESCRIPTION, FindInSet => find_in_set::DESCRIPTION, Struct => struct_::DESCRIPTION, + ArrayResize => array_resize::DESCRIPTION, }) } } diff --git a/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs b/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs index 188fca0e5..568f07c41 100644 --- a/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs +++ b/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs @@ -200,11 +200,15 @@ impl ExecutionPlan for StreamingListerExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for StreamingListerExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for StreamingListerExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlbuiltins/src/functions/table/system/mod.rs b/crates/sqlbuiltins/src/functions/table/system/mod.rs index 413e5f0c3..6b5656a34 100644 --- a/crates/sqlbuiltins/src/functions/table/system/mod.rs +++ b/crates/sqlbuiltins/src/functions/table/system/mod.rs @@ -157,11 +157,15 @@ impl ExecutionPlan for SystemOperationExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for SystemOperationExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for SystemOperationExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/dispatch/system.rs b/crates/sqlexec/src/dispatch/system.rs index feb71120c..f9c1741b1 100644 --- a/crates/sqlexec/src/dispatch/system.rs +++ b/crates/sqlexec/src/dispatch/system.rs @@ -611,6 +611,7 @@ fn sig_to_string_repr(sig: &TypeSignature) -> Vec { TypeSignature::VariadicEqual => vec!["T, .., T".to_string()], TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()], TypeSignature::OneOf(sigs) => sigs.iter().flat_map(sig_to_string_repr).collect(), + _ => vec!["Unknown".to_string()], } } diff --git a/crates/sqlexec/src/planner/logical_plan/mod.rs b/crates/sqlexec/src/planner/logical_plan/mod.rs index 570dc1b5f..171fb7f34 100644 --- a/crates/sqlexec/src/planner/logical_plan/mod.rs +++ b/crates/sqlexec/src/planner/logical_plan/mod.rs @@ -167,7 +167,7 @@ impl LogicalPlan { /// /// Note this currently only replaces placeholders for datafusion plans. pub fn replace_placeholders(&mut self, scalars: Vec) -> Result<()> { - let param_values = ParamValues::LIST(scalars); + let param_values = ParamValues::List(scalars); if let LogicalPlan::Datafusion(plan) = self { // Replace placeholders in the inner plan if the wrapped in an diff --git a/crates/sqlexec/src/planner/physical_plan/alter_database.rs b/crates/sqlexec/src/planner/physical_plan/alter_database.rs index e9b8ae065..df0097bde 100644 --- a/crates/sqlexec/src/planner/physical_plan/alter_database.rs +++ b/crates/sqlexec/src/planner/physical_plan/alter_database.rs @@ -52,11 +52,15 @@ impl ExecutionPlan for AlterDatabaseExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for AlterDatabaseRenameExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for AlterDatabaseRenameExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/alter_table.rs b/crates/sqlexec/src/planner/physical_plan/alter_table.rs index e59216e7f..3e582b2e1 100644 --- a/crates/sqlexec/src/planner/physical_plan/alter_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/alter_table.rs @@ -53,11 +53,15 @@ impl ExecutionPlan for AlterTableExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for AlterTableRenameExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for AlterTableRenameExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs b/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs index 2fcb02d74..e61cf0ab0 100644 --- a/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs +++ b/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs @@ -53,11 +53,15 @@ impl ExecutionPlan for AlterTunnelRotateKeysExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for AlterTunnelRotateKeysExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for AlterTunnelRotateKeysExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/client_recv.rs b/crates/sqlexec/src/planner/physical_plan/client_recv.rs index 9cb9c6e6e..eac88caa3 100644 --- a/crates/sqlexec/src/planner/physical_plan/client_recv.rs +++ b/crates/sqlexec/src/planner/physical_plan/client_recv.rs @@ -57,11 +57,15 @@ impl ExecutionPlan for ClientExchangeRecvExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for ClientExchangeRecvExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for ClientExchangeRecvExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/client_send.rs b/crates/sqlexec/src/planner/physical_plan/client_send.rs index 9302f4f55..21eba70b8 100644 --- a/crates/sqlexec/src/planner/physical_plan/client_send.rs +++ b/crates/sqlexec/src/planner/physical_plan/client_send.rs @@ -71,11 +71,15 @@ impl ExecutionPlan for ClientExchangeSendExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for ClientExchangeSendExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for ClientExchangeSendExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/create_credential.rs b/crates/sqlexec/src/planner/physical_plan/create_credential.rs index 50bc8174c..bd3517f83 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_credential.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_credential.rs @@ -1,8 +1,9 @@ -use super::*; - -use protogen::metastore::types::{options::CredentialsOptions, service, service::Mutation}; - use catalog::mutator::CatalogMutator; +use protogen::metastore::types::options::CredentialsOptions; +use protogen::metastore::types::service; +use protogen::metastore::types::service::Mutation; + +use super::*; #[derive(Clone, Debug)] pub struct CreateCredentialExec { @@ -42,11 +43,15 @@ impl ExecutionPlan for CreateCredentialExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for CreateCredentialsExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for CreateCredentialsExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/create_credentials.rs b/crates/sqlexec/src/planner/physical_plan/create_credentials.rs index 6b9130bd0..68e0e081c 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_credentials.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_credentials.rs @@ -60,11 +60,15 @@ impl ExecutionPlan for CreateCredentialsExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for CreateCredentialsExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for CreateCredentialsExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/create_external_database.rs b/crates/sqlexec/src/planner/physical_plan/create_external_database.rs index d76644100..b3691f6b6 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_external_database.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_external_database.rs @@ -55,11 +55,15 @@ impl ExecutionPlan for CreateExternalDatabaseExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for CreateExternalDatabaseExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for CreateExternalDatabaseExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/create_external_table.rs b/crates/sqlexec/src/planner/physical_plan/create_external_table.rs index 18015ca3e..ce27065a6 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_external_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_external_table.rs @@ -57,11 +57,15 @@ impl ExecutionPlan for CreateExternalTableExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for CreateExternalTableExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for CreateExternalTableExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/create_schema.rs b/crates/sqlexec/src/planner/physical_plan/create_schema.rs index 90bb3b786..896397d9e 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_schema.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_schema.rs @@ -53,11 +53,15 @@ impl ExecutionPlan for CreateSchemaExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for CreateSchemaExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for CreateSchemaExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs b/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs index 1e6d23c76..137dd9e29 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs @@ -54,11 +54,15 @@ impl ExecutionPlan for CreateTunnelExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for CreateTunnelExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for CreateTunnelExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/create_view.rs b/crates/sqlexec/src/planner/physical_plan/create_view.rs index ac4ea670d..a3612f3af 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_view.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_view.rs @@ -55,11 +55,15 @@ impl ExecutionPlan for CreateViewExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for CreateViewExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for CreateViewExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/delete.rs b/crates/sqlexec/src/planner/physical_plan/delete.rs index fde5c7233..c55685f11 100644 --- a/crates/sqlexec/src/planner/physical_plan/delete.rs +++ b/crates/sqlexec/src/planner/physical_plan/delete.rs @@ -52,11 +52,15 @@ impl ExecutionPlan for DeleteExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DeleteExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DeleteExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/describe_table.rs b/crates/sqlexec/src/planner/physical_plan/describe_table.rs index b2829d0e4..e35fcfbd6 100644 --- a/crates/sqlexec/src/planner/physical_plan/describe_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/describe_table.rs @@ -57,11 +57,15 @@ impl ExecutionPlan for DescribeTableExec { fn with_new_children( self: Arc, - _: Vec>, + children: Vec>, ) -> Result> { - Err(DataFusionError::Plan( - "Cannot change children for DescribeTableExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DescribeTableExec".to_string(), + )) + } } fn execute(&self, _: usize, _: Arc) -> Result { diff --git a/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs b/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs index 7be2cbf2a..1c4243937 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs @@ -52,11 +52,15 @@ impl ExecutionPlan for DropCredentialsExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DropCredentialsExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DropCredentialsExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/drop_database.rs b/crates/sqlexec/src/planner/physical_plan/drop_database.rs index a5fb4ed17..b1c6a2277 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_database.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_database.rs @@ -52,11 +52,15 @@ impl ExecutionPlan for DropDatabaseExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DropDatabaseExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DropDatabaseExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs b/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs index edd0acf03..bd6673f3e 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs @@ -54,11 +54,15 @@ impl ExecutionPlan for DropSchemasExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DropSchemasExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DropSchemasExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/drop_tables.rs b/crates/sqlexec/src/planner/physical_plan/drop_tables.rs index 8ee58caed..4a49054f0 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_tables.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_tables.rs @@ -57,11 +57,15 @@ impl ExecutionPlan for DropTablesExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DropTablesExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DropTablesExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs b/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs index dc1e65229..6c9608b74 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs @@ -52,11 +52,15 @@ impl ExecutionPlan for DropTempTablesExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DropTempTablesExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DropTempTablesExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs b/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs index b31b23e85..3801c31b8 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs @@ -52,11 +52,15 @@ impl ExecutionPlan for DropTunnelExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DropTunnelExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DropTunnelExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/drop_views.rs b/crates/sqlexec/src/planner/physical_plan/drop_views.rs index fd0c087c5..4491d0913 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_views.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_views.rs @@ -53,11 +53,15 @@ impl ExecutionPlan for DropViewsExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for DropViewsExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for DropViewsExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/remote_exec.rs b/crates/sqlexec/src/planner/physical_plan/remote_exec.rs index 488d34648..d84c01b13 100644 --- a/crates/sqlexec/src/planner/physical_plan/remote_exec.rs +++ b/crates/sqlexec/src/planner/physical_plan/remote_exec.rs @@ -179,7 +179,7 @@ impl Stream for ExecutionResponseBatchStream { self.buf .extend(reader.into_iter().map(|result| match result { Ok(batch) => Ok(batch), - Err(e) => Err(DataFusionError::ArrowError(e)), + Err(e) => Err(DataFusionError::ArrowError(e, None)), })); // See if we got anything. diff --git a/crates/sqlexec/src/planner/physical_plan/remote_scan.rs b/crates/sqlexec/src/planner/physical_plan/remote_scan.rs index 7fc45fbda..55de1754b 100644 --- a/crates/sqlexec/src/planner/physical_plan/remote_scan.rs +++ b/crates/sqlexec/src/planner/physical_plan/remote_scan.rs @@ -132,11 +132,15 @@ impl ExecutionPlan for RemoteScanExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot replace children for RemoteScanExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot replace children for RemoteScanExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/set_var.rs b/crates/sqlexec/src/planner/physical_plan/set_var.rs index e45e19f17..b706507a8 100644 --- a/crates/sqlexec/src/planner/physical_plan/set_var.rs +++ b/crates/sqlexec/src/planner/physical_plan/set_var.rs @@ -50,11 +50,15 @@ impl ExecutionPlan for SetVarExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "cannot change children for SetVarExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "cannot change children for SetVarExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/show_var.rs b/crates/sqlexec/src/planner/physical_plan/show_var.rs index bcbd1d374..2cad76b13 100644 --- a/crates/sqlexec/src/planner/physical_plan/show_var.rs +++ b/crates/sqlexec/src/planner/physical_plan/show_var.rs @@ -52,11 +52,15 @@ impl ExecutionPlan for ShowVarExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "cannot change children for ShowVarExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "cannot change children for ShowVarExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/update.rs b/crates/sqlexec/src/planner/physical_plan/update.rs index cf0b3a896..abddf0431 100644 --- a/crates/sqlexec/src/planner/physical_plan/update.rs +++ b/crates/sqlexec/src/planner/physical_plan/update.rs @@ -53,11 +53,15 @@ impl ExecutionPlan for UpdateExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for UpdateExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for UpdateExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/planner/physical_plan/values.rs b/crates/sqlexec/src/planner/physical_plan/values.rs index 2d1e8921a..e59f7525e 100644 --- a/crates/sqlexec/src/planner/physical_plan/values.rs +++ b/crates/sqlexec/src/planner/physical_plan/values.rs @@ -47,11 +47,15 @@ impl ExecutionPlan for ExtValuesExec { fn with_new_children( self: Arc, - _children: Vec>, + children: Vec>, ) -> DataFusionResult> { - Err(DataFusionError::Plan( - "Cannot change children for ValuesExec".to_string(), - )) + if children.is_empty() { + Ok(self) + } else { + Err(DataFusionError::Plan( + "Cannot change children for ValuesExec".to_string(), + )) + } } fn execute( diff --git a/crates/sqlexec/src/remote/batch_stream.rs b/crates/sqlexec/src/remote/batch_stream.rs index 00c936552..f35840e24 100644 --- a/crates/sqlexec/src/remote/batch_stream.rs +++ b/crates/sqlexec/src/remote/batch_stream.rs @@ -104,7 +104,7 @@ impl ExecutionBatchStream { let reader = IpcFileReader::try_new(cursor, None)?; Ok(reader.into_iter().map(|result| match result { Ok(batch) => Ok(batch), - Err(e) => Err(DataFusionError::ArrowError(e)), + Err(e) => Err(DataFusionError::ArrowError(e, None)), })) } } diff --git a/crates/sqlexec/src/remote/planner.rs b/crates/sqlexec/src/remote/planner.rs index 8df0ebdf4..c098802c1 100644 --- a/crates/sqlexec/src/remote/planner.rs +++ b/crates/sqlexec/src/remote/planner.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use async_trait::async_trait; use catalog::session_catalog::SessionCatalog; -use datafusion::arrow::datatypes::Schema; use datafusion::common::tree_node::Transformed; use datafusion::common::DFSchema; use datafusion::error::{DataFusionError, Result}; @@ -631,15 +630,9 @@ impl<'a> PhysicalPlanner for RemotePhysicalPlanner<'a> { &self, expr: &Expr, input_dfschema: &DFSchema, - input_schema: &Schema, session_state: &SessionState, ) -> Result> { - DefaultPhysicalPlanner::default().create_physical_expr( - expr, - input_dfschema, - input_schema, - session_state, - ) + DefaultPhysicalPlanner::default().create_physical_expr(expr, input_dfschema, session_state) } } From e1bc74350a25d1f95da2b4b9ddc44b20de2a35c6 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Mon, 5 Feb 2024 13:20:03 -0600 Subject: [PATCH 2/6] temporarily allow deprecated --- crates/sqlbuiltins/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/sqlbuiltins/src/lib.rs b/crates/sqlbuiltins/src/lib.rs index c8242bef2..45d6cee28 100644 --- a/crates/sqlbuiltins/src/lib.rs +++ b/crates/sqlbuiltins/src/lib.rs @@ -1,3 +1,7 @@ + +// allow deprecated items +#![allow(deprecated)] + //! Builtin sql objects. //! //! This crate provides the implementation of various builtin sql objects From e13f06b5a29c4e721238a737b97534e43a32da6e Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Mon, 5 Feb 2024 13:21:41 -0600 Subject: [PATCH 3/6] temporarily allow deprecated --- crates/sqlbuiltins/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/sqlbuiltins/src/lib.rs b/crates/sqlbuiltins/src/lib.rs index 45d6cee28..d393491ef 100644 --- a/crates/sqlbuiltins/src/lib.rs +++ b/crates/sqlbuiltins/src/lib.rs @@ -1,4 +1,3 @@ - // allow deprecated items #![allow(deprecated)] From 7243192f33548c158c9a84576c06f20a15d7da6c Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Mon, 5 Feb 2024 13:36:44 -0600 Subject: [PATCH 4/6] fix iceberg --- crates/datasources/src/lake/iceberg/table.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/datasources/src/lake/iceberg/table.rs b/crates/datasources/src/lake/iceberg/table.rs index 5e428e9d2..d6446fc58 100644 --- a/crates/datasources/src/lake/iceberg/table.rs +++ b/crates/datasources/src/lake/iceberg/table.rs @@ -96,6 +96,7 @@ impl TableState { // TODO: Handle not finding a version hint. let path = format_object_path(&location, "metadata/version-hint.text")?; let path = ObjectPath::parse(path)?; + println!("path: {:?}", path); let bs = store.get(&path).await?.bytes().await?; let version_contents = String::from_utf8(bs.to_vec()).map_err(|e| { IcebergError::DataInvalid(format!("Expected utf-8 in version hint: {}", e)) @@ -281,9 +282,9 @@ impl TableProvider for IcebergTableReader { // Create the datafusion specific url, and register the object store. let object_url = datasource_url_to_unique_url(&self.state.location); - // ctx.runtime_env() - // .object_store_registry - // .register_store(object_url.as_ref(), self.state.store.clone()); + ctx.runtime_env() + .object_store_registry + .register_store(object_url.as_ref(), self.state.store.clone()); // TODO: Properly prune based on partition values. This currently skips // any partition processing, and shoves everything into a single file From b1ce136ba8c927902598525b0a37785145f8cdae Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Mon, 5 Feb 2024 14:03:59 -0600 Subject: [PATCH 5/6] add todo comment --- crates/sqlbuiltins/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/sqlbuiltins/src/lib.rs b/crates/sqlbuiltins/src/lib.rs index d393491ef..ca2b95640 100644 --- a/crates/sqlbuiltins/src/lib.rs +++ b/crates/sqlbuiltins/src/lib.rs @@ -1,4 +1,5 @@ // allow deprecated items +// TODO: fix the deprecation warnings with scalarUDF. #![allow(deprecated)] //! Builtin sql objects. From e0e789f8baf7b607ef1e41f13605f75745a1e8bc Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Mon, 5 Feb 2024 14:04:59 -0600 Subject: [PATCH 6/6] remove printlns --- crates/datasources/src/lake/iceberg/table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/datasources/src/lake/iceberg/table.rs b/crates/datasources/src/lake/iceberg/table.rs index d6446fc58..615b586bc 100644 --- a/crates/datasources/src/lake/iceberg/table.rs +++ b/crates/datasources/src/lake/iceberg/table.rs @@ -96,7 +96,6 @@ impl TableState { // TODO: Handle not finding a version hint. let path = format_object_path(&location, "metadata/version-hint.text")?; let path = ObjectPath::parse(path)?; - println!("path: {:?}", path); let bs = store.get(&path).await?.bytes().await?; let version_contents = String::from_utf8(bs.to_vec()).map_err(|e| { IcebergError::DataInvalid(format!("Expected utf-8 in version hint: {}", e))