From 262ad056dcd218cbc9bf4988530123f0612835c4 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Mon, 25 Nov 2024 18:42:36 -0500 Subject: [PATCH 01/24] Attempt to update arrow/datafusion --- Cargo.lock | 1162 ++++++++++++--------- Cargo.toml | 44 +- rust/lance-datafusion/Cargo.toml | 5 +- rust/lance-datafusion/src/substrait.rs | 2 + rust/lance-io/src/encodings/binary.rs | 2 +- rust/lance-io/src/encodings/plain.rs | 4 +- rust/lance/Cargo.toml | 1 + rust/lance/src/datafusion/logical_plan.rs | 4 +- rust/lance/src/dataset/scanner.rs | 25 +- rust/lance/src/index/vector.rs | 7 +- 10 files changed, 748 insertions(+), 508 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 58c4b959d6..1a14983d2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -172,35 +172,35 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" dependencies = [ "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", "arrow-csv", - "arrow-data", - "arrow-ipc", + "arrow-data 53.3.0", + "arrow-ipc 53.3.0", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema", - "arrow-select", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "arrow-string", ] [[package]] name = "arrow-arith" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" +checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "chrono", "half", "num", @@ -213,13 +213,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "half", + "hashbrown 0.14.5", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" +dependencies = [ + "ahash", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "chrono", "chrono-tz", "half", - "hashbrown", + "hashbrown 0.15.2", "num", ] @@ -234,43 +250,74 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-cast" version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "atoi", + "base64 0.22.1", + "chrono", + "half", + "lexical-core 0.8.5", + "num", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "atoi", "base64 0.22.1", "chrono", "comfy-table", "half", - "lexical-core", + "lexical-core 1.0.2", "num", "ryu", ] [[package]] name = "arrow-csv" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" +checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", + "lexical-core 1.0.2", "regex", ] @@ -280,8 +327,20 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 52.2.0", + "arrow-schema 52.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" +dependencies = [ + "arrow-buffer 53.3.0", + "arrow-schema 53.3.0", "half", "num", ] @@ -292,11 +351,25 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "flatbuffers", +] + +[[package]] +name = "arrow-ipc" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "flatbuffers", "lz4_flex", "zstd", @@ -304,19 +377,19 @@ dependencies = [ [[package]] name = "arrow-json" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" +checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "chrono", "half", "indexmap", - "lexical-core", + "lexical-core 1.0.2", "num", "serde", "serde_json", @@ -324,30 +397,30 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" +checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "half", "num", ] [[package]] name = "arrow-row" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" +checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "half", ] @@ -356,6 +429,12 @@ name = "arrow-schema" version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" + +[[package]] +name = "arrow-schema" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" dependencies = [ "bitflags 2.6.0", ] @@ -367,24 +446,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "num", +] + +[[package]] +name = "arrow-select" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" +dependencies = [ + "ahash", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "num", ] [[package]] name = "arrow-string" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" +checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "memchr", "num", "regex", @@ -536,7 +629,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -579,7 +672,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -1083,6 +1176,17 @@ dependencies = [ "brotli-decompressor 4.0.1", ] +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor 4.0.1", +] + [[package]] name = "brotli-decompressor" version = "2.5.1" @@ -1244,9 +1348,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" dependencies = [ "chrono", "chrono-tz-build", @@ -1255,12 +1359,11 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" dependencies = [ "parse-zoneinfo", - "phf", "phf_codegen", ] @@ -1322,7 +1425,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -1393,15 +1496,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" -[[package]] -name = "convert_case" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "core-foundation" version = "0.9.4" @@ -1585,7 +1679,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", @@ -1599,7 +1693,7 @@ checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", @@ -1607,15 +1701,15 @@ dependencies = [ [[package]] name = "datafusion" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4fd4a99fc70d40ef7e52b243b4a399c3f8d353a40d5ecb200deee05e49c61bb" +checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-ipc 53.3.0", + "arrow-schema 53.3.0", "async-compression", "async-trait", "bytes", @@ -1630,6 +1724,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1640,14 +1735,14 @@ dependencies = [ "futures", "glob", "half", - "hashbrown", + "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "num_cpus", - "object_store", + "object_store 0.11.1", "parking_lot", - "parquet", + "parquet 53.3.0", "paste", "pin-project-lite", "rand", @@ -1663,54 +1758,58 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b3cfbd84c6003594ae1972314e3df303a27ce8ce755fcea3240c90f4c0529" +checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" dependencies = [ - "arrow-schema", + "arrow-schema 53.3.0", "async-trait", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "parking_lot", ] [[package]] name = "datafusion-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fdbc877e3e40dcf88cc8f283d9f5c8851f0a3aa07fee657b1b75ac1ad49b9c" +checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-schema 53.3.0", "chrono", "half", - "hashbrown", + "hashbrown 0.14.5", "instant", "libc", "num_cpus", - "object_store", - "parquet", + "object_store 0.11.1", + "parquet 53.3.0", + "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7496d1f664179f6ce3a5cbef6566056ccaf3ea4aa72cc455f80e62c1dd86b1" +checksum = "694c9d7ea1b82f95768215c4cb5c2d5c613690624e832a7ee64be563139d582f" dependencies = [ + "log", "tokio", ] [[package]] name = "datafusion-execution" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1" +checksum = "30b4cedcd98151e0a297f34021b6b232ff0ebc0f2f18ea5e7446b5ebda99b1a1" dependencies = [ "arrow", "chrono", @@ -1718,9 +1817,9 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown", + "hashbrown 0.14.5", "log", - "object_store", + "object_store 0.11.1", "parking_lot", "rand", "tempfile", @@ -1729,16 +1828,19 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c1841c409d9518c17971d15c9bae62e629eb937e6fb6c68cd32e9186f8b30d2" +checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", "chrono", "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "paste", "serde_json", "sqlparser", @@ -1746,14 +1848,25 @@ dependencies = [ "strum_macros", ] +[[package]] +name = "datafusion-expr-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1ba2bb018218d9260bbd7de6a46a20f61b93d4911dba8aa07735625004c4fb" +dependencies = [ + "arrow", + "datafusion-common", + "paste", +] + [[package]] name = "datafusion-functions" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e481cf34d2a444bd8fa09b65945f0ce83dc92df8665b761505b3d9f351bebb" +checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" dependencies = [ "arrow", - "arrow-buffer", + "arrow-buffer 53.3.0", "base64 0.22.1", "blake2", "blake3", @@ -1761,9 +1874,9 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", - "hashbrown", + "hashbrown 0.14.5", "hex", - "itertools 0.12.1", + "itertools 0.13.0", "log", "md-5", "rand", @@ -1775,49 +1888,79 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b4ece19f73c02727e5e8654d79cd5652de371352c1df3c4ac3e419ecd6943fb" +checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" dependencies = [ "ahash", "arrow", - "arrow-schema", + "arrow-schema 53.3.0", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", + "half", "log", "paste", "sqlparser", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2285d080dfecdfb8605b0ab2f1a41e2473208dc8e9bd6f5d1dbcfe97f517e6f" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + [[package]] name = "datafusion-functions-nested" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1474552cc824e8c9c88177d454db5781d4b66757d4aca75719306b8343a5e8d" +checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", "arrow-ord", - "arrow-schema", + "arrow-schema 53.3.0", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools 0.13.0", "log", "paste", "rand", ] +[[package]] +name = "datafusion-functions-window" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e78d30ebd6e9f74d4aeddec32744f5a18b5f9584591bc586fb5259c4848bac5" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", +] + [[package]] name = "datafusion-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791ff56f55608bc542d1ea7a68a64bdc86a9413f5a381d06a39fd49c2a3ab906" +checksum = "be172c44bf344df707e0c041fa3f41e6dc5fb0976f539c68bc442bca150ee58c" dependencies = [ "arrow", "async-trait", @@ -1825,9 +1968,9 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown", + "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "regex-syntax 0.8.4", @@ -1835,28 +1978,30 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a223962b3041304a3e20ed07a21d5de3d88d7e4e71ca192135db6d24e3365a4" +checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", "arrow-ord", - "arrow-schema", + "arrow-schema 53.3.0", "arrow-string", "base64 0.22.1", "chrono", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown", + "hashbrown 0.14.5", "hex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1865,42 +2010,44 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5e7d8532a1601cd916881db87a70b0a599900d23f3db2897d389032da53bc6" +checksum = "242ba8a26351d9ca16295814c46743b0d1b00ec372174bdfbba991d0953dd596" dependencies = [ "ahash", "arrow", "datafusion-common", - "datafusion-expr", - "hashbrown", + "datafusion-expr-common", + "hashbrown 0.14.5", "rand", ] [[package]] name = "datafusion-physical-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb9c78f308e050f5004671039786a925c3fee83b90004e9fcfd328d7febdcc0" +checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" dependencies = [ + "arrow-schema 53.3.0", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-plan" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d1116949432eb2d30f6362707e2846d942e491052a206f2ddcb42d08aea1ffe" +checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", "arrow-ord", - "arrow-schema", + "arrow-schema 53.3.0", "async-trait", "chrono", "datafusion-common", @@ -1908,13 +2055,14 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", - "hashbrown", + "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1925,13 +2073,13 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45d0180711165fe94015d7c4123eb3e1cf5fb60b1506453200b8d1ce666bef0" +checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-schema 53.3.0", "datafusion-common", "datafusion-expr", "log", @@ -1942,19 +2090,19 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0a0055aa98246c79f98f0d03df11f16cb7adc87818d02d4413e3f3cdadbbee" +checksum = "220d7ab0ffadd8b1af753904b18dd92d270271810b1ce9f8be3c3dbe2392b636" dependencies = [ - "arrow-buffer", + "arrow-buffer 53.3.0", "async-recursion", "chrono", "datafusion", - "itertools 0.12.1", - "object_store", + "itertools 0.13.0", + "object_store 0.11.1", "pbjson-types", - "prost", - "substrait 0.36.0", + "prost 0.13.3", + "substrait", "url", ] @@ -2268,7 +2416,7 @@ dependencies = [ name = "fsst" version = "0.19.2" dependencies = [ - "arrow-array", + "arrow-array 53.3.0", "lance-datagen", "rand", "rand_xoshiro", @@ -2366,7 +2514,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -2511,6 +2659,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + [[package]] name = "heck" version = "0.4.1" @@ -2788,7 +2942,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -2966,12 +3120,12 @@ dependencies = [ "approx", "arrow", "arrow-arith", - "arrow-array", - "arrow-buffer", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", "arrow-ord", "arrow-row", - "arrow-schema", - "arrow-select", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-recursion", "async-trait", "async_cell", @@ -2984,6 +3138,7 @@ dependencies = [ "criterion", "dashmap 5.5.3", "datafusion", + "datafusion-expr", "datafusion-functions", "datafusion-physical-expr", "deepsize", @@ -3010,20 +3165,20 @@ dependencies = [ "lzma-sys", "mock_instant", "moka", - "object_store", + "object_store 0.10.2", "permutation", "pin-project", "pprof", "pretty_assertions", - "prost", - "prost-build", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "random_word", "roaring", "rstest", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "tfrecord", @@ -3039,12 +3194,12 @@ dependencies = [ name = "lance-arrow" version = "0.19.2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "getrandom", "half", "num-traits", @@ -3055,9 +3210,9 @@ dependencies = [ name = "lance-core" version = "0.19.2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-schema 53.3.0", "async-trait", "byteorder", "bytes", @@ -3074,14 +3229,14 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.10.2", "pin-project", "proptest", - "prost", + "prost 0.13.3", "rand", "roaring", "serde_json", - "snafu", + "snafu 0.7.5", "tempfile", "tokio", "tokio-stream", @@ -3095,11 +3250,11 @@ name = "lance-datafusion" version = "0.19.2" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-trait", "datafusion", "datafusion-common", @@ -3112,9 +3267,8 @@ dependencies = [ "lance-datagen", "lazy_static", "log", - "prost", - "snafu", - "substrait-expr", + "prost 0.13.3", + "snafu 0.7.5", "tokio", ] @@ -3123,9 +3277,9 @@ name = "lance-datagen" version = "0.19.2" dependencies = [ "arrow", - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-cast 53.3.0", + "arrow-schema 53.3.0", "chrono", "criterion", "futures", @@ -3142,12 +3296,12 @@ dependencies = [ "arrayref", "arrow", "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "bytemuck", "byteorder", "bytes", @@ -3166,14 +3320,14 @@ dependencies = [ "num-traits", "paste", "pprof", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "rand_xoshiro", "rstest", "seq-macro", - "snafu", + "snafu 0.7.5", "tempfile", "test-log", "tokio", @@ -3185,9 +3339,9 @@ dependencies = [ name = "lance-encoding-datafusion" version = "0.19.2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-schema 53.3.0", "bytes", "datafusion", "datafusion-common", @@ -3204,11 +3358,11 @@ dependencies = [ "lance-io", "log", "pprof", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", - "snafu", + "snafu 0.7.5", "test-log", "tokio", ] @@ -3218,11 +3372,11 @@ name = "lance-file" version = "0.19.2" dependencies = [ "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-recursion", "async-trait", "byteorder", @@ -3239,16 +3393,16 @@ dependencies = [ "lance-testing", "log", "num-traits", - "object_store", + "object_store 0.10.2", "pprof", "pretty_assertions", "proptest", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "roaring", - "snafu", + "snafu 0.7.5", "tempfile", "test-log", "tokio", @@ -3261,10 +3415,10 @@ version = "0.19.2" dependencies = [ "approx", "arrow", - "arrow-array", + "arrow-array 53.3.0", "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-recursion", "async-trait", "bitvec", @@ -3295,17 +3449,17 @@ dependencies = [ "log", "moka", "num-traits", - "object_store", + "object_store 0.10.2", "pprof", - "prost", - "prost-build", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "random_word", "rayon", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "test-log", @@ -3320,12 +3474,12 @@ version = "0.19.2" dependencies = [ "arrow", "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-priority-channel", "async-recursion", "async-trait", @@ -3342,16 +3496,16 @@ dependencies = [ "lazy_static", "log", "mockall", - "object_store", - "parquet", + "object_store 0.10.2", + "parquet 52.2.0", "path_abs", "pin-project", "pprof", - "prost", - "prost-build", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "shellexpand", - "snafu", + "snafu 0.7.5", "tempfile", "test-log", "tokio", @@ -3364,7 +3518,7 @@ name = "lance-jni" version = "0.19.2" dependencies = [ "arrow", - "arrow-schema", + "arrow-schema 53.3.0", "datafusion", "jni", "lance", @@ -3375,7 +3529,7 @@ dependencies = [ "lazy_static", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", ] @@ -3385,9 +3539,9 @@ version = "0.19.2" dependencies = [ "approx", "arrow-arith", - "arrow-array", + "arrow-array 53.3.0", "arrow-ord", - "arrow-schema", + "arrow-schema 53.3.0", "bitvec", "cc", "criterion", @@ -3413,10 +3567,10 @@ name = "lance-table" version = "0.19.2" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-ipc 53.3.0", + "arrow-schema 53.3.0", "async-trait", "aws-credential-types", "aws-sdk-dynamodb", @@ -3433,19 +3587,19 @@ dependencies = [ "lance-io", "lazy_static", "log", - "object_store", + "object_store 0.10.2", "pprof", "pretty_assertions", "proptest", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "rangemap", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "url", @@ -3458,15 +3612,15 @@ version = "0.19.2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] name = "lance-testing" version = "0.19.2" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-schema 53.3.0", "lance-arrow", "num-traits", "rand", @@ -3510,11 +3664,24 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "lexical-parse-float 0.8.5", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "lexical-write-float 0.8.5", + "lexical-write-integer 0.8.5", +] + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float 1.0.2", + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", + "lexical-write-float 1.0.2", + "lexical-write-integer 1.0.2", ] [[package]] @@ -3523,8 +3690,19 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "lexical-parse-integer", - "lexical-util", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", "static_assertions", ] @@ -3534,7 +3712,17 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -3547,14 +3735,34 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + [[package]] name = "lexical-write-float" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "lexical-util", - "lexical-write-integer", + "lexical-util 0.8.5", + "lexical-write-integer 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util 1.0.3", + "lexical-write-integer 1.0.2", "static_assertions", ] @@ -3564,7 +3772,17 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -3627,7 +3845,7 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -3775,7 +3993,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -3988,7 +4206,28 @@ dependencies = [ "rustls-pemfile 2.1.3", "serde", "serde_json", - "snafu", + "snafu 0.7.5", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu 0.8.5", "tokio", "tracing", "url", @@ -4091,25 +4330,58 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-ipc 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", "base64 0.22.1", "brotli 6.0.0", "bytes", "chrono", "flate2", + "half", + "hashbrown 0.14.5", + "lz4_flex", + "num", + "num-bigint", + "paste", + "seq-macro", + "snap", + "thrift", + "twox-hash", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parquet" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" +dependencies = [ + "ahash", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-ipc 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", + "base64 0.22.1", + "brotli 7.0.0", + "bytes", + "chrono", + "flate2", "futures", "half", - "hashbrown", + "hashbrown 0.15.2", "lz4_flex", "num", "num-bigint", - "object_store", + "object_store 0.11.1", "paste", "seq-macro", "snap", @@ -4149,9 +4421,9 @@ dependencies = [ [[package]] name = "pbjson" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1030c719b0ec2a2d25a5df729d6cff1acf3cc230bf766f4f97833591f7577b90" +checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" dependencies = [ "base64 0.21.7", "serde", @@ -4159,28 +4431,28 @@ dependencies = [ [[package]] name = "pbjson-build" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2580e33f2292d34be285c5bc3dba5259542b083cfad6037b6d70345f24dcb735" +checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ - "heck 0.4.1", - "itertools 0.11.0", - "prost", - "prost-types", + "heck 0.5.0", + "itertools 0.13.0", + "prost 0.13.3", + "prost-types 0.13.3", ] [[package]] name = "pbjson-types" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f596653ba4ac51bdecbb4ef6773bc7f56042dc13927910de1684ad3d32aa12" +checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" dependencies = [ "bytes", "chrono", "pbjson", "pbjson-build", - "prost", - "prost-build", + "prost 0.13.3", + "prost-build 0.13.3", "serde", ] @@ -4261,7 +4533,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -4432,14 +4704,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -4471,7 +4743,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.12.6", +] + +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive 0.13.3", ] [[package]] @@ -4488,10 +4770,31 @@ dependencies = [ "once_cell", "petgraph", "prettyplease", - "prost", - "prost-types", + "prost 0.12.6", + "prost-types 0.12.6", "regex", - "syn 2.0.72", + "syn 2.0.89", + "tempfile", +] + +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.13.0", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost 0.13.3", + "prost-types 0.13.3", + "regex", + "syn 2.0.89", "tempfile", ] @@ -4505,7 +4808,20 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.89", ] [[package]] @@ -4514,7 +4830,16 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" dependencies = [ - "prost", + "prost 0.12.6", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost 0.13.3", ] [[package]] @@ -4818,23 +5143,13 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" -[[package]] -name = "regress" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5f39ba4513916c1b2657b72af6ec671f091cd637992f58d0ede5cae4e5dea0" -dependencies = [ - "hashbrown", - "memchr", -] - [[package]] name = "regress" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", "memchr", ] @@ -4948,7 +5263,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.72", + "syn 2.0.89", "unicode-ident", ] @@ -5199,7 +5514,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -5258,22 +5573,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -5284,14 +5599,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -5308,7 +5623,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -5426,7 +5741,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive", + "snafu-derive 0.7.5", +] + +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive 0.8.5", ] [[package]] @@ -5441,6 +5765,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "snap" version = "1.1.1" @@ -5475,9 +5811,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.49.0" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e" +checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" dependencies = [ "log", "sqlparser_derive", @@ -5491,7 +5827,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -5549,95 +5885,33 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] name = "substrait" -version = "0.29.4" +version = "0.41.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6c402018947957c4c7f2af49304f5cd8a948858686bf958d519cf0aa644790" -dependencies = [ - "heck 0.5.0", - "prettyplease", - "prost", - "prost-build", - "prost-types", - "schemars", - "semver", - "serde", - "serde_json", - "serde_yaml", - "syn 2.0.72", - "typify 0.0.16", - "walkdir", -] - -[[package]] -name = "substrait" -version = "0.36.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1ee6e584c8bf37104b7eb51c25eae07a9321b0e01379bec3b7c462d2f42afbf" +checksum = "bdab7f3d581f47ffd33ccf7aef3fa13932176de0b63c52e01eea4cb60617bce3" dependencies = [ "heck 0.5.0", "pbjson", "pbjson-build", "pbjson-types", "prettyplease", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.72", - "typify 0.1.0", + "syn 2.0.89", + "typify", "walkdir", ] -[[package]] -name = "substrait-expr" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9a8b8cc82442b391b67e7c195f0d3de35838bb78b115468d28076ec54dd4577" -dependencies = [ - "once_cell", - "prost", - "substrait 0.29.4", - "substrait-expr-funcgen", - "substrait-expr-macros", - "thiserror", -] - -[[package]] -name = "substrait-expr-funcgen" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96a5fb5bfa1ff743bdc1c259c46fde88d1ef8129c68ff7e7d876f907d67dbff7" -dependencies = [ - "convert_case", - "prettyplease", - "proc-macro2", - "quote", - "serde_yaml", - "substrait 0.29.4", - "syn 2.0.72", - "thiserror", -] - -[[package]] -name = "substrait-expr-macros" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919e5b5c5495d18dffb0b8369d74a143c893cbfb98b4337cecb31f3f9bcc112b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.72", -] - [[package]] name = "subtle" version = "2.6.1" @@ -5680,9 +5954,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -5909,7 +6183,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -5934,8 +6208,8 @@ dependencies = [ "num-traits", "once_cell", "pin-project", - "prost", - "prost-build", + "prost 0.12.6", + "prost-build 0.12.6", "tar", "thiserror", "ureq", @@ -5958,7 +6232,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -6072,7 +6346,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -6166,7 +6440,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -6247,42 +6521,14 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "typify" -version = "0.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c61e9db210bbff218e6535c664b37ec47da449169b98e7866d0580d0db75529" -dependencies = [ - "typify-impl 0.0.16", - "typify-macro 0.0.16", -] - [[package]] name = "typify" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" dependencies = [ - "typify-impl 0.1.0", - "typify-macro 0.1.0", -] - -[[package]] -name = "typify-impl" -version = "0.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95e32f38493804f88e2dc7a5412eccd872ea5452b4db9b0a77de4df180f2a87e" -dependencies = [ - "heck 0.4.1", - "log", - "proc-macro2", - "quote", - "regress 0.8.0", - "schemars", - "serde_json", - "syn 2.0.72", - "thiserror", - "unicode-ident", + "typify-impl", + "typify-macro", ] [[package]] @@ -6295,32 +6541,16 @@ dependencies = [ "log", "proc-macro2", "quote", - "regress 0.9.1", + "regress", "schemars", "semver", "serde", "serde_json", - "syn 2.0.72", + "syn 2.0.89", "thiserror", "unicode-ident", ] -[[package]] -name = "typify-macro" -version = "0.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc09508b72f63d521d68e42c7f172c7416d67986df44b3c7d1f7f9963948ed32" -dependencies = [ - "proc-macro2", - "quote", - "schemars", - "serde", - "serde_json", - "serde_tokenstream", - "syn 2.0.72", - "typify-impl 0.0.16", -] - [[package]] name = "typify-macro" version = "0.1.0" @@ -6334,8 +6564,8 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.72", - "typify-impl 0.1.0", + "syn 2.0.89", + "typify-impl", ] [[package]] @@ -6538,7 +6768,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", "wasm-bindgen-shared", ] @@ -6572,7 +6802,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -6958,7 +7188,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 011ef79cc0..ae01578718 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,17 +61,17 @@ lance-test-macros = { version = "=0.19.2", path = "./rust/lance-test-macros" } lance-testing = { version = "=0.19.2", path = "./rust/lance-testing" } approx = "0.5.1" # Note that this one does not include pyarrow -arrow = { version = "52.2", optional = false, features = ["prettyprint"] } -arrow-arith = "52.2" -arrow-array = "52.2" -arrow-buffer = "52.2" -arrow-cast = "52.2" -arrow-data = "52.2" -arrow-ipc = { version = "52.2", features = ["zstd"] } -arrow-ord = "52.2" -arrow-row = "52.2" -arrow-schema = "52.2" -arrow-select = "52.2" +arrow = { version = "53.2", optional = false, features = ["prettyprint"] } +arrow-arith = "53.2" +arrow-array = "53.2" +arrow-buffer = "53.2" +arrow-cast = "53.2" +arrow-data = "53.2" +arrow-ipc = { version = "53.2", features = ["zstd"] } +arrow-ord = "53.2" +arrow-row = "53.2" +arrow-schema = "53.2" +arrow-select = "53.2" async-recursion = "1.0" async-trait = "0.1" aws-config = "1.2.0" @@ -95,18 +95,18 @@ criterion = { version = "0.5", features = [ "html_reports", ] } crossbeam-queue = "0.3" -datafusion = { version = "41.0", default-features = false, features = [ +datafusion = { version = "42.0", default-features = false, features = [ "nested_expressions", "regex_expressions", "unicode_expressions", ] } -datafusion-common = "41.0" -datafusion-functions = { version = "41.0", features = ["regex_expressions"] } -datafusion-sql = "41.0" -datafusion-expr = "41.0" -datafusion-execution = "41.0" -datafusion-optimizer = "41.0" -datafusion-physical-expr = { version = "41.0", features = [ +datafusion-common = "42.0" +datafusion-functions = { version = "42.0", features = ["regex_expressions"] } +datafusion-sql = "42.0" +datafusion-expr = "42.0" +datafusion-execution = "42.0" +datafusion-optimizer = "42.0" +datafusion-physical-expr = { version = "42.0", features = [ "regex_expressions", ] } deepsize = "0.2.0" @@ -129,9 +129,9 @@ pin-project = "1.0" path_abs = "0.5" pprof = { version = "0.13", features = ["flamegraph", "criterion"] } proptest = "1.3.1" -prost = "0.12.2" -prost-build = "0.12.2" -prost-types = "0.12.2" +prost = "0.13.2" +prost-build = "0.13.2" +prost-types = "0.13.2" rand = { version = "0.8.3", features = ["small_rng"] } rangemap = { version = "1.0" } rayon = "1.10" diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 41af9afb28..1e6840cf73 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -21,7 +21,7 @@ datafusion.workspace = true datafusion-common.workspace = true datafusion-functions.workspace = true datafusion-physical-expr.workspace = true -datafusion-substrait = { version = "41.0", optional = true } +datafusion-substrait = { version = "42.0", optional = true } futures.workspace = true lance-arrow.workspace = true lance-core = { workspace = true, features = ["datafusion"] } @@ -32,7 +32,8 @@ snafu.workspace = true tokio.workspace = true [dev-dependencies] -substrait-expr = { version = "0.2.1" } +# TODO: This is too old +#substrait-expr = { version = "0.2.1" } lance-datagen.workspace = true [features] diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index 57cffb1261..38d9bbdc80 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -382,6 +382,7 @@ pub async fn parse_substrait(expr: &[u8], input_schema: Arc) -> Result BinaryDecoder<'a, T> { .null_bit_buffer(null_buf); } - let buf = bytes.into(); + let buf = Buffer::from_vec(bytes.to_vec()); let array_data = data_builder .add_buffer(offset_data.buffers()[0].clone()) .add_buffer(buf) diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 4f77fde5c7..7204e8264d 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -205,7 +205,7 @@ pub fn bytes_to_array( // alignment or size isn't right -- just make a copy if (bytes.len() < min_buffer_size) || (bytes.as_ptr().align_offset(*alignment) != 0) { - bytes.into() + Buffer::from_vec(bytes.to_vec()) } else { // SAFETY: the alignment is correct we can make this conversion unsafe { @@ -218,7 +218,7 @@ pub fn bytes_to_array( } } else { // cases we don't handle, just copy - bytes.into() + Buffer::from_vec(bytes.to_vec()) }; let array_data = ArrayDataBuilder::new(data_type.clone()) diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index f4de96c82a..b4de70f924 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -59,6 +59,7 @@ arrow.workspace = true datafusion.workspace = true datafusion-functions.workspace = true datafusion-physical-expr.workspace = true +datafusion-expr.workspace = true lapack = { version = "0.19.0", optional = true } snafu = { workspace = true } log = { workspace = true } diff --git a/rust/lance/src/datafusion/logical_plan.rs b/rust/lance/src/datafusion/logical_plan.rs index b45bdedbe2..6afb94e332 100644 --- a/rust/lance/src/datafusion/logical_plan.rs +++ b/rust/lance/src/datafusion/logical_plan.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::{any::Any, sync::Arc}; +use std::{any::Any, borrow::Cow, sync::Arc}; use arrow_schema::Schema as ArrowSchema; use async_trait::async_trait; @@ -34,7 +34,7 @@ impl TableProvider for Dataset { None } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { + fn get_logical_plan(&self) -> Option> { None } diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index b9fa8a7c0c..0b05aa5320 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -26,11 +26,11 @@ use datafusion::physical_plan::{ filter::FilterExec, limit::GlobalLimitExec, repartition::RepartitionExec, - udaf::create_aggregate_expr, union::UnionExec, ExecutionPlan, SendableRecordBatchStream, }; use datafusion::scalar::ScalarValue; +use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::{Partitioning, PhysicalExpr}; use futures::stream::{Stream, StreamExt}; use futures::TryStreamExt; @@ -957,17 +957,18 @@ impl Scanner { let plan = self.create_plan().await?; // Datafusion interprets COUNT(*) as COUNT(1) let one = Arc::new(Literal::new(ScalarValue::UInt8(Some(1)))); - let count_expr = create_aggregate_expr( - &count_udaf(), - &[one], - &[lit(1)], - &[], - &[], - &plan.schema(), - None, - false, - false, - )?; + + let input_phy_exprs: &[Arc] = &[one]; + let schema = plan.schema(); + + let mut builder = AggregateExprBuilder::new(count_udaf(), input_phy_exprs.to_vec()); + //builder = builder.logical_exprs(input_exprs.to_vec()); + builder = builder.schema(schema); + // TODO: This alias seem to be required? + builder = builder.alias("count".to_string()); + + let count_expr = builder.build()?; + let plan_schema = plan.schema(); let count_plan = Arc::new(AggregateExec::try_new( AggregateMode::Single, diff --git a/rust/lance/src/index/vector.rs b/rust/lance/src/index/vector.rs index ccde0156a1..1954bf7fed 100644 --- a/rust/lance/src/index/vector.rs +++ b/rust/lance/src/index/vector.rs @@ -369,7 +369,12 @@ pub(crate) async fn open_vector_index( vec_idx: &lance_index::pb::VectorIndex, reader: Arc, ) -> Result> { - let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type)?.into(); + let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type) + .map_err(|_| Error::Internal { + message: "Unexpected vector enum value".into(), + location: location!(), + })? + .into(); let mut last_stage: Option> = None; From 6449f3ec8048bfb3492a6dd748717149bad7599a Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 09:12:34 -0500 Subject: [PATCH 02/24] Use proper snafu converter --- rust/lance-core/src/error.rs | 10 ++++++++++ rust/lance/src/index/vector.rs | 7 +------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/rust/lance-core/src/error.rs b/rust/lance-core/src/error.rs index 67475d2f0f..c31c336aa4 100644 --- a/rust/lance-core/src/error.rs +++ b/rust/lance-core/src/error.rs @@ -224,6 +224,16 @@ impl From for Error { } } +impl From for Error { + #[track_caller] + fn from(e: prost::UnknownEnumValue) -> Self { + Self::IO { + source: box_error(e), + location: std::panic::Location::caller().to_snafu_location(), + } + } +} + impl From for Error { #[track_caller] fn from(e: tokio::task::JoinError) -> Self { diff --git a/rust/lance/src/index/vector.rs b/rust/lance/src/index/vector.rs index 1954bf7fed..ccde0156a1 100644 --- a/rust/lance/src/index/vector.rs +++ b/rust/lance/src/index/vector.rs @@ -369,12 +369,7 @@ pub(crate) async fn open_vector_index( vec_idx: &lance_index::pb::VectorIndex, reader: Arc, ) -> Result> { - let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type) - .map_err(|_| Error::Internal { - message: "Unexpected vector enum value".into(), - location: location!(), - })? - .into(); + let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type)?.into(); let mut last_stage: Option> = None; From 03c9b72911eab2c9048f31697ab0cd37dbda1d72 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 11:29:17 -0500 Subject: [PATCH 03/24] Update substrait dep --- Cargo.lock | 206 +++++++++++++++++++++---- rust/lance-datafusion/Cargo.toml | 3 +- rust/lance-datafusion/src/substrait.rs | 2 - rust/lance/src/dataset/scanner.rs | 6 +- 4 files changed, 181 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a14983d2c..7ca8d5e949 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1496,6 +1496,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -2102,7 +2111,7 @@ dependencies = [ "object_store 0.11.1", "pbjson-types", "prost 0.13.3", - "substrait", + "substrait 0.41.4", "url", ] @@ -3074,7 +3083,7 @@ dependencies = [ "combine", "jni-sys", "log", - "thiserror", + "thiserror 1.0.69", "walkdir", "windows-sys 0.45.0", ] @@ -3269,6 +3278,7 @@ dependencies = [ "log", "prost 0.13.3", "snafu 0.7.5", + "substrait-expr", "tokio", ] @@ -4016,7 +4026,7 @@ dependencies = [ "skeptic", "smallvec", "tagptr", - "thiserror", + "thiserror 1.0.69", "triomphe", "uuid", ] @@ -4649,7 +4659,7 @@ dependencies = [ "smallvec", "symbolic-demangle", "tempfile", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -4699,9 +4709,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.20" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", "syn 2.0.89", @@ -4907,7 +4917,7 @@ dependencies = [ "rustc-hash 2.0.0", "rustls 0.23.12", "socket2 0.5.7", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -4924,7 +4934,7 @@ dependencies = [ "rustc-hash 2.0.0", "rustls 0.23.12", "slab", - "thiserror", + "thiserror 1.0.69", "tinyvec", "tracing", ] @@ -4944,9 +4954,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -5090,7 +5100,7 @@ checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -5153,6 +5163,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "regress" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" +dependencies = [ + "hashbrown 0.14.5", + "memchr", +] + [[package]] name = "relative-path" version = "1.9.3" @@ -5616,9 +5636,9 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8790a7c3fe883e443eaa2af6f705952bc5d6e8671a220b9335c8cae92c037e74" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" dependencies = [ "proc-macro2", "quote", @@ -5908,10 +5928,73 @@ dependencies = [ "serde_json", "serde_yaml", "syn 2.0.89", - "typify", + "typify 0.1.0", "walkdir", ] +[[package]] +name = "substrait" +version = "0.49.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13a66e9f86d17064bc06ca30971acdb5e2715a2973ce856801185b70aad7938" +dependencies = [ + "heck 0.5.0", + "prettyplease", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", + "regress 0.10.1", + "schemars", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.89", + "typify 0.2.0", + "walkdir", +] + +[[package]] +name = "substrait-expr" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45a6a94f5dd69c5329a9c96c93ac5f17a8d64089ca21d29d7971825f7451941d" +dependencies = [ + "once_cell", + "prost 0.13.3", + "substrait 0.49.1", + "substrait-expr-funcgen", + "substrait-expr-macros", + "thiserror 2.0.3", +] + +[[package]] +name = "substrait-expr-funcgen" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc422ee763a029e27b5094e197f4af9b26866a728faeefe9a9e4b16d9c9724d6" +dependencies = [ + "convert_case", + "prettyplease", + "proc-macro2", + "quote", + "serde_yaml", + "substrait 0.49.1", + "syn 2.0.89", + "thiserror 2.0.3", +] + +[[package]] +name = "substrait-expr-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a2be2af0276c9d693f90d0f4e0e7b1790b14692538e0d418812249f41c055be" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "subtle" version = "2.6.1" @@ -6023,7 +6106,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror", + "thiserror 1.0.69", "time", "uuid", "winapi", @@ -6211,24 +6294,44 @@ dependencies = [ "prost 0.12.6", "prost-build 0.12.6", "tar", - "thiserror", + "thiserror 1.0.69", "ureq", ] [[package]] name = "thiserror" -version = "1.0.63" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "thiserror-impl", + "proc-macro2", + "quote", + "syn 2.0.89", ] [[package]] name = "thiserror-impl" -version = "1.0.63" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", @@ -6527,8 +6630,18 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" dependencies = [ - "typify-impl", - "typify-macro", + "typify-impl 0.1.0", + "typify-macro 0.1.0", +] + +[[package]] +name = "typify" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" +dependencies = [ + "typify-impl 0.2.0", + "typify-macro 0.2.0", ] [[package]] @@ -6541,13 +6654,33 @@ dependencies = [ "log", "proc-macro2", "quote", - "regress", + "regress 0.9.1", "schemars", "semver", "serde", "serde_json", "syn 2.0.89", - "thiserror", + "thiserror 1.0.69", + "unicode-ident", +] + +[[package]] +name = "typify-impl" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" +dependencies = [ + "heck 0.5.0", + "log", + "proc-macro2", + "quote", + "regress 0.10.1", + "schemars", + "semver", + "serde", + "serde_json", + "syn 2.0.89", + "thiserror 1.0.69", "unicode-ident", ] @@ -6565,7 +6698,24 @@ dependencies = [ "serde_json", "serde_tokenstream", "syn 2.0.89", - "typify-impl", + "typify-impl 0.1.0", +] + +[[package]] +name = "typify-macro" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" +dependencies = [ + "proc-macro2", + "quote", + "schemars", + "semver", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.89", + "typify-impl 0.2.0", ] [[package]] @@ -6591,9 +6741,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-normalization" diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 1e6840cf73..6887a747dc 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -32,8 +32,7 @@ snafu.workspace = true tokio.workspace = true [dev-dependencies] -# TODO: This is too old -#substrait-expr = { version = "0.2.1" } +substrait-expr = { version = "0.2.2" } lance-datagen.workspace = true [features] diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index 38d9bbdc80..57cffb1261 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -382,7 +382,6 @@ pub async fn parse_substrait(expr: &[u8], input_schema: Arc) -> Result Date: Tue, 26 Nov 2024 11:53:21 -0500 Subject: [PATCH 04/24] Change the handling of bytes.into() --- rust/lance-io/src/encodings/binary.rs | 2 +- rust/lance-io/src/encodings/plain.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/lance-io/src/encodings/binary.rs b/rust/lance-io/src/encodings/binary.rs index 4738c59a77..2292eaae6d 100644 --- a/rust/lance-io/src/encodings/binary.rs +++ b/rust/lance-io/src/encodings/binary.rs @@ -224,7 +224,7 @@ impl<'a, T: ByteArrayType> BinaryDecoder<'a, T> { .null_bit_buffer(null_buf); } - let buf = Buffer::from_vec(bytes.to_vec()); + let buf = Buffer::from_vec(bytes.into()); let array_data = data_builder .add_buffer(offset_data.buffers()[0].clone()) .add_buffer(buf) diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 7204e8264d..48a243ed2f 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -205,7 +205,7 @@ pub fn bytes_to_array( // alignment or size isn't right -- just make a copy if (bytes.len() < min_buffer_size) || (bytes.as_ptr().align_offset(*alignment) != 0) { - Buffer::from_vec(bytes.to_vec()) + Buffer::from_vec(bytes.into()) } else { // SAFETY: the alignment is correct we can make this conversion unsafe { @@ -218,7 +218,7 @@ pub fn bytes_to_array( } } else { // cases we don't handle, just copy - Buffer::from_vec(bytes.to_vec()) + Buffer::from_vec(bytes.into()) }; let array_data = ArrayDataBuilder::new(data_type.clone()) From e587921f37ef4ec484c92e021bccf50af56a6114 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 26 Nov 2024 10:12:26 -0800 Subject: [PATCH 05/24] Clean up and centralize bytes::Bytes -> arrow_buffer::Buffer routines --- Cargo.lock | 1 + rust/lance-arrow/Cargo.toml | 1 + rust/lance-arrow/src/deepcopy.rs | 2 +- rust/lance-arrow/src/lib.rs | 65 ++++++++++++++++++++++++++- rust/lance-io/src/encodings/binary.rs | 3 +- rust/lance-io/src/encodings/plain.rs | 16 ++----- 6 files changed, 73 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ca8d5e949..3387cb1f8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3209,6 +3209,7 @@ dependencies = [ "arrow-data 53.3.0", "arrow-schema 53.3.0", "arrow-select 53.3.0", + "bytes", "getrandom", "half", "num-traits", diff --git a/rust/lance-arrow/Cargo.toml b/rust/lance-arrow/Cargo.toml index 64dea8d8db..d6b870965b 100644 --- a/rust/lance-arrow/Cargo.toml +++ b/rust/lance-arrow/Cargo.toml @@ -20,6 +20,7 @@ arrow-data = { workspace = true } arrow-cast = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } +bytes = { workspace = true } half = { workspace = true } num-traits = { workspace = true } rand.workspace = true diff --git a/rust/lance-arrow/src/deepcopy.rs b/rust/lance-arrow/src/deepcopy.rs index 7a04fc1c9f..93b58fb9c8 100644 --- a/rust/lance-arrow/src/deepcopy.rs +++ b/rust/lance-arrow/src/deepcopy.rs @@ -8,7 +8,7 @@ use arrow_buffer::{Buffer, NullBuffer}; use arrow_data::ArrayData; pub fn deep_copy_buffer(buffer: &Buffer) -> Buffer { - Buffer::from(Vec::from(buffer.as_slice())) + Buffer::from(buffer.as_slice()) } fn deep_copy_nulls(nulls: &NullBuffer) -> Buffer { diff --git a/rust/lance-arrow/src/lib.rs b/rust/lance-arrow/src/lib.rs index eafd958659..cc0a6e1c68 100644 --- a/rust/lance-arrow/src/lib.rs +++ b/rust/lance-arrow/src/lib.rs @@ -5,14 +5,15 @@ //! //! To improve Arrow-RS ergonomic -use std::collections::HashMap; use std::sync::Arc; +use std::{collections::HashMap, ptr::NonNull}; use arrow_array::{ cast::AsArray, Array, ArrayRef, ArrowNumericType, FixedSizeBinaryArray, FixedSizeListArray, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatch, StructArray, UInt32Array, UInt8Array, }; +use arrow_buffer::MutableBuffer; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, IntervalUnit, Schema}; use arrow_select::{interleave::interleave, take::take}; @@ -654,6 +655,68 @@ pub fn interleave_batches( RecordBatch::try_new(schema, columns) } +pub trait BufferExt { + /// Create an `arrow_buffer::Buffer`` from a `bytes::Bytes` object + /// + /// The alignment must be specified (as `bytes_per_value`) since we want to make + /// sure we can safely reinterpret the buffer. + /// + /// If the buffer is properly aligned this will be zero-copy. If not, a copy + /// will be made and an owned buffer returned. + /// + /// If `bytes_per_value` is not a power of two, then we assume the buffer is + /// never going to be reinterpreted into another type and we can safely + /// ignore the alignment. + /// + /// Yes, the method name is odd. It's because there is already a `from_bytes` + /// which converts from `arrow_buffer::bytes::Bytes` (not `bytes::Bytes`) + fn from_bytes_bytes(bytes: bytes::Bytes, bytes_per_value: u64) -> Self; + + /// Allocates a new properly aligned arrow buffer and copies `bytes` into it + /// + /// `size_bytes` can be larger than `bytes` and, if so, the trailing bytes will + /// be zeroed out. + /// + /// # Panics + /// + /// Panics if `size_bytes` is less than `bytes.len()` + fn copy_bytes_bytes(bytes: bytes::Bytes, size_bytes: usize) -> Self; +} + +fn is_pwr_two(n: u64) -> bool { + n & (n - 1) == 0 +} + +impl BufferExt for arrow_buffer::Buffer { + fn from_bytes_bytes(bytes: bytes::Bytes, bytes_per_value: u64) -> Self { + if is_pwr_two(bytes_per_value) && bytes.as_ptr().align_offset(bytes_per_value as usize) != 0 + { + // The original buffer is not aligned, cannot zero-copy + let size_bytes = bytes.len(); + Self::copy_bytes_bytes(bytes, size_bytes) + } else { + // The original buffer is aligned, can zero-copy + // SAFETY: the alignment is correct we can make this conversion + unsafe { + Self::from_custom_allocation( + NonNull::new(bytes.as_ptr() as _).expect("should be a valid pointer"), + bytes.len(), + Arc::new(bytes), + ) + } + } + } + + fn copy_bytes_bytes(bytes: bytes::Bytes, size_bytes: usize) -> Self { + assert!(size_bytes >= bytes.len()); + let mut buf = MutableBuffer::with_capacity(size_bytes); + let to_fill = size_bytes - bytes.len(); + buf.extend(bytes); + buf.extend(std::iter::repeat(0).take(to_fill)); + Self::from(buf) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust/lance-io/src/encodings/binary.rs b/rust/lance-io/src/encodings/binary.rs index 2292eaae6d..d7c4c2ba11 100644 --- a/rust/lance-io/src/encodings/binary.rs +++ b/rust/lance-io/src/encodings/binary.rs @@ -26,6 +26,7 @@ use arrow_schema::DataType; use async_trait::async_trait; use bytes::Bytes; use futures::{StreamExt, TryStreamExt}; +use lance_arrow::BufferExt; use snafu::{location, Location}; use tokio::io::AsyncWriteExt; @@ -224,7 +225,7 @@ impl<'a, T: ByteArrayType> BinaryDecoder<'a, T> { .null_bit_buffer(null_buf); } - let buf = Buffer::from_vec(bytes.into()); + let buf = Buffer::from_bytes_bytes(bytes, /*bytes_per_value=*/ 1); let array_data = data_builder .add_buffer(offset_data.buffers()[0].clone()) .add_buffer(buf) diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 48a243ed2f..50fd1926dc 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -7,7 +7,6 @@ //! it stores the array directly in the file. It offers O(1) read access. use std::ops::{Range, RangeFrom, RangeFull, RangeTo}; -use std::ptr::NonNull; use std::slice::from_raw_parts; use std::sync::Arc; @@ -204,21 +203,14 @@ pub fn bytes_to_array( let min_buffer_size = len_plus_offset.saturating_mul(*byte_width); // alignment or size isn't right -- just make a copy - if (bytes.len() < min_buffer_size) || (bytes.as_ptr().align_offset(*alignment) != 0) { - Buffer::from_vec(bytes.into()) + if bytes.len() < min_buffer_size { + Buffer::copy_bytes_bytes(bytes, min_buffer_size) } else { - // SAFETY: the alignment is correct we can make this conversion - unsafe { - Buffer::from_custom_allocation( - NonNull::new(bytes.as_ptr() as _).expect("should be a valid pointer"), - bytes.len(), - Arc::new(bytes), - ) - } + Buffer::from_bytes_bytes(bytes, *alignment as u64) } } else { // cases we don't handle, just copy - Buffer::from_vec(bytes.into()) + Buffer::from_slice_ref(bytes) }; let array_data = ArrayDataBuilder::new(data_type.clone()) From 388740b9cc7e7fff753bc7c8eb42d1ea51ab69b6 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Mon, 25 Nov 2024 18:42:36 -0500 Subject: [PATCH 06/24] Attempt to update arrow/datafusion --- Cargo.toml | 44 +++++++++++------------ rust/lance-datafusion/Cargo.toml | 5 +-- rust/lance-datafusion/src/substrait.rs | 2 ++ rust/lance-io/src/encodings/binary.rs | 2 +- rust/lance-io/src/encodings/plain.rs | 4 +-- rust/lance/Cargo.toml | 1 + rust/lance/src/datafusion/logical_plan.rs | 4 +-- rust/lance/src/dataset/scanner.rs | 25 ++++++------- rust/lance/src/index/vector.rs | 7 +++- 9 files changed, 52 insertions(+), 42 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e4f3174669..54a43ef05a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,17 +61,17 @@ lance-test-macros = { version = "=0.20.0", path = "./rust/lance-test-macros" } lance-testing = { version = "=0.20.0", path = "./rust/lance-testing" } approx = "0.5.1" # Note that this one does not include pyarrow -arrow = { version = "52.2", optional = false, features = ["prettyprint"] } -arrow-arith = "52.2" -arrow-array = "52.2" -arrow-buffer = "52.2" -arrow-cast = "52.2" -arrow-data = "52.2" -arrow-ipc = { version = "52.2", features = ["zstd"] } -arrow-ord = "52.2" -arrow-row = "52.2" -arrow-schema = "52.2" -arrow-select = "52.2" +arrow = { version = "53.2", optional = false, features = ["prettyprint"] } +arrow-arith = "53.2" +arrow-array = "53.2" +arrow-buffer = "53.2" +arrow-cast = "53.2" +arrow-data = "53.2" +arrow-ipc = { version = "53.2", features = ["zstd"] } +arrow-ord = "53.2" +arrow-row = "53.2" +arrow-schema = "53.2" +arrow-select = "53.2" async-recursion = "1.0" async-trait = "0.1" aws-config = "1.2.0" @@ -95,18 +95,18 @@ criterion = { version = "0.5", features = [ "html_reports", ] } crossbeam-queue = "0.3" -datafusion = { version = "41.0", default-features = false, features = [ +datafusion = { version = "42.0", default-features = false, features = [ "nested_expressions", "regex_expressions", "unicode_expressions", ] } -datafusion-common = "41.0" -datafusion-functions = { version = "41.0", features = ["regex_expressions"] } -datafusion-sql = "41.0" -datafusion-expr = "41.0" -datafusion-execution = "41.0" -datafusion-optimizer = "41.0" -datafusion-physical-expr = { version = "41.0", features = [ +datafusion-common = "42.0" +datafusion-functions = { version = "42.0", features = ["regex_expressions"] } +datafusion-sql = "42.0" +datafusion-expr = "42.0" +datafusion-execution = "42.0" +datafusion-optimizer = "42.0" +datafusion-physical-expr = { version = "42.0", features = [ "regex_expressions", ] } deepsize = "0.2.0" @@ -129,9 +129,9 @@ pin-project = "1.0" path_abs = "0.5" pprof = { version = "0.14.0", features = ["flamegraph", "criterion"] } proptest = "1.3.1" -prost = "0.12.2" -prost-build = "0.12.2" -prost-types = "0.12.2" +prost = "0.13.2" +prost-build = "0.13.2" +prost-types = "0.13.2" rand = { version = "0.8.3", features = ["small_rng"] } rangemap = { version = "1.0" } rayon = "1.10" diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 41af9afb28..1e6840cf73 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -21,7 +21,7 @@ datafusion.workspace = true datafusion-common.workspace = true datafusion-functions.workspace = true datafusion-physical-expr.workspace = true -datafusion-substrait = { version = "41.0", optional = true } +datafusion-substrait = { version = "42.0", optional = true } futures.workspace = true lance-arrow.workspace = true lance-core = { workspace = true, features = ["datafusion"] } @@ -32,7 +32,8 @@ snafu.workspace = true tokio.workspace = true [dev-dependencies] -substrait-expr = { version = "0.2.1" } +# TODO: This is too old +#substrait-expr = { version = "0.2.1" } lance-datagen.workspace = true [features] diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index 57cffb1261..38d9bbdc80 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -382,6 +382,7 @@ pub async fn parse_substrait(expr: &[u8], input_schema: Arc) -> Result BinaryDecoder<'a, T> { .null_bit_buffer(null_buf); } - let buf = bytes.into(); + let buf = Buffer::from_vec(bytes.to_vec()); let array_data = data_builder .add_buffer(offset_data.buffers()[0].clone()) .add_buffer(buf) diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 4f77fde5c7..7204e8264d 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -205,7 +205,7 @@ pub fn bytes_to_array( // alignment or size isn't right -- just make a copy if (bytes.len() < min_buffer_size) || (bytes.as_ptr().align_offset(*alignment) != 0) { - bytes.into() + Buffer::from_vec(bytes.to_vec()) } else { // SAFETY: the alignment is correct we can make this conversion unsafe { @@ -218,7 +218,7 @@ pub fn bytes_to_array( } } else { // cases we don't handle, just copy - bytes.into() + Buffer::from_vec(bytes.to_vec()) }; let array_data = ArrayDataBuilder::new(data_type.clone()) diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index 05b0a0a381..66339db32d 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -60,6 +60,7 @@ arrow.workspace = true datafusion.workspace = true datafusion-functions.workspace = true datafusion-physical-expr.workspace = true +datafusion-expr.workspace = true lapack = { version = "0.19.0", optional = true } snafu = { workspace = true } log = { workspace = true } diff --git a/rust/lance/src/datafusion/logical_plan.rs b/rust/lance/src/datafusion/logical_plan.rs index b45bdedbe2..6afb94e332 100644 --- a/rust/lance/src/datafusion/logical_plan.rs +++ b/rust/lance/src/datafusion/logical_plan.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::{any::Any, sync::Arc}; +use std::{any::Any, borrow::Cow, sync::Arc}; use arrow_schema::Schema as ArrowSchema; use async_trait::async_trait; @@ -34,7 +34,7 @@ impl TableProvider for Dataset { None } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { + fn get_logical_plan(&self) -> Option> { None } diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index cb5fe09453..ec3cc64deb 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -26,11 +26,11 @@ use datafusion::physical_plan::{ filter::FilterExec, limit::GlobalLimitExec, repartition::RepartitionExec, - udaf::create_aggregate_expr, union::UnionExec, ExecutionPlan, SendableRecordBatchStream, }; use datafusion::scalar::ScalarValue; +use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::{Partitioning, PhysicalExpr}; use futures::stream::{Stream, StreamExt}; use futures::TryStreamExt; @@ -958,17 +958,18 @@ impl Scanner { let plan = self.create_plan().await?; // Datafusion interprets COUNT(*) as COUNT(1) let one = Arc::new(Literal::new(ScalarValue::UInt8(Some(1)))); - let count_expr = create_aggregate_expr( - &count_udaf(), - &[one], - &[lit(1)], - &[], - &[], - &plan.schema(), - None, - false, - false, - )?; + + let input_phy_exprs: &[Arc] = &[one]; + let schema = plan.schema(); + + let mut builder = AggregateExprBuilder::new(count_udaf(), input_phy_exprs.to_vec()); + //builder = builder.logical_exprs(input_exprs.to_vec()); + builder = builder.schema(schema); + // TODO: This alias seem to be required? + builder = builder.alias("count".to_string()); + + let count_expr = builder.build()?; + let plan_schema = plan.schema(); let count_plan = Arc::new(AggregateExec::try_new( AggregateMode::Single, diff --git a/rust/lance/src/index/vector.rs b/rust/lance/src/index/vector.rs index bd05fcc643..35eb5981ec 100644 --- a/rust/lance/src/index/vector.rs +++ b/rust/lance/src/index/vector.rs @@ -408,7 +408,12 @@ pub(crate) async fn open_vector_index( vec_idx: &lance_index::pb::VectorIndex, reader: Arc, ) -> Result> { - let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type)?.into(); + let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type) + .map_err(|_| Error::Internal { + message: "Unexpected vector enum value".into(), + location: location!(), + })? + .into(); let mut last_stage: Option> = None; From 79fe55f48c355ac8bf559141c76c475862e287ec Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 09:12:34 -0500 Subject: [PATCH 07/24] Use proper snafu converter --- rust/lance-core/src/error.rs | 10 ++++++++++ rust/lance/src/index/vector.rs | 7 +------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/rust/lance-core/src/error.rs b/rust/lance-core/src/error.rs index c186d77c37..be694b7b4d 100644 --- a/rust/lance-core/src/error.rs +++ b/rust/lance-core/src/error.rs @@ -226,6 +226,16 @@ impl From for Error { } } +impl From for Error { + #[track_caller] + fn from(e: prost::UnknownEnumValue) -> Self { + Self::IO { + source: box_error(e), + location: std::panic::Location::caller().to_snafu_location(), + } + } +} + impl From for Error { #[track_caller] fn from(e: tokio::task::JoinError) -> Self { diff --git a/rust/lance/src/index/vector.rs b/rust/lance/src/index/vector.rs index 35eb5981ec..bd05fcc643 100644 --- a/rust/lance/src/index/vector.rs +++ b/rust/lance/src/index/vector.rs @@ -408,12 +408,7 @@ pub(crate) async fn open_vector_index( vec_idx: &lance_index::pb::VectorIndex, reader: Arc, ) -> Result> { - let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type) - .map_err(|_| Error::Internal { - message: "Unexpected vector enum value".into(), - location: location!(), - })? - .into(); + let metric_type = pb::VectorMetricType::try_from(vec_idx.metric_type)?.into(); let mut last_stage: Option> = None; From 63053294b1cd4ffd1e8a3989828ca540e528e1a4 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 11:29:17 -0500 Subject: [PATCH 08/24] Update substrait dep --- rust/lance-datafusion/Cargo.toml | 3 +-- rust/lance-datafusion/src/substrait.rs | 2 -- rust/lance/src/dataset/scanner.rs | 6 ++---- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 1e6840cf73..6887a747dc 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -32,8 +32,7 @@ snafu.workspace = true tokio.workspace = true [dev-dependencies] -# TODO: This is too old -#substrait-expr = { version = "0.2.1" } +substrait-expr = { version = "0.2.2" } lance-datagen.workspace = true [features] diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index 38d9bbdc80..57cffb1261 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -382,7 +382,6 @@ pub async fn parse_substrait(expr: &[u8], input_schema: Arc) -> Result Date: Tue, 26 Nov 2024 11:53:21 -0500 Subject: [PATCH 09/24] Change the handling of bytes.into() --- rust/lance-io/src/encodings/binary.rs | 2 +- rust/lance-io/src/encodings/plain.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/lance-io/src/encodings/binary.rs b/rust/lance-io/src/encodings/binary.rs index 4738c59a77..2292eaae6d 100644 --- a/rust/lance-io/src/encodings/binary.rs +++ b/rust/lance-io/src/encodings/binary.rs @@ -224,7 +224,7 @@ impl<'a, T: ByteArrayType> BinaryDecoder<'a, T> { .null_bit_buffer(null_buf); } - let buf = Buffer::from_vec(bytes.to_vec()); + let buf = Buffer::from_vec(bytes.into()); let array_data = data_builder .add_buffer(offset_data.buffers()[0].clone()) .add_buffer(buf) diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 7204e8264d..48a243ed2f 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -205,7 +205,7 @@ pub fn bytes_to_array( // alignment or size isn't right -- just make a copy if (bytes.len() < min_buffer_size) || (bytes.as_ptr().align_offset(*alignment) != 0) { - Buffer::from_vec(bytes.to_vec()) + Buffer::from_vec(bytes.into()) } else { // SAFETY: the alignment is correct we can make this conversion unsafe { @@ -218,7 +218,7 @@ pub fn bytes_to_array( } } else { // cases we don't handle, just copy - Buffer::from_vec(bytes.to_vec()) + Buffer::from_vec(bytes.into()) }; let array_data = ArrayDataBuilder::new(data_type.clone()) From 41876442a6d4efb0b911723470233a0c0be026a2 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 26 Nov 2024 10:12:26 -0800 Subject: [PATCH 10/24] Clean up and centralize bytes::Bytes -> arrow_buffer::Buffer routines --- rust/lance-arrow/Cargo.toml | 1 + rust/lance-arrow/src/deepcopy.rs | 2 +- rust/lance-arrow/src/lib.rs | 65 ++++++++++++++++++++++++++- rust/lance-io/src/encodings/binary.rs | 3 +- rust/lance-io/src/encodings/plain.rs | 16 ++----- 5 files changed, 72 insertions(+), 15 deletions(-) diff --git a/rust/lance-arrow/Cargo.toml b/rust/lance-arrow/Cargo.toml index 64dea8d8db..d6b870965b 100644 --- a/rust/lance-arrow/Cargo.toml +++ b/rust/lance-arrow/Cargo.toml @@ -20,6 +20,7 @@ arrow-data = { workspace = true } arrow-cast = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } +bytes = { workspace = true } half = { workspace = true } num-traits = { workspace = true } rand.workspace = true diff --git a/rust/lance-arrow/src/deepcopy.rs b/rust/lance-arrow/src/deepcopy.rs index 7a04fc1c9f..93b58fb9c8 100644 --- a/rust/lance-arrow/src/deepcopy.rs +++ b/rust/lance-arrow/src/deepcopy.rs @@ -8,7 +8,7 @@ use arrow_buffer::{Buffer, NullBuffer}; use arrow_data::ArrayData; pub fn deep_copy_buffer(buffer: &Buffer) -> Buffer { - Buffer::from(Vec::from(buffer.as_slice())) + Buffer::from(buffer.as_slice()) } fn deep_copy_nulls(nulls: &NullBuffer) -> Buffer { diff --git a/rust/lance-arrow/src/lib.rs b/rust/lance-arrow/src/lib.rs index eafd958659..cc0a6e1c68 100644 --- a/rust/lance-arrow/src/lib.rs +++ b/rust/lance-arrow/src/lib.rs @@ -5,14 +5,15 @@ //! //! To improve Arrow-RS ergonomic -use std::collections::HashMap; use std::sync::Arc; +use std::{collections::HashMap, ptr::NonNull}; use arrow_array::{ cast::AsArray, Array, ArrayRef, ArrowNumericType, FixedSizeBinaryArray, FixedSizeListArray, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatch, StructArray, UInt32Array, UInt8Array, }; +use arrow_buffer::MutableBuffer; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, IntervalUnit, Schema}; use arrow_select::{interleave::interleave, take::take}; @@ -654,6 +655,68 @@ pub fn interleave_batches( RecordBatch::try_new(schema, columns) } +pub trait BufferExt { + /// Create an `arrow_buffer::Buffer`` from a `bytes::Bytes` object + /// + /// The alignment must be specified (as `bytes_per_value`) since we want to make + /// sure we can safely reinterpret the buffer. + /// + /// If the buffer is properly aligned this will be zero-copy. If not, a copy + /// will be made and an owned buffer returned. + /// + /// If `bytes_per_value` is not a power of two, then we assume the buffer is + /// never going to be reinterpreted into another type and we can safely + /// ignore the alignment. + /// + /// Yes, the method name is odd. It's because there is already a `from_bytes` + /// which converts from `arrow_buffer::bytes::Bytes` (not `bytes::Bytes`) + fn from_bytes_bytes(bytes: bytes::Bytes, bytes_per_value: u64) -> Self; + + /// Allocates a new properly aligned arrow buffer and copies `bytes` into it + /// + /// `size_bytes` can be larger than `bytes` and, if so, the trailing bytes will + /// be zeroed out. + /// + /// # Panics + /// + /// Panics if `size_bytes` is less than `bytes.len()` + fn copy_bytes_bytes(bytes: bytes::Bytes, size_bytes: usize) -> Self; +} + +fn is_pwr_two(n: u64) -> bool { + n & (n - 1) == 0 +} + +impl BufferExt for arrow_buffer::Buffer { + fn from_bytes_bytes(bytes: bytes::Bytes, bytes_per_value: u64) -> Self { + if is_pwr_two(bytes_per_value) && bytes.as_ptr().align_offset(bytes_per_value as usize) != 0 + { + // The original buffer is not aligned, cannot zero-copy + let size_bytes = bytes.len(); + Self::copy_bytes_bytes(bytes, size_bytes) + } else { + // The original buffer is aligned, can zero-copy + // SAFETY: the alignment is correct we can make this conversion + unsafe { + Self::from_custom_allocation( + NonNull::new(bytes.as_ptr() as _).expect("should be a valid pointer"), + bytes.len(), + Arc::new(bytes), + ) + } + } + } + + fn copy_bytes_bytes(bytes: bytes::Bytes, size_bytes: usize) -> Self { + assert!(size_bytes >= bytes.len()); + let mut buf = MutableBuffer::with_capacity(size_bytes); + let to_fill = size_bytes - bytes.len(); + buf.extend(bytes); + buf.extend(std::iter::repeat(0).take(to_fill)); + Self::from(buf) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust/lance-io/src/encodings/binary.rs b/rust/lance-io/src/encodings/binary.rs index 2292eaae6d..d7c4c2ba11 100644 --- a/rust/lance-io/src/encodings/binary.rs +++ b/rust/lance-io/src/encodings/binary.rs @@ -26,6 +26,7 @@ use arrow_schema::DataType; use async_trait::async_trait; use bytes::Bytes; use futures::{StreamExt, TryStreamExt}; +use lance_arrow::BufferExt; use snafu::{location, Location}; use tokio::io::AsyncWriteExt; @@ -224,7 +225,7 @@ impl<'a, T: ByteArrayType> BinaryDecoder<'a, T> { .null_bit_buffer(null_buf); } - let buf = Buffer::from_vec(bytes.into()); + let buf = Buffer::from_bytes_bytes(bytes, /*bytes_per_value=*/ 1); let array_data = data_builder .add_buffer(offset_data.buffers()[0].clone()) .add_buffer(buf) diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 48a243ed2f..50fd1926dc 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -7,7 +7,6 @@ //! it stores the array directly in the file. It offers O(1) read access. use std::ops::{Range, RangeFrom, RangeFull, RangeTo}; -use std::ptr::NonNull; use std::slice::from_raw_parts; use std::sync::Arc; @@ -204,21 +203,14 @@ pub fn bytes_to_array( let min_buffer_size = len_plus_offset.saturating_mul(*byte_width); // alignment or size isn't right -- just make a copy - if (bytes.len() < min_buffer_size) || (bytes.as_ptr().align_offset(*alignment) != 0) { - Buffer::from_vec(bytes.into()) + if bytes.len() < min_buffer_size { + Buffer::copy_bytes_bytes(bytes, min_buffer_size) } else { - // SAFETY: the alignment is correct we can make this conversion - unsafe { - Buffer::from_custom_allocation( - NonNull::new(bytes.as_ptr() as _).expect("should be a valid pointer"), - bytes.len(), - Arc::new(bytes), - ) - } + Buffer::from_bytes_bytes(bytes, *alignment as u64) } } else { // cases we don't handle, just copy - Buffer::from_vec(bytes.into()) + Buffer::from_slice_ref(bytes) }; let array_data = ArrayDataBuilder::new(data_type.clone()) From 8eb87ead43e1ba14d19560bfe5771b9faf236cbf Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 13:56:19 -0500 Subject: [PATCH 11/24] Eliminate more arrow-52 deps --- Cargo.lock | 751 +++++++++++++++++++++++++++++++--------------- Cargo.toml | 2 +- python/Cargo.toml | 10 +- 3 files changed, 509 insertions(+), 254 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 40107d0766..934b54d7d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,9 +181,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" dependencies = [ "arrow-arith", "arrow-array", @@ -202,9 +202,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" +checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" dependencies = [ "arrow-array", "arrow-buffer", @@ -217,9 +217,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" +checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" dependencies = [ "ahash", "arrow-buffer", @@ -228,15 +228,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown", + "hashbrown 0.15.2", "num", ] [[package]] name = "arrow-buffer" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" +checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" dependencies = [ "bytes", "half", @@ -245,9 +245,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" +checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -266,9 +266,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" +checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" dependencies = [ "arrow-array", "arrow-buffer", @@ -285,9 +285,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" +checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" dependencies = [ "arrow-buffer", "arrow-schema", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" +checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" dependencies = [ "arrow-array", "arrow-buffer", @@ -313,9 +313,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" +checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" dependencies = [ "arrow-array", "arrow-buffer", @@ -333,9 +333,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" +checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -348,9 +348,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" +checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" dependencies = [ "ahash", "arrow-array", @@ -362,18 +362,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" +checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" +checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" dependencies = [ "ahash", "arrow-array", @@ -385,9 +385,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" +checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" dependencies = [ "arrow-array", "arrow-buffer", @@ -545,7 +545,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -588,7 +588,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -1083,9 +1083,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -1216,9 +1216,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" dependencies = [ "chrono", "chrono-tz-build", @@ -1227,12 +1227,11 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" dependencies = [ "parse-zoneinfo", - "phf", "phf_codegen", ] @@ -1294,7 +1293,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -1557,7 +1556,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", @@ -1571,7 +1570,7 @@ checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", - "hashbrown", + "hashbrown 0.14.5", "lock_api", "once_cell", "parking_lot_core", @@ -1579,9 +1578,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4fd4a99fc70d40ef7e52b243b4a399c3f8d353a40d5ecb200deee05e49c61bb" +checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" dependencies = [ "ahash", "arrow", @@ -1602,6 +1601,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1612,12 +1612,12 @@ dependencies = [ "futures", "glob", "half", - "hashbrown", + "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "num_cpus", - "object_store", + "object_store 0.11.1", "parking_lot", "parquet", "paste", @@ -1635,9 +1635,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b3cfbd84c6003594ae1972314e3df303a27ce8ce755fcea3240c90f4c0529" +checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" dependencies = [ "arrow-schema", "async-trait", @@ -1645,13 +1645,14 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "parking_lot", ] [[package]] name = "datafusion-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fdbc877e3e40dcf88cc8f283d9f5c8851f0a3aa07fee657b1b75ac1ad49b9c" +checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" dependencies = [ "ahash", "arrow", @@ -1660,29 +1661,32 @@ dependencies = [ "arrow-schema", "chrono", "half", - "hashbrown", + "hashbrown 0.14.5", "instant", "libc", "num_cpus", - "object_store", + "object_store 0.11.1", "parquet", + "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7496d1f664179f6ce3a5cbef6566056ccaf3ea4aa72cc455f80e62c1dd86b1" +checksum = "694c9d7ea1b82f95768215c4cb5c2d5c613690624e832a7ee64be563139d582f" dependencies = [ + "log", "tokio", ] [[package]] name = "datafusion-execution" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1" +checksum = "30b4cedcd98151e0a297f34021b6b232ff0ebc0f2f18ea5e7446b5ebda99b1a1" dependencies = [ "arrow", "chrono", @@ -1690,9 +1694,9 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown", + "hashbrown 0.14.5", "log", - "object_store", + "object_store 0.11.1", "parking_lot", "rand", "tempfile", @@ -1701,9 +1705,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c1841c409d9518c17971d15c9bae62e629eb937e6fb6c68cd32e9186f8b30d2" +checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" dependencies = [ "ahash", "arrow", @@ -1711,6 +1715,9 @@ dependencies = [ "arrow-buffer", "chrono", "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "paste", "serde_json", "sqlparser", @@ -1718,11 +1725,22 @@ dependencies = [ "strum_macros", ] +[[package]] +name = "datafusion-expr-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1ba2bb018218d9260bbd7de6a46a20f61b93d4911dba8aa07735625004c4fb" +dependencies = [ + "arrow", + "datafusion-common", + "paste", +] + [[package]] name = "datafusion-functions" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e481cf34d2a444bd8fa09b65945f0ce83dc92df8665b761505b3d9f351bebb" +checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" dependencies = [ "arrow", "arrow-buffer", @@ -1733,9 +1751,9 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", - "hashbrown", + "hashbrown 0.14.5", "hex", - "itertools 0.12.1", + "itertools 0.13.0", "log", "md-5", "rand", @@ -1747,9 +1765,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b4ece19f73c02727e5e8654d79cd5652de371352c1df3c4ac3e419ecd6943fb" +checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" dependencies = [ "ahash", "arrow", @@ -1757,17 +1775,34 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", + "half", "log", "paste", "sqlparser", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2285d080dfecdfb8605b0ab2f1a41e2473208dc8e9bd6f5d1dbcfe97f517e6f" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + [[package]] name = "datafusion-functions-nested" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1474552cc824e8c9c88177d454db5781d4b66757d4aca75719306b8343a5e8d" +checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" dependencies = [ "arrow", "arrow-array", @@ -1779,17 +1814,30 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools 0.13.0", "log", "paste", "rand", ] +[[package]] +name = "datafusion-functions-window" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e78d30ebd6e9f74d4aeddec32744f5a18b5f9584591bc586fb5259c4848bac5" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", +] + [[package]] name = "datafusion-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791ff56f55608bc542d1ea7a68a64bdc86a9413f5a381d06a39fd49c2a3ab906" +checksum = "be172c44bf344df707e0c041fa3f41e6dc5fb0976f539c68bc442bca150ee58c" dependencies = [ "arrow", "async-trait", @@ -1797,9 +1845,9 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown", + "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "regex-syntax 0.8.4", @@ -1807,9 +1855,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a223962b3041304a3e20ed07a21d5de3d88d7e4e71ca192135db6d24e3365a4" +checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" dependencies = [ "ahash", "arrow", @@ -1823,12 +1871,14 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown", + "hashbrown 0.14.5", "hex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1837,35 +1887,37 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5e7d8532a1601cd916881db87a70b0a599900d23f3db2897d389032da53bc6" +checksum = "242ba8a26351d9ca16295814c46743b0d1b00ec372174bdfbba991d0953dd596" dependencies = [ "ahash", "arrow", "datafusion-common", - "datafusion-expr", - "hashbrown", + "datafusion-expr-common", + "hashbrown 0.14.5", "rand", ] [[package]] name = "datafusion-physical-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb9c78f308e050f5004671039786a925c3fee83b90004e9fcfd328d7febdcc0" +checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" dependencies = [ + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-plan" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d1116949432eb2d30f6362707e2846d942e491052a206f2ddcb42d08aea1ffe" +checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" dependencies = [ "ahash", "arrow", @@ -1880,13 +1932,14 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", - "hashbrown", + "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1897,9 +1950,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45d0180711165fe94015d7c4123eb3e1cf5fb60b1506453200b8d1ce666bef0" +checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" dependencies = [ "arrow", "arrow-array", @@ -1914,19 +1967,19 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0a0055aa98246c79f98f0d03df11f16cb7adc87818d02d4413e3f3cdadbbee" +checksum = "220d7ab0ffadd8b1af753904b18dd92d270271810b1ce9f8be3c3dbe2392b636" dependencies = [ "arrow-buffer", "async-recursion", "chrono", "datafusion", - "itertools 0.12.1", - "object_store", + "itertools 0.13.0", + "object_store 0.11.1", "pbjson-types", - "prost", - "substrait 0.36.0", + "prost 0.13.3", + "substrait 0.41.9", "url", ] @@ -2088,7 +2141,7 @@ checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -2349,7 +2402,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -2494,6 +2547,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + [[package]] name = "heck" version = "0.4.1" @@ -2771,7 +2830,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -2903,7 +2962,7 @@ dependencies = [ "combine", "jni-sys", "log", - "thiserror", + "thiserror 1.0.69", "walkdir", "windows-sys 0.45.0", ] @@ -2967,6 +3026,7 @@ dependencies = [ "criterion", "dashmap 5.5.3", "datafusion", + "datafusion-expr", "datafusion-functions", "datafusion-physical-expr", "deepsize", @@ -2993,21 +3053,21 @@ dependencies = [ "lzma-sys", "mock_instant", "moka", - "object_store", + "object_store 0.10.2", "permutation", "pin-project", "pprof", "pretty_assertions", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "random_word", "roaring", "rstest", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "tfrecord", @@ -3029,6 +3089,7 @@ dependencies = [ "arrow-data", "arrow-schema", "arrow-select", + "bytes", "getrandom", "half", "num-traits", @@ -3058,14 +3119,14 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.10.2", "pin-project", "proptest", - "prost", + "prost 0.13.3", "rand", "roaring", "serde_json", - "snafu", + "snafu 0.7.5", "tempfile", "tokio", "tokio-stream", @@ -3096,8 +3157,8 @@ dependencies = [ "lance-datagen", "lazy_static", "log", - "prost", - "snafu", + "prost 0.13.3", + "snafu 0.7.5", "substrait-expr", "tokio", ] @@ -3150,14 +3211,14 @@ dependencies = [ "num-traits", "paste", "pprof", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "rand_xoshiro", "rstest", "seq-macro", - "snafu", + "snafu 0.7.5", "tempfile", "test-log", "tokio", @@ -3188,11 +3249,11 @@ dependencies = [ "lance-io", "log", "pprof", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", - "snafu", + "snafu 0.7.5", "test-log", "tokio", ] @@ -3223,16 +3284,16 @@ dependencies = [ "lance-testing", "log", "num-traits", - "object_store", + "object_store 0.10.2", "pprof", "pretty_assertions", "proptest", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "roaring", - "snafu", + "snafu 0.7.5", "tempfile", "test-log", "tokio", @@ -3279,17 +3340,17 @@ dependencies = [ "log", "moka", "num-traits", - "object_store", + "object_store 0.10.2", "pprof", - "prost", - "prost-build", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "random_word", "rayon", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "test-log", @@ -3326,16 +3387,16 @@ dependencies = [ "lazy_static", "log", "mockall", - "object_store", + "object_store 0.10.2", "parquet", "path_abs", "pin-project", "pprof", - "prost", - "prost-build", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "shellexpand", - "snafu", + "snafu 0.7.5", "tempfile", "test-log", "tokio", @@ -3360,7 +3421,7 @@ dependencies = [ "lazy_static", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", ] @@ -3418,19 +3479,19 @@ dependencies = [ "lance-io", "lazy_static", "log", - "object_store", + "object_store 0.10.2", "pprof", "pretty_assertions", "proptest", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "rangemap", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "url", @@ -3443,7 +3504,7 @@ version = "0.20.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -3491,9 +3552,9 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "lexical-core" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -3504,9 +3565,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -3515,9 +3576,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "0.8.6" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" dependencies = [ "lexical-util", "static_assertions", @@ -3525,18 +3586,18 @@ dependencies = [ [[package]] name = "lexical-util" -version = "0.8.5" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" dependencies = [ "lexical-util", "lexical-write-integer", @@ -3545,9 +3606,9 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" dependencies = [ "lexical-util", "static_assertions", @@ -3612,7 +3673,7 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -3750,7 +3811,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -3772,7 +3833,7 @@ dependencies = [ "rustc_version", "smallvec", "tagptr", - "thiserror", + "thiserror 1.0.69", "triomphe", "uuid", ] @@ -3962,7 +4023,28 @@ dependencies = [ "rustls-pemfile 2.1.3", "serde", "serde_json", - "snafu", + "snafu 0.7.5", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu 0.8.5", "tokio", "tracing", "url", @@ -4060,9 +4142,9 @@ dependencies = [ [[package]] name = "parquet" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" +checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" dependencies = [ "ahash", "arrow-array", @@ -4073,17 +4155,17 @@ dependencies = [ "arrow-schema", "arrow-select", "base64 0.22.1", - "brotli 6.0.0", + "brotli 7.0.0", "bytes", "chrono", "flate2", "futures", "half", - "hashbrown", + "hashbrown 0.15.2", "lz4_flex", "num", "num-bigint", - "object_store", + "object_store 0.11.1", "paste", "seq-macro", "snap", @@ -4123,9 +4205,9 @@ dependencies = [ [[package]] name = "pbjson" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1030c719b0ec2a2d25a5df729d6cff1acf3cc230bf766f4f97833591f7577b90" +checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" dependencies = [ "base64 0.21.7", "serde", @@ -4133,28 +4215,28 @@ dependencies = [ [[package]] name = "pbjson-build" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2580e33f2292d34be285c5bc3dba5259542b083cfad6037b6d70345f24dcb735" +checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ - "heck 0.4.1", - "itertools 0.11.0", - "prost", - "prost-types", + "heck 0.5.0", + "itertools 0.13.0", + "prost 0.13.3", + "prost-types 0.13.3", ] [[package]] name = "pbjson-types" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f596653ba4ac51bdecbb4ef6773bc7f56042dc13927910de1684ad3d32aa12" +checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" dependencies = [ "bytes", "chrono", "pbjson", "pbjson-build", - "prost", - "prost-build", + "prost 0.13.3", + "prost-build 0.13.3", "serde", ] @@ -4235,7 +4317,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -4352,7 +4434,7 @@ dependencies = [ "smallvec", "symbolic-demangle", "tempfile", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -4402,12 +4484,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.20" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -4421,9 +4503,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -4455,7 +4537,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.12.6", +] + +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive 0.13.3", ] [[package]] @@ -4472,10 +4564,31 @@ dependencies = [ "once_cell", "petgraph", "prettyplease", - "prost", - "prost-types", + "prost 0.12.6", + "prost-types 0.12.6", + "regex", + "syn 2.0.89", + "tempfile", +] + +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.13.0", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost 0.13.3", + "prost-types 0.13.3", "regex", - "syn 2.0.87", + "syn 2.0.89", "tempfile", ] @@ -4489,7 +4602,20 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.89", ] [[package]] @@ -4498,7 +4624,16 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" dependencies = [ - "prost", + "prost 0.12.6", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost 0.13.3", ] [[package]] @@ -4554,7 +4689,7 @@ dependencies = [ "rustc-hash 2.0.0", "rustls 0.23.12", "socket2 0.5.7", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -4571,7 +4706,7 @@ dependencies = [ "rustc-hash 2.0.0", "rustls 0.23.12", "slab", - "thiserror", + "thiserror 1.0.69", "tinyvec", "tracing", ] @@ -4737,7 +4872,7 @@ checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -4796,7 +4931,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f5f39ba4513916c1b2657b72af6ec671f091cd637992f58d0ede5cae4e5dea0" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", "memchr", ] @@ -4806,7 +4941,17 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", + "memchr", +] + +[[package]] +name = "regress" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" +dependencies = [ + "hashbrown 0.14.5", "memchr", ] @@ -4921,7 +5066,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.87", + "syn 2.0.89", "unicode-ident", ] @@ -5163,7 +5308,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -5222,22 +5367,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -5248,14 +5393,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -5265,14 +5410,14 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8790a7c3fe883e443eaa2af6f705952bc5d6e8671a220b9335c8cae92c037e74" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -5375,7 +5520,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive", + "snafu-derive 0.7.5", +] + +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive 0.8.5", ] [[package]] @@ -5390,6 +5544,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "snap" version = "1.1.1" @@ -5424,9 +5590,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.49.0" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e" +checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" dependencies = [ "log", "sqlparser_derive", @@ -5440,7 +5606,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -5498,7 +5664,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -5509,55 +5675,77 @@ checksum = "df6c402018947957c4c7f2af49304f5cd8a948858686bf958d519cf0aa644790" dependencies = [ "heck 0.5.0", "prettyplease", - "prost", - "prost-build", - "prost-types", + "prost 0.12.6", + "prost-build 0.12.6", + "prost-types 0.12.6", "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.87", + "syn 2.0.89", "typify 0.0.16", "walkdir", ] [[package]] name = "substrait" -version = "0.36.0" +version = "0.41.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1ee6e584c8bf37104b7eb51c25eae07a9321b0e01379bec3b7c462d2f42afbf" +checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" dependencies = [ "heck 0.5.0", "pbjson", "pbjson-build", "pbjson-types", "prettyplease", - "prost", - "prost-build", - "prost-types", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.87", + "syn 2.0.89", "typify 0.1.0", "walkdir", ] +[[package]] +name = "substrait" +version = "0.49.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13a66e9f86d17064bc06ca30971acdb5e2715a2973ce856801185b70aad7938" +dependencies = [ + "heck 0.5.0", + "prettyplease", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", + "regress 0.10.1", + "schemars", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.89", + "typify 0.2.0", + "walkdir", +] + [[package]] name = "substrait-expr" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9a8b8cc82442b391b67e7c195f0d3de35838bb78b115468d28076ec54dd4577" +checksum = "45a6a94f5dd69c5329a9c96c93ac5f17a8d64089ca21d29d7971825f7451941d" dependencies = [ "once_cell", - "prost", - "substrait 0.29.4", + "prost 0.13.3", + "substrait 0.49.1", "substrait-expr-funcgen", "substrait-expr-macros", - "thiserror", + "thiserror 2.0.3", ] [[package]] @@ -5572,8 +5760,8 @@ dependencies = [ "quote", "serde_yaml", "substrait 0.29.4", - "syn 2.0.87", - "thiserror", + "syn 2.0.89", + "thiserror 1.0.69", ] [[package]] @@ -5584,7 +5772,7 @@ checksum = "919e5b5c5495d18dffb0b8369d74a143c893cbfb98b4337cecb31f3f9bcc112b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -5629,9 +5817,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.87" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -5698,7 +5886,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror", + "thiserror 1.0.69", "time", "uuid", "winapi", @@ -5858,7 +6046,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -5883,31 +6071,51 @@ dependencies = [ "num-traits", "once_cell", "pin-project", - "prost", - "prost-build", + "prost 0.12.6", + "prost-build 0.12.6", "tar", - "thiserror", + "thiserror 1.0.69", "ureq", ] [[package]] name = "thiserror" -version = "1.0.63" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "thiserror-impl", + "proc-macro2", + "quote", + "syn 2.0.89", ] [[package]] name = "thiserror-impl" -version = "1.0.63" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -6021,7 +6229,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -6132,7 +6340,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] @@ -6233,6 +6441,16 @@ dependencies = [ "typify-macro 0.1.0", ] +[[package]] +name = "typify" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" +dependencies = [ + "typify-impl 0.2.0", + "typify-macro 0.2.0", +] + [[package]] name = "typify-impl" version = "0.0.16" @@ -6246,8 +6464,8 @@ dependencies = [ "regress 0.8.0", "schemars", "serde_json", - "syn 2.0.87", - "thiserror", + "syn 2.0.89", + "thiserror 1.0.69", "unicode-ident", ] @@ -6266,8 +6484,28 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.87", - "thiserror", + "syn 2.0.89", + "thiserror 1.0.69", + "unicode-ident", +] + +[[package]] +name = "typify-impl" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" +dependencies = [ + "heck 0.5.0", + "log", + "proc-macro2", + "quote", + "regress 0.10.1", + "schemars", + "semver", + "serde", + "serde_json", + "syn 2.0.89", + "thiserror 1.0.69", "unicode-ident", ] @@ -6283,7 +6521,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.87", + "syn 2.0.89", "typify-impl 0.0.16", ] @@ -6300,10 +6538,27 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.87", + "syn 2.0.89", "typify-impl 0.1.0", ] +[[package]] +name = "typify-macro" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" +dependencies = [ + "proc-macro2", + "quote", + "schemars", + "semver", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.89", + "typify-impl 0.2.0", +] + [[package]] name = "unarray" version = "0.1.4" @@ -6504,7 +6759,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", "wasm-bindgen-shared", ] @@ -6538,7 +6793,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -6933,7 +7188,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.89", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 54a43ef05a..6009d5922f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,7 +124,7 @@ moka = { version = "0.12", features = ["future", "sync"] } num-traits = "0.2" # Set min to prevent use of versions with CVE-2024-41178 object_store = { version = "0.10.2" } -parquet = "52.0" +parquet = "53.0" pin-project = "1.0" path_abs = "0.5" pprof = { version = "0.14.0", features = ["flamegraph", "criterion"] } diff --git a/python/Cargo.toml b/python/Cargo.toml index f19fafab57..49ee66427c 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -12,11 +12,11 @@ name = "lance" crate-type = ["cdylib"] [dependencies] -arrow = { version = "52.2", features = ["pyarrow"] } -arrow-array = "52.2" -arrow-data = "52.2" -arrow-schema = "52.2" -arrow-select = "52.2" +arrow = { version = "53.2", features = ["pyarrow"] } +arrow-array = "53.2" +arrow-data = "53.2" +arrow-schema = "53.2" +arrow-select = "53.2" object_store = "0.10.1" async-trait = "0.1" chrono = "0.4.31" From 6e88cb0bbc81a4358d9cd68b04ba682a0834b6de Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 14:10:11 -0500 Subject: [PATCH 12/24] Fix lance-datafusion build --- rust/lance-datafusion/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 6887a747dc..03e392bcd1 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -10,8 +10,8 @@ categories.workspace = true description = "Internal utilities used by other lance modules to simplify working with datafusion" [dependencies] -arrow.workspace = true -arrow-array.workspace = true +arrow = { workspace = true, features = ["ffi"] } +arrow-array = { workspace = true, features = ["ffi"] } arrow-buffer.workspace = true arrow-schema.workspace = true arrow-select.workspace = true From ddcf53c7d7e46ee7e0ad3f00cba621bf1a2ea38a Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 26 Nov 2024 14:11:47 -0800 Subject: [PATCH 13/24] Add a prost_old to deal with tfrecord --- Cargo.lock | 1 + python/Cargo.lock | 1026 +++++++++++++++++++++--------- rust/lance/Cargo.toml | 3 +- rust/lance/src/utils/tfrecord.rs | 26 +- 4 files changed, 752 insertions(+), 304 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3387cb1f8e..b8c6491697 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3179,6 +3179,7 @@ dependencies = [ "pin-project", "pprof", "pretty_assertions", + "prost 0.12.6", "prost 0.13.3", "prost-build 0.13.3", "rand", diff --git a/python/Cargo.lock b/python/Cargo.lock index ebfe5d8de5..10ca0ad3ea 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -106,32 +106,68 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 52.2.0", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-csv 52.2.0", + "arrow-data 52.2.0", + "arrow-ipc 52.2.0", + "arrow-json 52.2.0", + "arrow-ord 52.2.0", + "arrow-row 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "arrow-string 52.2.0", "pyo3", ] +[[package]] +name = "arrow" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" +dependencies = [ + "arrow-arith 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-csv 53.3.0", + "arrow-data 53.3.0", + "arrow-ipc 53.3.0", + "arrow-json 53.3.0", + "arrow-ord 53.3.0", + "arrow-row 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", + "arrow-string 53.3.0", +] + [[package]] name = "arrow-arith" version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-arith" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "chrono", "half", "num", @@ -144,16 +180,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", "chrono", - "chrono-tz", "half", "hashbrown 0.14.5", "num", ] +[[package]] +name = "arrow-array" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" +dependencies = [ + "ahash", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.1", + "num", +] + [[package]] name = "arrow-buffer" version = "52.2.0" @@ -165,23 +217,54 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-cast" version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "atoi", + "base64 0.22.1", + "chrono", + "half", + "lexical-core 0.8.5", + "num", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "atoi", "base64 0.22.1", "chrono", "comfy-table", "half", - "lexical-core", + "lexical-core 1.0.2", "num", "ryu", ] @@ -192,16 +275,35 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core 0.8.5", + "regex", +] + +[[package]] +name = "arrow-csv" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", + "lexical-core 1.0.2", "regex", ] @@ -211,8 +313,20 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 52.2.0", + "arrow-schema 52.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" +dependencies = [ + "arrow-buffer 53.3.0", + "arrow-schema 53.3.0", "half", "num", ] @@ -223,11 +337,25 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "flatbuffers", +] + +[[package]] +name = "arrow-ipc" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "flatbuffers", "lz4_flex", "zstd", @@ -239,15 +367,35 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "half", + "indexmap", + "lexical-core 0.8.5", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-json" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "chrono", "half", "indexmap", - "lexical-core", + "lexical-core 1.0.2", "num", "serde", "serde_json", @@ -259,11 +407,26 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "half", "num", ] @@ -275,10 +438,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "half", +] + +[[package]] +name = "arrow-row" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" +dependencies = [ + "ahash", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "half", ] @@ -291,6 +468,15 @@ dependencies = [ "bitflags 2.6.0", ] +[[package]] +name = "arrow-schema" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" +dependencies = [ + "bitflags 2.6.0", +] + [[package]] name = "arrow-select" version = "52.2.0" @@ -298,10 +484,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "num", +] + +[[package]] +name = "arrow-select" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" +dependencies = [ + "ahash", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", "num", ] @@ -311,11 +511,28 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "arrow-string" +version = "53.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" +dependencies = [ + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "memchr", "num", "regex", @@ -979,9 +1196,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -1136,9 +1353,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" dependencies = [ "chrono", "chrono-tz-build", @@ -1147,12 +1364,11 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" dependencies = [ "parse-zoneinfo", - "phf", "phf_codegen", ] @@ -1360,15 +1576,15 @@ dependencies = [ [[package]] name = "datafusion" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4fd4a99fc70d40ef7e52b243b4a399c3f8d353a40d5ecb200deee05e49c61bb" +checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-ipc 53.3.0", + "arrow-schema 53.3.0", "async-compression", "async-trait", "bytes", @@ -1383,6 +1599,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1395,10 +1612,10 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "num_cpus", - "object_store", + "object_store 0.11.1", "parking_lot", "parquet", "paste", @@ -1416,56 +1633,60 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b3cfbd84c6003594ae1972314e3df303a27ce8ce755fcea3240c90f4c0529" +checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" dependencies = [ - "arrow-schema", + "arrow-schema 53.3.0", "async-trait", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "parking_lot", ] [[package]] name = "datafusion-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fdbc877e3e40dcf88cc8f283d9f5c8851f0a3aa07fee657b1b75ac1ad49b9c" +checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-schema 53.3.0", "chrono", "half", "hashbrown 0.14.5", "instant", "libc", "num_cpus", - "object_store", + "object_store 0.11.1", "parquet", + "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7496d1f664179f6ce3a5cbef6566056ccaf3ea4aa72cc455f80e62c1dd86b1" +checksum = "694c9d7ea1b82f95768215c4cb5c2d5c613690624e832a7ee64be563139d582f" dependencies = [ + "log", "tokio", ] [[package]] name = "datafusion-execution" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1" +checksum = "30b4cedcd98151e0a297f34021b6b232ff0ebc0f2f18ea5e7446b5ebda99b1a1" dependencies = [ - "arrow", + "arrow 53.3.0", "chrono", "dashmap 6.1.0", "datafusion-common", @@ -1473,7 +1694,7 @@ dependencies = [ "futures", "hashbrown 0.14.5", "log", - "object_store", + "object_store 0.11.1", "parking_lot", "rand", "tempfile", @@ -1482,16 +1703,19 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c1841c409d9518c17971d15c9bae62e629eb937e6fb6c68cd32e9186f8b30d2" +checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", "chrono", "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "paste", "serde_json", "sqlparser", @@ -1499,14 +1723,25 @@ dependencies = [ "strum_macros", ] +[[package]] +name = "datafusion-expr-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1ba2bb018218d9260bbd7de6a46a20f61b93d4911dba8aa07735625004c4fb" +dependencies = [ + "arrow 53.3.0", + "datafusion-common", + "paste", +] + [[package]] name = "datafusion-functions" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e481cf34d2a444bd8fa09b65945f0ce83dc92df8665b761505b3d9f351bebb" +checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 53.3.0", + "arrow-buffer 53.3.0", "base64 0.22.1", "blake2", "blake3", @@ -1516,7 +1751,7 @@ dependencies = [ "datafusion-expr", "hashbrown 0.14.5", "hex", - "itertools 0.12.1", + "itertools 0.13.0", "log", "md-5", "rand", @@ -1528,51 +1763,81 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b4ece19f73c02727e5e8654d79cd5652de371352c1df3c4ac3e419ecd6943fb" +checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" dependencies = [ "ahash", - "arrow", - "arrow-schema", + "arrow 53.3.0", + "arrow-schema 53.3.0", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", + "half", "log", "paste", "sqlparser", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2285d080dfecdfb8605b0ab2f1a41e2473208dc8e9bd6f5d1dbcfe97f517e6f" +dependencies = [ + "ahash", + "arrow 53.3.0", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + [[package]] name = "datafusion-functions-nested" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1474552cc824e8c9c88177d454db5781d4b66757d4aca75719306b8343a5e8d" +checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-ord 53.3.0", + "arrow-schema 53.3.0", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools 0.13.0", "log", "paste", "rand", ] +[[package]] +name = "datafusion-functions-window" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e78d30ebd6e9f74d4aeddec32744f5a18b5f9584591bc586fb5259c4848bac5" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", +] + [[package]] name = "datafusion-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791ff56f55608bc542d1ea7a68a64bdc86a9413f5a381d06a39fd49c2a3ab906" +checksum = "be172c44bf344df707e0c041fa3f41e6dc5fb0976f539c68bc442bca150ee58c" dependencies = [ - "arrow", + "arrow 53.3.0", "async-trait", "chrono", "datafusion-common", @@ -1580,7 +1845,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "regex-syntax", @@ -1588,28 +1853,30 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a223962b3041304a3e20ed07a21d5de3d88d7e4e71ca192135db6d24e3365a4" +checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "arrow-string", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-ord 53.3.0", + "arrow-schema 53.3.0", + "arrow-string 53.3.0", "base64 0.22.1", "chrono", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", "hex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1618,42 +1885,44 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5e7d8532a1601cd916881db87a70b0a599900d23f3db2897d389032da53bc6" +checksum = "242ba8a26351d9ca16295814c46743b0d1b00ec372174bdfbba991d0953dd596" dependencies = [ "ahash", - "arrow", + "arrow 53.3.0", "datafusion-common", - "datafusion-expr", + "datafusion-expr-common", "hashbrown 0.14.5", "rand", ] [[package]] name = "datafusion-physical-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb9c78f308e050f5004671039786a925c3fee83b90004e9fcfd328d7febdcc0" +checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" dependencies = [ + "arrow-schema 53.3.0", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-plan" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d1116949432eb2d30f6362707e2846d942e491052a206f2ddcb42d08aea1ffe" +checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-ord 53.3.0", + "arrow-schema 53.3.0", "async-trait", "chrono", "datafusion-common", @@ -1661,13 +1930,14 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1678,13 +1948,13 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45d0180711165fe94015d7c4123eb3e1cf5fb60b1506453200b8d1ce666bef0" +checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-schema 53.3.0", "datafusion-common", "datafusion-expr", "log", @@ -1695,18 +1965,18 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0a0055aa98246c79f98f0d03df11f16cb7adc87818d02d4413e3f3cdadbbee" +checksum = "220d7ab0ffadd8b1af753904b18dd92d270271810b1ce9f8be3c3dbe2392b636" dependencies = [ - "arrow-buffer", + "arrow-buffer 53.3.0", "async-recursion", "chrono", "datafusion", - "itertools 0.12.1", - "object_store", + "itertools 0.13.0", + "object_store 0.11.1", "pbjson-types", - "prost 0.12.6", + "prost 0.13.3", "substrait", "url", ] @@ -2769,14 +3039,14 @@ dependencies = [ name = "lance" version = "0.19.2" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", + "arrow 53.3.0", + "arrow-arith 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-ord 53.3.0", + "arrow-row 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-recursion", "async-trait", "async_cell", @@ -2787,6 +3057,7 @@ dependencies = [ "chrono", "dashmap 5.5.3", "datafusion", + "datafusion-expr", "datafusion-functions", "datafusion-physical-expr", "deepsize", @@ -2805,16 +3076,17 @@ dependencies = [ "lazy_static", "log", "moka", - "object_store", + "object_store 0.10.2", "permutation", "pin-project", "prost 0.12.6", - "prost-build 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "tfrecord", @@ -2828,12 +3100,13 @@ dependencies = [ name = "lance-arrow" version = "0.19.2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", + "bytes", "getrandom", "half", "num-traits", @@ -2844,9 +3117,9 @@ dependencies = [ name = "lance-core" version = "0.19.2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-schema 53.3.0", "async-trait", "byteorder", "bytes", @@ -2862,13 +3135,13 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.10.2", "pin-project", - "prost 0.12.6", + "prost 0.13.3", "rand", "roaring", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", "tokio-stream", "tokio-util", @@ -2880,12 +3153,12 @@ dependencies = [ name = "lance-datafusion" version = "0.19.2" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-ord 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-trait", "datafusion", "datafusion-common", @@ -2897,8 +3170,8 @@ dependencies = [ "lance-core", "lazy_static", "log", - "prost 0.12.6", - "snafu", + "prost 0.13.3", + "snafu 0.7.5", "tokio", ] @@ -2906,10 +3179,10 @@ dependencies = [ name = "lance-datagen" version = "0.19.2" dependencies = [ - "arrow", - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-cast 53.3.0", + "arrow-schema 53.3.0", "chrono", "futures", "hex", @@ -2922,14 +3195,14 @@ name = "lance-encoding" version = "0.19.2" dependencies = [ "arrayref", - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow 53.3.0", + "arrow-arith 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "bytemuck", "byteorder", "bytes", @@ -2944,12 +3217,12 @@ dependencies = [ "log", "num-traits", "paste", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "seq-macro", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "zstd", @@ -2959,12 +3232,12 @@ dependencies = [ name = "lance-file" version = "0.19.2" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-arith 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-recursion", "async-trait", "byteorder", @@ -2978,12 +3251,12 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "object_store 0.10.2", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "roaring", - "snafu", + "snafu 0.7.5", "tempfile", "tokio", "tracing", @@ -2993,11 +3266,11 @@ dependencies = [ name = "lance-index" version = "0.19.2" dependencies = [ - "arrow", - "arrow-array", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-ord 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-recursion", "async-trait", "bitvec", @@ -3024,15 +3297,15 @@ dependencies = [ "log", "moka", "num-traits", - "object_store", - "prost 0.12.6", - "prost-build 0.12.6", + "object_store 0.10.2", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "rayon", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "tokio", @@ -3044,14 +3317,14 @@ dependencies = [ name = "lance-io" version = "0.19.2" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow 53.3.0", + "arrow-arith 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "async-priority-channel", "async-recursion", "async-trait", @@ -3066,14 +3339,14 @@ dependencies = [ "lance-core", "lazy_static", "log", - "object_store", + "object_store 0.10.2", "path_abs", "pin-project", - "prost 0.12.6", - "prost-build 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "shellexpand", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "url", @@ -3083,9 +3356,9 @@ dependencies = [ name = "lance-linalg" version = "0.19.2" dependencies = [ - "arrow-array", - "arrow-ord", - "arrow-schema", + "arrow-array 53.3.0", + "arrow-ord 53.3.0", + "arrow-schema 53.3.0", "bitvec", "cc", "deepsize", @@ -3106,11 +3379,11 @@ dependencies = [ name = "lance-table" version = "0.19.2" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", + "arrow 53.3.0", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-ipc 53.3.0", + "arrow-schema 53.3.0", "async-trait", "aws-credential-types", "aws-sdk-dynamodb", @@ -3125,16 +3398,16 @@ dependencies = [ "lance-io", "lazy_static", "log", - "object_store", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "object_store 0.10.2", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "rangemap", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "url", @@ -3159,11 +3432,24 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "lexical-parse-float 0.8.5", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "lexical-write-float 0.8.5", + "lexical-write-integer 0.8.5", +] + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float 1.0.2", + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", + "lexical-write-float 1.0.2", + "lexical-write-integer 1.0.2", ] [[package]] @@ -3172,8 +3458,19 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "lexical-parse-integer", - "lexical-util", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", "static_assertions", ] @@ -3183,7 +3480,17 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -3196,14 +3503,34 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + [[package]] name = "lexical-write-float" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "lexical-util", - "lexical-write-integer", + "lexical-util 0.8.5", + "lexical-write-integer 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util 1.0.3", + "lexical-write-integer 1.0.2", "static_assertions", ] @@ -3213,7 +3540,17 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -3602,7 +3939,28 @@ dependencies = [ "rustls-pemfile 2.2.0", "serde", "serde_json", - "snafu", + "snafu 0.7.5", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu 0.8.5", "tokio", "tracing", "url", @@ -3694,18 +4052,18 @@ dependencies = [ [[package]] name = "parquet" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" +checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 53.3.0", + "arrow-buffer 53.3.0", + "arrow-cast 53.3.0", + "arrow-data 53.3.0", + "arrow-ipc 53.3.0", + "arrow-schema 53.3.0", + "arrow-select 53.3.0", "base64 0.22.1", "brotli", "bytes", @@ -3713,11 +4071,11 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "lz4_flex", "num", "num-bigint", - "object_store", + "object_store 0.11.1", "paste", "seq-macro", "snap", @@ -3757,9 +4115,9 @@ dependencies = [ [[package]] name = "pbjson" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1030c719b0ec2a2d25a5df729d6cff1acf3cc230bf766f4f97833591f7577b90" +checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" dependencies = [ "base64 0.21.7", "serde", @@ -3767,28 +4125,28 @@ dependencies = [ [[package]] name = "pbjson-build" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2580e33f2292d34be285c5bc3dba5259542b083cfad6037b6d70345f24dcb735" +checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ - "heck 0.4.1", - "itertools 0.11.0", - "prost 0.12.6", - "prost-types 0.12.6", + "heck 0.5.0", + "itertools 0.13.0", + "prost 0.13.3", + "prost-types 0.13.3", ] [[package]] name = "pbjson-types" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f596653ba4ac51bdecbb4ef6773bc7f56042dc13927910de1684ad3d32aa12" +checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" dependencies = [ "bytes", "chrono", "pbjson", "pbjson-build", - "prost 0.12.6", - "prost-build 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", "serde", ] @@ -4002,6 +4360,16 @@ dependencies = [ "prost-derive 0.12.6", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive 0.13.3", +] + [[package]] name = "prost-build" version = "0.11.9" @@ -4045,6 +4413,27 @@ dependencies = [ "tempfile", ] +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.13.0", + "log", + "multimap 0.10.0", + "once_cell", + "petgraph", + "prettyplease 0.2.25", + "prost 0.13.3", + "prost-types 0.13.3", + "regex", + "syn 2.0.87", + "tempfile", +] + [[package]] name = "prost-derive" version = "0.11.9" @@ -4071,6 +4460,19 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "prost-types" version = "0.11.9" @@ -4089,6 +4491,15 @@ dependencies = [ "prost 0.12.6", ] +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost 0.13.3", +] + [[package]] name = "pulldown-cmark" version = "0.9.6" @@ -4104,11 +4515,11 @@ dependencies = [ name = "pylance" version = "0.19.2" dependencies = [ - "arrow", - "arrow-array", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow 52.2.0", + "arrow-array 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", "async-trait", "bytes", "chrono", @@ -4127,14 +4538,14 @@ dependencies = [ "lance-table", "lazy_static", "log", - "object_store", + "object_store 0.10.2", "prost 0.12.6", "prost-build 0.11.9", "pyo3", "serde", "serde_json", "serde_yaml", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "tracing-chrome", @@ -4969,7 +5380,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive", + "snafu-derive 0.7.5", +] + +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive 0.8.5", ] [[package]] @@ -4984,6 +5404,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "snap" version = "1.1.1" @@ -5018,9 +5450,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.49.0" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e" +checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" dependencies = [ "log", "sqlparser_derive", @@ -5085,18 +5517,18 @@ dependencies = [ [[package]] name = "substrait" -version = "0.36.0" +version = "0.41.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1ee6e584c8bf37104b7eb51c25eae07a9321b0e01379bec3b7c462d2f42afbf" +checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" dependencies = [ "heck 0.5.0", "pbjson", "pbjson-build", "pbjson-types", "prettyplease 0.2.25", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "schemars", "semver", "serde", diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index b4de70f924..068ea0869a 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -69,6 +69,7 @@ moka.workspace = true permutation = { version = "0.4.0" } tantivy.workspace = true tfrecord = { version = "0.15.0", optional = true, features = ["async"] } +prost_old = { version = "0.12.6", package = "prost", optional = true } aws-sdk-dynamodb = { workspace = true, optional = true } tempfile.workspace = true tracing.workspace = true @@ -105,7 +106,7 @@ random_word = { version = "0.4.3", features = ["en"] } fp16kernels = ["lance-linalg/fp16kernels"] # Prevent dynamic linking of lzma, which comes from datafusion cli = ["clap", "lzma-sys/static"] -tensorflow = ["tfrecord"] +tensorflow = ["tfrecord", "prost_old"] dynamodb = ["lance-table/dynamodb", "aws-sdk-dynamodb"] dynamodb_tests = ["dynamodb"] substrait = ["lance-datafusion/substrait"] diff --git a/rust/lance/src/utils/tfrecord.rs b/rust/lance/src/utils/tfrecord.rs index a076d72813..92b99a2f23 100644 --- a/rust/lance/src/utils/tfrecord.rs +++ b/rust/lance/src/utils/tfrecord.rs @@ -17,7 +17,7 @@ use datafusion::physical_plan::SendableRecordBatchStream; use futures::{StreamExt, TryStreamExt}; use half::{bf16, f16}; use lance_arrow::bfloat16::{ARROW_EXT_META_KEY, ARROW_EXT_NAME_KEY, BFLOAT16_EXT_NAME}; -use prost::Message; +use prost_old::Message; use std::collections::HashMap; use std::sync::Arc; @@ -32,6 +32,20 @@ use tfrecord::protobuf::feature::Kind; use tfrecord::protobuf::{DataType as TensorDataType, TensorProto}; use tfrecord::record_reader::RecordStream; use tfrecord::{Example, Feature}; + +trait OldProstResultExt { + fn map_prost_err(self, location: Location) -> Result; +} + +impl OldProstResultExt for std::result::Result { + fn map_prost_err(self, location: Location) -> Result { + self.map_err(|err| Error::IO { + source: Box::new(err), + location, + }) + } +} + /// Infer the Arrow schema from a TFRecord file. /// /// The featured named by `tensor_features` will be assumed to be binary fields @@ -224,7 +238,7 @@ impl FeatureMeta { } fn extract_tensor(data: &[u8]) -> Result { - let tensor_proto = TensorProto::decode(data)?; + let tensor_proto = TensorProto::decode(data).map_prost_err(location!())?; Ok(FeatureType::Tensor { shape: tensor_proto .tensor_shape @@ -617,7 +631,7 @@ fn convert_fixedshape_tensor( DataType::Float16 => { let mut values = Float16Builder::with_capacity(features.len()); for tensors in tensor_iter { - if let Some(tensors) = tensors? { + if let Some(tensors) = tensors.map_prost_err(location!())? { for tensor in tensors { validate_tensor(&tensor, type_info)?; if tensor.half_val.is_empty() { @@ -645,7 +659,7 @@ fn convert_fixedshape_tensor( let mut values = FixedSizeBinaryBuilder::with_capacity(features.len(), 2); for tensors in tensor_iter { - if let Some(tensors) = tensors? { + if let Some(tensors) = tensors.map_prost_err(location!())? { for tensor in tensors { validate_tensor(&tensor, type_info)?; if tensor.half_val.is_empty() { @@ -673,7 +687,7 @@ fn convert_fixedshape_tensor( DataType::Float32 => { let mut values = Float32Builder::with_capacity(features.len()); for tensors in tensor_iter { - if let Some(tensors) = tensors? { + if let Some(tensors) = tensors.map_prost_err(location!())? { for tensor in tensors { validate_tensor(&tensor, type_info)?; if tensor.float_val.is_empty() { @@ -695,7 +709,7 @@ fn convert_fixedshape_tensor( DataType::Float64 => { let mut values = Float64Builder::with_capacity(features.len()); for tensors in tensor_iter { - if let Some(tensors) = tensors? { + if let Some(tensors) = tensors.map_prost_err(location!())? { for tensor in tensors { validate_tensor(&tensor, type_info)?; if tensor.float_val.is_empty() { From 9a38e2eb0bafd3f611fd235e59883bd80f84cbfa Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 17:40:10 -0500 Subject: [PATCH 14/24] Fixing the python deps --- python/Cargo.lock | 480 ++++++++++++++++++++++++++++++---------------- python/Cargo.toml | 4 +- 2 files changed, 317 insertions(+), 167 deletions(-) diff --git a/python/Cargo.lock b/python/Cargo.lock index 4bbf63f81b..ecc540723b 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -102,9 +102,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" dependencies = [ "arrow-arith", "arrow-array", @@ -124,9 +124,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" +checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" dependencies = [ "arrow-array", "arrow-buffer", @@ -139,9 +139,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" +checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" dependencies = [ "ahash", "arrow-buffer", @@ -150,15 +150,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "num", ] [[package]] name = "arrow-buffer" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" +checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" dependencies = [ "bytes", "half", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" +checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -188,9 +188,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" +checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" dependencies = [ "arrow-array", "arrow-buffer", @@ -207,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" +checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" dependencies = [ "arrow-buffer", "arrow-schema", @@ -219,9 +219,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" +checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" +checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" dependencies = [ "arrow-array", "arrow-buffer", @@ -255,9 +255,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" +checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" +checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" dependencies = [ "ahash", "arrow-array", @@ -284,18 +284,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" +checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" +checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" dependencies = [ "ahash", "arrow-array", @@ -307,9 +307,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" +checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" dependencies = [ "arrow-array", "arrow-buffer", @@ -950,9 +950,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -1070,9 +1070,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" dependencies = [ "chrono", "chrono-tz-build", @@ -1081,12 +1081,11 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" dependencies = [ "parse-zoneinfo", - "phf", "phf_codegen", ] @@ -1304,9 +1303,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4fd4a99fc70d40ef7e52b243b4a399c3f8d353a40d5ecb200deee05e49c61bb" +checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" dependencies = [ "ahash", "arrow", @@ -1327,6 +1326,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1339,10 +1339,10 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "num_cpus", - "object_store", + "object_store 0.11.1", "parking_lot", "parquet", "paste", @@ -1360,9 +1360,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b3cfbd84c6003594ae1972314e3df303a27ce8ce755fcea3240c90f4c0529" +checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" dependencies = [ "arrow-schema", "async-trait", @@ -1370,13 +1370,14 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "parking_lot", ] [[package]] name = "datafusion-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fdbc877e3e40dcf88cc8f283d9f5c8851f0a3aa07fee657b1b75ac1ad49b9c" +checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" dependencies = [ "ahash", "arrow", @@ -1389,25 +1390,28 @@ dependencies = [ "instant", "libc", "num_cpus", - "object_store", + "object_store 0.11.1", "parquet", + "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7496d1f664179f6ce3a5cbef6566056ccaf3ea4aa72cc455f80e62c1dd86b1" +checksum = "694c9d7ea1b82f95768215c4cb5c2d5c613690624e832a7ee64be563139d582f" dependencies = [ + "log", "tokio", ] [[package]] name = "datafusion-execution" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1" +checksum = "30b4cedcd98151e0a297f34021b6b232ff0ebc0f2f18ea5e7446b5ebda99b1a1" dependencies = [ "arrow", "chrono", @@ -1417,7 +1421,7 @@ dependencies = [ "futures", "hashbrown 0.14.5", "log", - "object_store", + "object_store 0.11.1", "parking_lot", "rand", "tempfile", @@ -1426,9 +1430,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c1841c409d9518c17971d15c9bae62e629eb937e6fb6c68cd32e9186f8b30d2" +checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" dependencies = [ "ahash", "arrow", @@ -1436,6 +1440,9 @@ dependencies = [ "arrow-buffer", "chrono", "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", "paste", "serde_json", "sqlparser", @@ -1443,11 +1450,22 @@ dependencies = [ "strum_macros", ] +[[package]] +name = "datafusion-expr-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1ba2bb018218d9260bbd7de6a46a20f61b93d4911dba8aa07735625004c4fb" +dependencies = [ + "arrow", + "datafusion-common", + "paste", +] + [[package]] name = "datafusion-functions" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e481cf34d2a444bd8fa09b65945f0ce83dc92df8665b761505b3d9f351bebb" +checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" dependencies = [ "arrow", "arrow-buffer", @@ -1460,7 +1478,7 @@ dependencies = [ "datafusion-expr", "hashbrown 0.14.5", "hex", - "itertools 0.12.1", + "itertools 0.13.0", "log", "md-5", "rand", @@ -1472,9 +1490,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b4ece19f73c02727e5e8654d79cd5652de371352c1df3c4ac3e419ecd6943fb" +checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" dependencies = [ "ahash", "arrow", @@ -1482,17 +1500,34 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", + "half", "log", "paste", "sqlparser", ] +[[package]] +name = "datafusion-functions-aggregate-common" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2285d080dfecdfb8605b0ab2f1a41e2473208dc8e9bd6f5d1dbcfe97f517e6f" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + [[package]] name = "datafusion-functions-nested" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1474552cc824e8c9c88177d454db5781d4b66757d4aca75719306b8343a5e8d" +checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" dependencies = [ "arrow", "arrow-array", @@ -1504,17 +1539,30 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools 0.13.0", "log", "paste", "rand", ] +[[package]] +name = "datafusion-functions-window" +version = "42.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e78d30ebd6e9f74d4aeddec32744f5a18b5f9584591bc586fb5259c4848bac5" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", +] + [[package]] name = "datafusion-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791ff56f55608bc542d1ea7a68a64bdc86a9413f5a381d06a39fd49c2a3ab906" +checksum = "be172c44bf344df707e0c041fa3f41e6dc5fb0976f539c68bc442bca150ee58c" dependencies = [ "arrow", "async-trait", @@ -1524,7 +1572,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "regex-syntax", @@ -1532,9 +1580,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a223962b3041304a3e20ed07a21d5de3d88d7e4e71ca192135db6d24e3365a4" +checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" dependencies = [ "ahash", "arrow", @@ -1548,12 +1596,14 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", "hex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1562,35 +1612,37 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5e7d8532a1601cd916881db87a70b0a599900d23f3db2897d389032da53bc6" +checksum = "242ba8a26351d9ca16295814c46743b0d1b00ec372174bdfbba991d0953dd596" dependencies = [ "ahash", "arrow", "datafusion-common", - "datafusion-expr", + "datafusion-expr-common", "hashbrown 0.14.5", "rand", ] [[package]] name = "datafusion-physical-optimizer" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb9c78f308e050f5004671039786a925c3fee83b90004e9fcfd328d7febdcc0" +checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" dependencies = [ + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-plan" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d1116949432eb2d30f6362707e2846d942e491052a206f2ddcb42d08aea1ffe" +checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" dependencies = [ "ahash", "arrow", @@ -1605,13 +1657,14 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", "hashbrown 0.14.5", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1622,9 +1675,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45d0180711165fe94015d7c4123eb3e1cf5fb60b1506453200b8d1ce666bef0" +checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" dependencies = [ "arrow", "arrow-array", @@ -1639,18 +1692,18 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "41.0.0" +version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0a0055aa98246c79f98f0d03df11f16cb7adc87818d02d4413e3f3cdadbbee" +checksum = "220d7ab0ffadd8b1af753904b18dd92d270271810b1ce9f8be3c3dbe2392b636" dependencies = [ "arrow-buffer", "async-recursion", "chrono", "datafusion", - "itertools 0.12.1", - "object_store", + "itertools 0.13.0", + "object_store 0.11.1", "pbjson-types", - "prost 0.12.6", + "prost 0.13.3", "substrait", "url", ] @@ -2687,6 +2740,7 @@ dependencies = [ "chrono", "dashmap 5.5.3", "datafusion", + "datafusion-expr", "datafusion-functions", "datafusion-physical-expr", "deepsize", @@ -2705,17 +2759,17 @@ dependencies = [ "lazy_static", "log", "moka", - "object_store", + "object_store 0.10.2", "permutation", "pin-project", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "tfrecord", @@ -2735,6 +2789,7 @@ dependencies = [ "arrow-data", "arrow-schema", "arrow-select", + "bytes", "getrandom", "half", "num-traits", @@ -2763,13 +2818,13 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.10.2", "pin-project", - "prost 0.12.6", + "prost 0.13.3", "rand", "roaring", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", "tokio-stream", "tokio-util", @@ -2798,8 +2853,8 @@ dependencies = [ "lance-core", "lazy_static", "log", - "prost 0.12.6", - "snafu", + "prost 0.13.3", + "snafu 0.7.5", "tokio", ] @@ -2845,12 +2900,12 @@ dependencies = [ "log", "num-traits", "paste", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "seq-macro", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "zstd", @@ -2879,12 +2934,12 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "object_store 0.10.2", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "roaring", - "snafu", + "snafu 0.7.5", "tempfile", "tokio", "tracing", @@ -2925,15 +2980,15 @@ dependencies = [ "log", "moka", "num-traits", - "object_store", - "prost 0.12.6", - "prost-build 0.12.6", + "object_store 0.10.2", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "rayon", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tantivy", "tempfile", "tokio", @@ -2967,14 +3022,14 @@ dependencies = [ "lance-core", "lazy_static", "log", - "object_store", + "object_store 0.10.2", "path_abs", "pin-project", - "prost 0.12.6", - "prost-build 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", "rand", "shellexpand", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "url", @@ -3026,16 +3081,16 @@ dependencies = [ "lance-io", "lazy_static", "log", - "object_store", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "object_store 0.10.2", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "rangemap", "roaring", "serde", "serde_json", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "url", @@ -3056,9 +3111,9 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "lexical-core" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -3069,9 +3124,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -3080,9 +3135,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "0.8.6" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" dependencies = [ "lexical-util", "static_assertions", @@ -3090,18 +3145,18 @@ dependencies = [ [[package]] name = "lexical-util" -version = "0.8.5" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" dependencies = [ "lexical-util", "lexical-write-integer", @@ -3110,9 +3165,9 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" dependencies = [ "lexical-util", "static_assertions", @@ -3487,7 +3542,28 @@ dependencies = [ "rustls-pemfile 2.2.0", "serde", "serde_json", - "snafu", + "snafu 0.7.5", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot", + "percent-encoding", + "snafu 0.8.5", "tokio", "tracing", "url", @@ -3579,9 +3655,9 @@ dependencies = [ [[package]] name = "parquet" -version = "52.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" +checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" dependencies = [ "ahash", "arrow-array", @@ -3598,11 +3674,11 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "lz4_flex", "num", "num-bigint", - "object_store", + "object_store 0.11.1", "paste", "seq-macro", "snap", @@ -3642,9 +3718,9 @@ dependencies = [ [[package]] name = "pbjson" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1030c719b0ec2a2d25a5df729d6cff1acf3cc230bf766f4f97833591f7577b90" +checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" dependencies = [ "base64 0.21.7", "serde", @@ -3652,28 +3728,28 @@ dependencies = [ [[package]] name = "pbjson-build" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2580e33f2292d34be285c5bc3dba5259542b083cfad6037b6d70345f24dcb735" +checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ - "heck 0.4.1", - "itertools 0.11.0", - "prost 0.12.6", - "prost-types 0.12.6", + "heck 0.5.0", + "itertools 0.13.0", + "prost 0.13.3", + "prost-types 0.13.3", ] [[package]] name = "pbjson-types" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f596653ba4ac51bdecbb4ef6773bc7f56042dc13927910de1684ad3d32aa12" +checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" dependencies = [ "bytes", "chrono", "pbjson", "pbjson-build", - "prost 0.12.6", - "prost-build 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", "serde", ] @@ -3871,6 +3947,16 @@ dependencies = [ "prost-derive 0.12.6", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive 0.13.3", +] + [[package]] name = "prost-build" version = "0.11.9" @@ -3914,6 +4000,27 @@ dependencies = [ "tempfile", ] +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.13.0", + "log", + "multimap 0.10.0", + "once_cell", + "petgraph", + "prettyplease 0.2.25", + "prost 0.13.3", + "prost-types 0.13.3", + "regex", + "syn 2.0.89", + "tempfile", +] + [[package]] name = "prost-derive" version = "0.11.9" @@ -3940,6 +4047,19 @@ dependencies = [ "syn 2.0.89", ] +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "prost-types" version = "0.11.9" @@ -3958,6 +4078,15 @@ dependencies = [ "prost 0.12.6", ] +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost 0.13.3", +] + [[package]] name = "pylance" version = "0.20.0" @@ -3985,14 +4114,14 @@ dependencies = [ "lance-table", "lazy_static", "log", - "object_store", - "prost 0.12.6", + "object_store 0.10.2", + "prost 0.13.3", "prost-build 0.11.9", "pyo3", "serde", "serde_json", "serde_yaml", - "snafu", + "snafu 0.7.5", "tokio", "tracing", "tracing-chrome", @@ -4003,15 +4132,15 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.21.2" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", - "parking_lot", + "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -4021,9 +4150,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.21.2" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" dependencies = [ "once_cell", "target-lexicon", @@ -4031,9 +4160,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.21.2" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" dependencies = [ "libc", "pyo3-build-config", @@ -4041,9 +4170,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.21.2" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -4053,11 +4182,11 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.21.2" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", @@ -4806,7 +4935,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive", + "snafu-derive 0.7.5", +] + +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive 0.8.5", ] [[package]] @@ -4821,6 +4959,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "snap" version = "1.1.1" @@ -4845,9 +4995,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.49.0" +version = "0.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a404d0e14905361b918cb8afdb73605e25c1d5029312bd9785142dcb3aa49e" +checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" dependencies = [ "log", "sqlparser_derive", @@ -4912,18 +5062,18 @@ dependencies = [ [[package]] name = "substrait" -version = "0.36.0" +version = "0.41.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1ee6e584c8bf37104b7eb51c25eae07a9321b0e01379bec3b7c462d2f42afbf" +checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" dependencies = [ "heck 0.5.0", "pbjson", "pbjson-build", "pbjson-types", "prettyplease 0.2.25", - "prost 0.12.6", - "prost-build 0.12.6", - "prost-types 0.12.6", + "prost 0.13.3", + "prost-build 0.13.3", + "prost-types 0.13.3", "schemars", "semver", "serde", diff --git a/python/Cargo.toml b/python/Cargo.toml index 49ee66427c..bf57922e31 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -42,8 +42,8 @@ lance-linalg = { path = "../rust/lance-linalg" } lance-table = { path = "../rust/lance-table" } lazy_static = "1" log = "0.4" -prost = "0.12.2" -pyo3 = { version = "0.21", features = [ +prost = "0.13.2" +pyo3 = { version = "0.22", features = [ "extension-module", "abi3-py39", "gil-refs", From 155bc5c440a046918e0e2757e787e32ab48cd2d8 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 17:53:02 -0500 Subject: [PATCH 15/24] Need py-clone feature to build wheel --- python/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/python/Cargo.toml b/python/Cargo.toml index bf57922e31..57549345a3 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -47,6 +47,7 @@ pyo3 = { version = "0.22", features = [ "extension-module", "abi3-py39", "gil-refs", + "py-clone", ] } tokio = { version = "1.23", features = ["rt-multi-thread"] } uuid = "1.3.0" From 7dd031b6ae398bdf0297277b4cb735cea474dcbe Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Tue, 26 Nov 2024 18:08:50 -0500 Subject: [PATCH 16/24] Update lock again --- Cargo.lock | 760 +++++++++++++++++++---------------------------------- 1 file changed, 267 insertions(+), 493 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b8c6491697..f79deab56f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,6 +40,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned-vec" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a" +dependencies = [ + "equator", +] + [[package]] name = "all_asserts" version = "2.3.1" @@ -177,17 +186,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" dependencies = [ "arrow-arith", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", "arrow-csv", - "arrow-data 53.3.0", - "arrow-ipc 53.3.0", + "arrow-data", + "arrow-ipc", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-schema", + "arrow-select", "arrow-string", ] @@ -197,31 +206,15 @@ version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "half", "num", ] -[[package]] -name = "arrow-array" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" -dependencies = [ - "ahash", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "chrono", - "half", - "hashbrown 0.14.5", - "num", -] - [[package]] name = "arrow-array" version = "53.3.0" @@ -229,9 +222,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" dependencies = [ "ahash", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "chrono-tz", "half", @@ -239,17 +232,6 @@ dependencies = [ "num", ] -[[package]] -name = "arrow-buffer" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" -dependencies = [ - "bytes", - "half", - "num", -] - [[package]] name = "arrow-buffer" version = "53.3.0" @@ -261,43 +243,23 @@ dependencies = [ "num", ] -[[package]] -name = "arrow-cast" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "arrow-select 52.2.0", - "atoi", - "base64 0.22.1", - "chrono", - "half", - "lexical-core 0.8.5", - "num", - "ryu", -] - [[package]] name = "arrow-cast" version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "atoi", "base64 0.22.1", "chrono", "comfy-table", "half", - "lexical-core 1.0.2", + "lexical-core", "num", "ryu", ] @@ -308,68 +270,42 @@ version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core 1.0.2", + "lexical-core", "regex", ] -[[package]] -name = "arrow-data" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" -dependencies = [ - "arrow-buffer 52.2.0", - "arrow-schema 52.2.0", - "half", - "num", -] - [[package]] name = "arrow-data" version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" dependencies = [ - "arrow-buffer 53.3.0", - "arrow-schema 53.3.0", + "arrow-buffer", + "arrow-schema", "half", "num", ] -[[package]] -name = "arrow-ipc" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-cast 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "flatbuffers", -] - [[package]] name = "arrow-ipc" version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "flatbuffers", "lz4_flex", "zstd", @@ -381,15 +317,15 @@ version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "half", "indexmap", - "lexical-core 1.0.2", + "lexical-core", "num", "serde", "serde_json", @@ -401,11 +337,11 @@ version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "half", "num", ] @@ -417,19 +353,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" dependencies = [ "ahash", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "half", ] -[[package]] -name = "arrow-schema" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" - [[package]] name = "arrow-schema" version = "53.3.0" @@ -439,20 +369,6 @@ dependencies = [ "bitflags 2.6.0", ] -[[package]] -name = "arrow-select" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" -dependencies = [ - "ahash", - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "num", -] - [[package]] name = "arrow-select" version = "53.3.0" @@ -460,10 +376,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" dependencies = [ "ahash", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "num", ] @@ -473,11 +389,11 @@ version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "memchr", "num", "regex", @@ -1165,17 +1081,6 @@ dependencies = [ "brotli-decompressor 2.5.1", ] -[[package]] -name = "brotli" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor 4.0.1", -] - [[package]] name = "brotli" version = "7.0.0" @@ -1213,12 +1118,6 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" -[[package]] -name = "bytecount" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" - [[package]] name = "bytemuck" version = "1.18.0" @@ -1268,37 +1167,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "camino" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" -dependencies = [ - "serde", -] - -[[package]] -name = "cargo-platform" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" -dependencies = [ - "serde", -] - -[[package]] -name = "cargo_metadata" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" -dependencies = [ - "camino", - "cargo-platform", - "semver", - "serde", - "serde_json", -] - [[package]] name = "cast" version = "0.3.0" @@ -1716,9 +1584,9 @@ checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" dependencies = [ "ahash", "arrow", - "arrow-array 53.3.0", - "arrow-ipc 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-ipc", + "arrow-schema", "async-compression", "async-trait", "bytes", @@ -1751,7 +1619,7 @@ dependencies = [ "num_cpus", "object_store 0.11.1", "parking_lot", - "parquet 53.3.0", + "parquet", "paste", "pin-project-lite", "rand", @@ -1771,7 +1639,7 @@ version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" dependencies = [ - "arrow-schema 53.3.0", + "arrow-schema", "async-trait", "datafusion-common", "datafusion-execution", @@ -1788,9 +1656,9 @@ checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" dependencies = [ "ahash", "arrow", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-schema", "chrono", "half", "hashbrown 0.14.5", @@ -1798,7 +1666,7 @@ dependencies = [ "libc", "num_cpus", "object_store 0.11.1", - "parquet 53.3.0", + "parquet", "paste", "sqlparser", "tokio", @@ -1843,8 +1711,8 @@ checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" dependencies = [ "ahash", "arrow", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", + "arrow-array", + "arrow-buffer", "chrono", "datafusion-common", "datafusion-expr-common", @@ -1875,7 +1743,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" dependencies = [ "arrow", - "arrow-buffer 53.3.0", + "arrow-buffer", "base64 0.22.1", "blake2", "blake3", @@ -1903,7 +1771,7 @@ checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" dependencies = [ "ahash", "arrow", - "arrow-schema 53.3.0", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1937,10 +1805,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" dependencies = [ "arrow", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", + "arrow-array", + "arrow-buffer", "arrow-ord", - "arrow-schema 53.3.0", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1993,10 +1861,10 @@ checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" dependencies = [ "ahash", "arrow", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", + "arrow-array", + "arrow-buffer", "arrow-ord", - "arrow-schema 53.3.0", + "arrow-schema", "arrow-string", "base64 0.22.1", "chrono", @@ -2037,7 +1905,7 @@ version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" dependencies = [ - "arrow-schema 53.3.0", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", @@ -2053,10 +1921,10 @@ checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" dependencies = [ "ahash", "arrow", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", + "arrow-array", + "arrow-buffer", "arrow-ord", - "arrow-schema 53.3.0", + "arrow-schema", "async-trait", "chrono", "datafusion-common", @@ -2087,8 +1955,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" dependencies = [ "arrow", - "arrow-array 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-schema", "datafusion-common", "datafusion-expr", "log", @@ -2103,7 +1971,7 @@ version = "42.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "220d7ab0ffadd8b1af753904b18dd92d270271810b1ce9f8be3c3dbe2392b636" dependencies = [ - "arrow-buffer 53.3.0", + "arrow-buffer", "async-recursion", "chrono", "datafusion", @@ -2256,6 +2124,26 @@ dependencies = [ "log", ] +[[package]] +name = "equator" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea" +dependencies = [ + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -2272,15 +2160,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "error-chain" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" -dependencies = [ - "version_check", -] - [[package]] name = "event-listener" version = "2.5.3" @@ -2423,9 +2302,9 @@ dependencies = [ [[package]] name = "fsst" -version = "0.19.2" +version = "0.20.0" dependencies = [ - "arrow-array 53.3.0", + "arrow-array", "lance-datagen", "rand", "rand_xoshiro", @@ -3123,18 +3002,18 @@ dependencies = [ [[package]] name = "lance" -version = "0.19.2" +version = "0.20.0" dependencies = [ "all_asserts", "approx", "arrow", "arrow-arith", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", + "arrow-array", + "arrow-buffer", "arrow-ord", "arrow-row", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-schema", + "arrow-select", "async-recursion", "async-trait", "async_cell", @@ -3155,7 +3034,7 @@ dependencies = [ "env_logger 0.10.2", "futures", "half", - "itertools 0.12.1", + "itertools 0.13.0", "lance-arrow", "lance-core", "lance-datafusion", @@ -3182,6 +3061,7 @@ dependencies = [ "prost 0.12.6", "prost 0.13.3", "prost-build 0.13.3", + "prost-types 0.13.3", "rand", "random_word", "roaring", @@ -3202,14 +3082,14 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.19.2" -dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", +version = "0.20.0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", "bytes", "getrandom", "half", @@ -3219,11 +3099,11 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.19.2" +version = "0.20.0" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-schema", "async-trait", "byteorder", "bytes", @@ -3258,14 +3138,14 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arrow", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", + "arrow-array", + "arrow-buffer", "arrow-ord", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-schema", + "arrow-select", "async-trait", "datafusion", "datafusion-common", @@ -3286,12 +3166,12 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arrow", - "arrow-array 53.3.0", - "arrow-cast 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-cast", + "arrow-schema", "chrono", "criterion", "futures", @@ -3303,17 +3183,17 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arrayref", "arrow", "arrow-arith", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", "bytemuck", "byteorder", "bytes", @@ -3322,7 +3202,7 @@ dependencies = [ "futures", "hex", "hyperloglogplus", - "itertools 0.12.1", + "itertools 0.13.0", "lance-arrow", "lance-core", "lance-datagen", @@ -3349,11 +3229,11 @@ dependencies = [ [[package]] name = "lance-encoding-datafusion" -version = "0.19.2" +version = "0.20.0" dependencies = [ - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-schema", "bytes", "datafusion", "datafusion-common", @@ -3381,14 +3261,14 @@ dependencies = [ [[package]] name = "lance-file" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arrow-arith", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "async-recursion", "async-trait", "byteorder", @@ -3423,14 +3303,14 @@ dependencies = [ [[package]] name = "lance-index" -version = "0.19.2" +version = "0.20.0" dependencies = [ "approx", "arrow", - "arrow-array 53.3.0", + "arrow-array", "arrow-ord", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-schema", + "arrow-select", "async-recursion", "async-trait", "bitvec", @@ -3446,7 +3326,7 @@ dependencies = [ "deepsize", "futures", "half", - "itertools 0.12.1", + "itertools 0.13.0", "lance-arrow", "lance-core", "lance-datafusion", @@ -3482,16 +3362,16 @@ dependencies = [ [[package]] name = "lance-io" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arrow", "arrow-arith", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", - "arrow-data 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", "async-priority-channel", "async-recursion", "async-trait", @@ -3509,7 +3389,7 @@ dependencies = [ "log", "mockall", "object_store 0.10.2", - "parquet 52.2.0", + "parquet", "path_abs", "pin-project", "pprof", @@ -3527,13 +3407,14 @@ dependencies = [ [[package]] name = "lance-jni" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arrow", - "arrow-schema 53.3.0", + "arrow-schema", "datafusion", "jni", "lance", + "lance-datafusion", "lance-encoding", "lance-index", "lance-io", @@ -3547,13 +3428,13 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "0.19.2" +version = "0.20.0" dependencies = [ "approx", "arrow-arith", - "arrow-array 53.3.0", + "arrow-array", "arrow-ord", - "arrow-schema 53.3.0", + "arrow-schema", "bitvec", "cc", "criterion", @@ -3576,13 +3457,13 @@ dependencies = [ [[package]] name = "lance-table" -version = "0.19.2" +version = "0.20.0" dependencies = [ "arrow", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-ipc 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-schema", "async-trait", "aws-credential-types", "aws-sdk-dynamodb", @@ -3620,7 +3501,7 @@ dependencies = [ [[package]] name = "lance-test-macros" -version = "0.19.2" +version = "0.20.0" dependencies = [ "proc-macro2", "quote", @@ -3629,10 +3510,10 @@ dependencies = [ [[package]] name = "lance-testing" -version = "0.19.2" +version = "0.20.0" dependencies = [ - "arrow-array 53.3.0", - "arrow-schema 53.3.0", + "arrow-array", + "arrow-schema", "lance-arrow", "num-traits", "rand", @@ -3670,41 +3551,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" -[[package]] -name = "lexical-core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" -dependencies = [ - "lexical-parse-float 0.8.5", - "lexical-parse-integer 0.8.6", - "lexical-util 0.8.5", - "lexical-write-float 0.8.5", - "lexical-write-integer 0.8.5", -] - [[package]] name = "lexical-core" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" dependencies = [ - "lexical-parse-float 1.0.2", - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", - "lexical-write-float 1.0.2", - "lexical-write-integer 1.0.2", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" -dependencies = [ - "lexical-parse-integer 0.8.6", - "lexical-util 0.8.5", - "static_assertions", + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", ] [[package]] @@ -3713,18 +3570,8 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" dependencies = [ - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util 0.8.5", + "lexical-parse-integer", + "lexical-util", "static_assertions", ] @@ -3734,16 +3581,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" dependencies = [ - "lexical-util 1.0.3", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" -dependencies = [ + "lexical-util", "static_assertions", ] @@ -3756,35 +3594,14 @@ dependencies = [ "static_assertions", ] -[[package]] -name = "lexical-write-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" -dependencies = [ - "lexical-util 0.8.5", - "lexical-write-integer 0.8.5", - "static_assertions", -] - [[package]] name = "lexical-write-float" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" dependencies = [ - "lexical-util 1.0.3", - "lexical-write-integer 1.0.2", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util 0.8.5", + "lexical-util", + "lexical-write-integer", "static_assertions", ] @@ -3794,7 +3611,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" dependencies = [ - "lexical-util 1.0.3", + "lexical-util", "static_assertions", ] @@ -3880,15 +3697,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "mach2" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" -dependencies = [ - "libc", -] - [[package]] name = "match_cfg" version = "0.1.0" @@ -3983,14 +3791,13 @@ dependencies = [ [[package]] name = "mockall" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43766c2b5203b10de348ffe19f7e54564b64f3d6018ff7648d1e2d6d3a0f0a48" +checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2" dependencies = [ "cfg-if", "downcast", "fragile", - "lazy_static", "mockall_derive", "predicates", "predicates-tree", @@ -3998,9 +3805,9 @@ dependencies = [ [[package]] name = "mockall_derive" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cbce79ec385a1d4f54baa90a76401eb15d9cab93685f62e7e9f942aa00ae2" +checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898" dependencies = [ "cfg-if", "proc-macro2", @@ -4010,22 +3817,21 @@ dependencies = [ [[package]] name = "moka" -version = "0.11.3" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa6e72583bf6830c956235bff0d5afec8cf2952f579ebad18ae7821a917d950f" +checksum = "32cf62eb4dd975d2dde76432fb1075c49e3ee2331cf36f1f8fd4b66550d32b6f" dependencies = [ - "async-io 1.13.0", - "async-lock 2.8.0", + "async-lock 3.4.0", + "async-trait", "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", + "event-listener 5.3.1", "futures-util", "once_cell", "parking_lot", "quanta", "rustc_version", - "scheduled-thread-pool", - "skeptic", "smallvec", "tagptr", "thiserror 1.0.69", @@ -4335,39 +4141,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "parquet" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" -dependencies = [ - "ahash", - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-cast 52.2.0", - "arrow-data 52.2.0", - "arrow-ipc 52.2.0", - "arrow-schema 52.2.0", - "arrow-select 52.2.0", - "base64 0.22.1", - "brotli 6.0.0", - "bytes", - "chrono", - "flate2", - "half", - "hashbrown 0.14.5", - "lz4_flex", - "num", - "num-bigint", - "paste", - "seq-macro", - "snap", - "thrift", - "twox-hash", - "zstd", - "zstd-sys", -] - [[package]] name = "parquet" version = "53.3.0" @@ -4375,13 +4148,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" dependencies = [ "ahash", - "arrow-array 53.3.0", - "arrow-buffer 53.3.0", - "arrow-cast 53.3.0", - "arrow-data 53.3.0", - "arrow-ipc 53.3.0", - "arrow-schema 53.3.0", - "arrow-select 53.3.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", "base64 0.22.1", "brotli 7.0.0", "bytes", @@ -4644,10 +4417,11 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "pprof" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb" +checksum = "ebbe2f8898beba44815fdc9e5a4ae9c929e21c5dc29b0c774a15555f7f58d6d0" dependencies = [ + "aligned-vec", "backtrace", "cfg-if", "criterion", @@ -4719,6 +4493,15 @@ dependencies = [ "syn 2.0.89", ] +[[package]] +name = "proc-macro-crate" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.92" @@ -4854,26 +4637,14 @@ dependencies = [ "prost 0.13.3", ] -[[package]] -name = "pulldown-cmark" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" -dependencies = [ - "bitflags 2.6.0", - "memchr", - "unicase", -] - [[package]] name = "quanta" -version = "0.11.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" +checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" dependencies = [ "crossbeam-utils", "libc", - "mach2", "once_cell", "raw-cpuid", "wasi", @@ -5049,11 +4820,11 @@ checksum = "f60fcc7d6849342eff22c4350c8b9a989ee8ceabc4b481253e8946b9fe83d684" [[package]] name = "raw-cpuid" -version = "10.7.0" +version = "11.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +checksum = "1ab240315c661615f2ee9f0f2cd32d5a7343a84d5ebcccb99d46e6637565e7b0" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", ] [[package]] @@ -5262,9 +5033,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.19.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" +checksum = "0a2c585be59b6b5dd66a9d2084aa1d8bd52fbdb806eafdeffb52791147862035" dependencies = [ "futures", "futures-timer", @@ -5274,12 +5045,13 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.19.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" +checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a" dependencies = [ "cfg-if", "glob", + "proc-macro-crate", "proc-macro2", "quote", "regex", @@ -5319,9 +5091,9 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] @@ -5506,15 +5278,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "scheduled-thread-pool" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" -dependencies = [ - "parking_lot", -] - [[package]] name = "schemars" version = "0.8.21" @@ -5717,21 +5480,6 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" -[[package]] -name = "skeptic" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" -dependencies = [ - "bytecount", - "cargo_metadata", - "error-chain", - "glob", - "pulldown-cmark", - "tempfile", - "walkdir", -] - [[package]] name = "sketches-ddsketch" version = "0.2.2" @@ -6499,6 +6247,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" + +[[package]] +name = "toml_edit" +version = "0.22.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "tower" version = "0.4.13" @@ -6600,9 +6365,9 @@ dependencies = [ [[package]] name = "triomphe" -version = "0.1.13" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6631e42e10b40c0690bf92f404ebcfe6e1fdb480391d15f17cc8e96eeed5369" +checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" [[package]] name = "try-lock" @@ -7281,6 +7046,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +dependencies = [ + "memchr", +] + [[package]] name = "wyz" version = "0.5.1" From 65c2d37b1d431c60fd0cd307da98dd49b1e4bb98 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 2 Dec 2024 17:15:32 -0800 Subject: [PATCH 17/24] Fix substrait issue. Fix clippy warnings. Fix pyo3 upgrade lint --- python/src/arrow.rs | 2 +- python/src/datagen.rs | 13 ++- python/src/dataset.rs | 101 +++++++++++++-------- python/src/dataset/commit.rs | 8 +- python/src/dataset/optimize.rs | 21 +++-- python/src/debug.rs | 21 +++-- python/src/file.rs | 2 + python/src/fragment.rs | 7 +- python/src/indices.rs | 12 ++- python/src/lib.rs | 6 +- python/src/schema.rs | 4 +- python/src/tracing.rs | 1 + rust/lance-datafusion/src/substrait.rs | 8 +- rust/lance-encoding-datafusion/src/zone.rs | 2 +- rust/lance/src/io/exec/rowids.rs | 8 +- 15 files changed, 131 insertions(+), 85 deletions(-) diff --git a/python/src/arrow.rs b/python/src/arrow.rs index bf3fb5f68c..d7e30c01a6 100644 --- a/python/src/arrow.rs +++ b/python/src/arrow.rs @@ -33,7 +33,7 @@ impl BFloat16 { } #[classmethod] - fn from_bytes(_cls: &PyType, bytes: &[u8]) -> PyResult { + fn from_bytes(_cls: &Bound<'_, PyType>, bytes: &[u8]) -> PyResult { if bytes.len() != 2 { PyValueError::new_err(format!( "BFloat16::from_bytes: expected 2 bytes, got {}", diff --git a/python/src/datagen.rs b/python/src/datagen.rs index c23949b203..7980fa8ee7 100644 --- a/python/src/datagen.rs +++ b/python/src/datagen.rs @@ -2,7 +2,11 @@ use arrow::pyarrow::PyArrowType; use arrow_array::RecordBatch; use arrow_schema::Schema; use lance_datagen::{BatchCount, ByteCount}; -use pyo3::{pyfunction, types::PyModule, wrap_pyfunction, PyResult, Python}; +use pyo3::{ + pyfunction, + types::{PyModule, PyModuleMethods}, + wrap_pyfunction, Bound, PyResult, Python, +}; const DEFAULT_BATCH_SIZE_BYTES: u64 = 32 * 1024; const DEFAULT_BATCH_COUNT: u32 = 4; @@ -13,6 +17,7 @@ pub fn is_datagen_supported() -> bool { } #[pyfunction] +#[pyo3(signature=(schema, batch_count=None, bytes_in_batch=None))] pub fn rand_batches( schema: PyArrowType, batch_count: Option, @@ -35,10 +40,10 @@ pub fn rand_batches( .collect::>>>() } -pub fn register_datagen(py: Python, m: &PyModule) -> PyResult<()> { - let datagen = PyModule::new(py, "datagen")?; +pub fn register_datagen(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { + let datagen = PyModule::new_bound(py, "datagen")?; datagen.add_wrapped(wrap_pyfunction!(is_datagen_supported))?; datagen.add_wrapped(wrap_pyfunction!(rand_batches))?; - m.add_submodule(datagen)?; + m.add_submodule(&datagen)?; Ok(()) } diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 40636558be..65fb933270 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -139,12 +139,13 @@ pub struct MergeInsertBuilder { #[pymethods] impl MergeInsertBuilder { #[new] - pub fn new(dataset: &PyAny, on: &PyAny) -> PyResult { + pub fn new(dataset: Bound<'_, PyAny>, on: Bound<'_, PyAny>) -> PyResult { let dataset: Py = dataset.extract()?; let ds = dataset.borrow(on.py()).ds.clone(); // Either a single string, which we put in a vector or an iterator // of strings, which we collect into a vector - let on = PyAny::downcast::(on) + let on = on + .downcast::() .map(|val| vec![val.to_string()]) .or_else(|_| { let iterator = on.iter().map_err(|_| { @@ -154,7 +155,7 @@ impl MergeInsertBuilder { })?; let mut keys = Vec::new(); for key in iterator { - keys.push(PyAny::downcast::(key?)?.to_string()); + keys.push(key?.downcast::()?.to_string()); } PyResult::Ok(keys) })?; @@ -170,6 +171,7 @@ impl MergeInsertBuilder { Ok(Self { builder, dataset }) } + #[pyo3(signature=(condition=None))] pub fn when_matched_update_all<'a>( mut slf: PyRefMut<'a, Self>, condition: Option<&str>, @@ -190,6 +192,7 @@ impl MergeInsertBuilder { Ok(slf) } + #[pyo3(signature=(expr=None))] pub fn when_not_matched_by_source_delete<'a>( mut slf: PyRefMut<'a, Self>, expr: Option<&str>, @@ -218,11 +221,11 @@ impl MergeInsertBuilder { .spawn(Some(py), job.execute_reader(new_data))? .map_err(|err| PyIOError::new_err(err.to_string()))?; - let dataset = self.dataset.as_ref(py); + let dataset = self.dataset.bind(py); dataset.borrow_mut().ds = new_self.0; let merge_stats = new_self.1; - let merge_dict = PyDict::new(py); + let merge_dict = PyDict::new_bound(py); merge_dict.set_item("num_inserted_rows", merge_stats.num_inserted_rows)?; merge_dict.set_item("num_updated_rows", merge_stats.num_updated_rows)?; merge_dict.set_item("num_deleted_rows", merge_stats.num_deleted_rows)?; @@ -343,7 +346,7 @@ impl Operation { name: String, fields: Vec, dataset_version: u64, - fragment_ids: &PySet, + fragment_ids: Bound<'_, PySet>, ) -> PyResult { let fragment_ids: Vec = fragment_ids .iter() @@ -391,7 +394,7 @@ impl Operation { } } -pub fn transforms_from_python(transforms: &PyAny) -> PyResult { +pub fn transforms_from_python(transforms: &Bound<'_, PyAny>) -> PyResult { if let Ok(transforms) = transforms.extract::<&PyDict>() { let expressions = transforms .iter() @@ -448,6 +451,7 @@ pub struct Dataset { impl Dataset { #[allow(clippy::too_many_arguments)] #[new] + #[pyo3(signature=(uri, version=None, block_size=None, index_cache_size=None, metadata_cache_size=None, commit_handler=None, storage_options=None, manifest=None))] fn new( py: Python, uri: String, @@ -476,10 +480,10 @@ impl Dataset { let mut builder = DatasetBuilder::from_uri(&uri).with_read_params(params); if let Some(ver) = version { - if let Ok(i) = ver.downcast::(py) { + if let Ok(i) = ver.downcast_bound::(py) { let v: u64 = i.extract()?; builder = builder.with_version(v); - } else if let Ok(v) = ver.downcast::(py) { + } else if let Ok(v) = ver.downcast_bound::(py) { let t: &str = v.extract()?; builder = builder.with_tag(t); } else { @@ -544,7 +548,7 @@ impl Dataset { fn serialized_manifest(&self, py: Python) -> PyObject { let manifest_bytes = self.ds.manifest().serialized(); - PyBytes::new(py, &manifest_bytes).into() + PyBytes::new_bound(py, &manifest_bytes).into() } /// Load index metadata. @@ -558,7 +562,7 @@ impl Dataset { index_metadata .iter() .map(|idx| { - let dict = PyDict::new(py); + let dict = PyDict::new_bound(py); let schema = self_.ds.schema(); let idx_schema = schema.project_by_ids(idx.fields.as_slice(), true); @@ -587,7 +591,7 @@ impl Dataset { .map(|f| f.name.clone()) .collect::>(); - let fragment_set = PySet::empty(py).unwrap(); + let fragment_set = PySet::empty_bound(py).unwrap(); if let Some(bitmap) = &idx.fragment_bitmap { for fragment_id in bitmap.iter() { fragment_set.add(fragment_id).unwrap(); @@ -609,6 +613,7 @@ impl Dataset { } #[allow(clippy::too_many_arguments)] + #[pyo3(signature=(columns=None, columns_with_transform=None, filter=None, prefilter=None, limit=None, offset=None, nearest=None, batch_size=None, io_buffer_size=None, batch_readahead=None, fragment_readahead=None, scan_in_order=None, fragments=None, with_row_id=None, with_row_address=None, use_stats=None, substrait_filter=None, fast_search=None, full_text_query=None, late_materialization=None, use_scalar_index=None))] fn scanner( self_: PyRef<'_, Self>, columns: Option>, @@ -629,7 +634,7 @@ impl Dataset { use_stats: Option, substrait_filter: Option>, fast_search: Option, - full_text_query: Option<&PyDict>, + full_text_query: Option>, late_materialization: Option, use_scalar_index: Option, ) -> PyResult { @@ -672,7 +677,8 @@ impl Dataset { None } else { Some( - PyAny::downcast::(columns)? + columns + .downcast::()? .iter() .map(|c| c.extract::()) .collect::>>()?, @@ -862,12 +868,14 @@ impl Dataset { Ok(Scanner::new(scan)) } + #[pyo3(signature=(filter=None))] fn count_rows(&self, filter: Option) -> PyResult { RT.runtime .block_on(self.ds.count_rows(filter)) .map_err(|err| PyIOError::new_err(err.to_string())) } + #[pyo3(signature=(row_indices, columns = None, columns_with_transform = None))] fn take( self_: PyRef<'_, Self>, row_indices: Vec, @@ -894,6 +902,7 @@ impl Dataset { batch.to_pyarrow(self_.py()) } + #[pyo3(signature=(row_indices, columns = None, columns_with_transform = None))] fn take_rows( self_: PyRef<'_, Self>, row_indices: Vec, @@ -981,7 +990,7 @@ impl Dataset { Ok(PyArrowType(Box::new(LanceReader::from_stream(stream)))) } - fn alter_columns(&mut self, alterations: &PyList) -> PyResult<()> { + fn alter_columns(&mut self, alterations: Bound<'_, PyList>) -> PyResult<()> { let alterations = alterations .iter() .map(|obj| { @@ -1066,7 +1075,12 @@ impl Dataset { Ok(()) } - fn update(&mut self, updates: &PyDict, predicate: Option<&str>) -> PyResult { + #[pyo3(signature=(updates, predicate=None))] + fn update( + &mut self, + updates: Bound<'_, PyDict>, + predicate: Option<&str>, + ) -> PyResult { let mut builder = UpdateBuilder::new(self.ds.clone()); if let Some(predicate) = predicate { builder = builder @@ -1074,7 +1088,7 @@ impl Dataset { .map_err(|err| PyValueError::new_err(err.to_string()))?; } - for (key, value) in updates { + for (key, value) in &updates { let column: &str = key.extract()?; let expr: &str = value.extract()?; @@ -1092,7 +1106,7 @@ impl Dataset { .map_err(|err| PyIOError::new_err(err.to_string()))?; self.ds = new_self.new_dataset; - let update_dict = PyDict::new(updates.py()); + let update_dict = PyDict::new_bound(updates.py()); let num_rows_updated = new_self.rows_updated; update_dict.set_item("num_rows_updated", num_rows_updated)?; Ok(update_dict.into()) @@ -1111,7 +1125,7 @@ impl Dataset { let pyvers: Vec = versions .iter() .map(|v| { - let dict = PyDict::new(py); + let dict = PyDict::new_bound(py); dict.set_item("version", v.version).unwrap(); dict.set_item( "timestamp", @@ -1119,7 +1133,8 @@ impl Dataset { ) .unwrap(); let tup: Vec<(&String, &String)> = v.metadata.iter().collect(); - dict.set_item("metadata", tup.into_py_dict(py)).unwrap(); + dict.set_item("metadata", tup.into_py_dict_bound(py)) + .unwrap(); dict.to_object(py) }) .collect::>() @@ -1140,10 +1155,10 @@ impl Dataset { } fn checkout_version(&self, py: Python, version: PyObject) -> PyResult { - if let Ok(i) = version.downcast::(py) { + if let Ok(i) = version.downcast_bound::(py) { let ref_: u64 = i.extract()?; self._checkout_version(ref_) - } else if let Ok(v) = version.downcast::(py) { + } else if let Ok(v) = version.downcast_bound::(py) { let ref_: &str = v.extract()?; self._checkout_version(ref_) } else { @@ -1163,6 +1178,7 @@ impl Dataset { } /// Cleanup old versions from the dataset + #[pyo3(signature = (older_than_micros, delete_unverified = None, error_if_tagged_old_versions = None))] fn cleanup_old_versions( &self, older_than_micros: i64, @@ -1191,9 +1207,9 @@ impl Dataset { .list_tags() .map_err(|err| PyValueError::new_err(err.to_string()))?; Python::with_gil(|py| { - let pytags = PyDict::new(py); + let pytags = PyDict::new_bound(py); for (k, v) in tags.iter() { - let dict = PyDict::new(py); + let dict = PyDict::new_bound(py); dict.set_item("version", v.version).unwrap(); dict.set_item("manifest_size", v.manifest_size).unwrap(); dict.to_object(py); @@ -1263,6 +1279,7 @@ impl Dataset { Ok(()) } + #[pyo3(signature = (columns, index_type, name = None, replace = None, storage_options = None, kwargs = None))] fn create_index( &mut self, columns: Vec<&str>, @@ -1409,11 +1426,12 @@ impl Dataset { #[allow(clippy::too_many_arguments)] #[staticmethod] + #[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, enable_v2_manifest_paths = None, detached = None, max_retries = None))] fn commit( dest: &Bound, operation: Operation, read_version: Option, - commit_lock: Option<&PyAny>, + commit_lock: Option>, storage_options: Option>, enable_v2_manifest_paths: Option, detached: Option, @@ -1427,7 +1445,7 @@ impl Dataset { ..Default::default() }); - let commit_handler = commit_lock.map(|commit_lock| { + let commit_handler = commit_lock.as_ref().map(|commit_lock| { Arc::new(PyCommitLock::new(commit_lock.to_object(commit_lock.py()))) as Arc }); @@ -1467,10 +1485,11 @@ impl Dataset { } #[staticmethod] + #[pyo3(signature = (dest, transactions, commit_lock = None, storage_options = None, enable_v2_manifest_paths = None, detached = None, max_retries = None))] fn commit_batch<'py>( dest: &Bound<'py, PyAny>, transactions: Vec>, - commit_lock: Option<&'py PyAny>, + commit_lock: Option>, storage_options: Option>, enable_v2_manifest_paths: Option, detached: Option, @@ -1554,12 +1573,13 @@ impl Dataset { Ok(()) } + #[pyo3(signature = (reader, batch_size = None))] fn add_columns_from_reader( &mut self, - reader: &Bound, + reader: Bound<'_, PyAny>, batch_size: Option, ) -> PyResult<()> { - let batches = ArrowArrayStreamReader::from_pyarrow_bound(reader)?; + let batches = ArrowArrayStreamReader::from_pyarrow_bound(&reader)?; let transforms = NewColumnTransform::Reader(Box::new(batches)); @@ -1575,9 +1595,10 @@ impl Dataset { Ok(()) } + #[pyo3(signature = (transforms, read_columns = None, batch_size = None))] fn add_columns( &mut self, - transforms: &PyAny, + transforms: &Bound<'_, PyAny>, read_columns: Option>, batch_size: Option, ) -> PyResult<()> { @@ -1624,11 +1645,11 @@ impl Dataset { #[pyfunction(name = "_write_dataset")] pub fn write_dataset( - reader: &Bound, - dest: &Bound, - options: &PyDict, + reader: Bound<'_, PyAny>, + dest: Bound<'_, PyAny>, + options: Bound<'_, PyDict>, ) -> PyResult { - let params = get_write_params(options)?; + let params = get_write_params(options.as_gil_ref())?; let py = options.py(); let dest = if dest.is_instance_of::() { let dataset: Dataset = dest.extract()?; @@ -1645,7 +1666,7 @@ pub fn write_dataset( RT.block_on(Some(py), LanceDataset::write(batches, dest, params))? .map_err(|err| PyIOError::new_err(err.to_string()))? } else { - let batches = ArrowArrayStreamReader::from_pyarrow_bound(reader)?; + let batches = ArrowArrayStreamReader::from_pyarrow_bound(&reader)?; RT.block_on(Some(py), LanceDataset::write(batches, dest, params))? .map_err(|err| PyIOError::new_err(err.to_string()))? }; @@ -1915,7 +1936,7 @@ impl WriteFragmentProgress for PyWriteProgress { Python::with_gil(|py| -> PyResult<()> { self.py_obj - .call_method(py, "_do_begin", (json_str,), None)?; + .call_method_bound(py, "_do_begin", (json_str,), None)?; Ok(()) }) .map_err(|e| { @@ -1932,7 +1953,7 @@ impl WriteFragmentProgress for PyWriteProgress { Python::with_gil(|py| -> PyResult<()> { self.py_obj - .call_method(py, "_do_complete", (json_str,), None)?; + .call_method_bound(py, "_do_complete", (json_str,), None)?; Ok(()) }) .map_err(|e| { @@ -1947,13 +1968,13 @@ impl WriteFragmentProgress for PyWriteProgress { /// Formats a Python error just as it would in Python interpreter. fn format_python_error(e: PyErr, py: Python) -> PyResult { - let sys_mod = py.import("sys")?; + let sys_mod = py.import_bound("sys")?; // the traceback is the third element of the tuple returned by sys.exc_info() let traceback = sys_mod.call_method0("exc_info")?.get_item(2)?; - let tracback_mod = py.import("traceback")?; + let tracback_mod = py.import_bound("traceback")?; let fmt_func = tracback_mod.getattr("format_exception")?; - let e_type = e.get_type(py).to_owned(); + let e_type = e.get_type_bound(py).to_owned(); let formatted = fmt_func.call1((e_type, &e, traceback))?; let lines: Vec = formatted.extract()?; Ok(lines.join("")) diff --git a/python/src/dataset/commit.rs b/python/src/dataset/commit.rs index d34cef4112..4e6ecbc594 100644 --- a/python/src/dataset/commit.rs +++ b/python/src/dataset/commit.rs @@ -24,7 +24,7 @@ use pyo3::{exceptions::PyIOError, prelude::*}; lazy_static! { static ref PY_CONFLICT_ERROR: PyResult = { Python::with_gil(|py| { - py.import("lance") + py.import_bound("lance") .and_then(|lance| lance.getattr("commit")) .and_then(|commit| commit.getattr("CommitConflictError")) .map(|error| error.to_object(py)) @@ -34,7 +34,7 @@ lazy_static! { fn handle_error(py_err: PyErr, py: Python) -> CommitError { let conflict_err_type = match &*PY_CONFLICT_ERROR { - Ok(err) => err.as_ref(py).get_type(), + Ok(err) => err.bind(py).get_type(), Err(import_error) => { return CommitError::OtherError(Error::Internal { message: format!("Error importing from pylance {}", import_error), @@ -43,7 +43,7 @@ fn handle_error(py_err: PyErr, py: Python) -> CommitError { } }; - if py_err.is_instance(py, conflict_err_type) { + if py_err.is_instance_bound(py, &conflict_err_type) { CommitError::CommitConflict } else { CommitError::OtherError(Error::Internal { @@ -113,7 +113,7 @@ impl CommitLease for PyCommitLease { // context manager. PyIOError::new_err("commit failed").restore(py); let args = py - .import("sys") + .import_bound("sys") .unwrap() .getattr("exc_info") .unwrap() diff --git a/python/src/dataset/optimize.rs b/python/src/dataset/optimize.rs index eeac1ea8a8..9ba4f5e989 100644 --- a/python/src/dataset/optimize.rs +++ b/python/src/dataset/optimize.rs @@ -23,7 +23,7 @@ use pyo3::{exceptions::PyNotImplementedError, pyclass::CompareOp, types::PyTuple use super::*; -fn parse_compaction_options(options: &PyDict) -> PyResult { +fn parse_compaction_options(options: &Bound<'_, PyDict>) -> PyResult { let mut opts = CompactionOptions::default(); for (key, value) in options.into_iter() { @@ -68,7 +68,8 @@ fn unwrap_dataset(dataset: PyObject) -> PyResult> { } fn wrap_fragment(py: Python<'_>, fragment: &Fragment) -> PyResult { - let fragment_metadata = PyModule::import(py, "lance.fragment")?.getattr("FragmentMetadata")?; + let fragment_metadata = + PyModule::import_bound(py, "lance.fragment")?.getattr("FragmentMetadata")?; let fragment_json = serde_json::to_string(&fragment).map_err(|x| { PyValueError::new_err(format!("failed to serialize fragment metadata: {}", x)) })?; @@ -190,8 +191,8 @@ impl PyCompactionPlan { pub fn __reduce__(&self, py: Python<'_>) -> PyResult<(PyObject, PyObject)> { let state = self.json()?; - let state = PyTuple::new(py, vec![state]).extract()?; - let from_json = PyModule::import(py, "lance.optimize")? + let state = PyTuple::new_bound(py, vec![state]).extract()?; + let from_json = PyModule::import_bound(py, "lance.optimize")? .getattr("CompactionPlan")? .getattr("from_json")? .extract()?; @@ -302,8 +303,8 @@ impl PyCompactionTask { pub fn __reduce__(&self, py: Python<'_>) -> PyResult<(PyObject, PyObject)> { let state = self.json()?; - let state = PyTuple::new(py, vec![state]).extract()?; - let from_json = PyModule::import(py, "lance.optimize")? + let state = PyTuple::new_bound(py, vec![state]).extract()?; + let from_json = PyModule::import_bound(py, "lance.optimize")? .getattr("CompactionTask")? .getattr("from_json")? .extract()?; @@ -417,8 +418,8 @@ impl PyRewriteResult { pub fn __reduce__(&self, py: Python<'_>) -> PyResult<(PyObject, PyObject)> { let state = self.json()?; - let state = PyTuple::new(py, vec![state]).extract()?; - let from_json = PyModule::import(py, "lance.optimize")? + let state = PyTuple::new_bound(py, vec![state]).extract()?; + let from_json = PyModule::import_bound(py, "lance.optimize")? .getattr("RewriteResult")? .getattr("from_json")? .extract()?; @@ -472,7 +473,7 @@ impl PyCompaction { // Make sure we parse the options within a scoped GIL context, so we // aren't holding the GIL while blocking the thread on the operation. let opts = Python::with_gil(|py| { - let options = options.downcast::(py)?; + let options = options.downcast_bound::(py)?; parse_compaction_options(options) })?; let mut new_ds = dataset.ds.as_ref().clone(); @@ -509,7 +510,7 @@ impl PyCompaction { // Make sure we parse the options within a scoped GIL context, so we // aren't holding the GIL while blocking the thread on the operation. let opts = Python::with_gil(|py| { - let options = options.downcast::(py)?; + let options = options.downcast_bound::(py)?; parse_compaction_options(options) })?; let plan = RT diff --git a/python/src/debug.rs b/python/src/debug.rs index 8856c1fb28..105f73feec 100644 --- a/python/src/debug.rs +++ b/python/src/debug.rs @@ -13,20 +13,20 @@ use crate::{Dataset, FragmentMetadata, RT}; /// /// This can be used to view the field ids and types in the schema. #[pyfunction] -pub fn format_schema(dataset: &PyAny) -> PyResult { +pub fn format_schema(dataset: &Bound<'_, PyAny>) -> PyResult { let py = dataset.py(); let dataset = dataset.getattr("_ds")?.extract::>()?; - let dataset_ref = &dataset.as_ref(py).borrow().ds; + let dataset_ref = &dataset.bind(py).borrow().ds; let schema = dataset_ref.schema(); Ok(format!("{:#?}", schema)) } /// Print the full Lance manifest of the dataset. #[pyfunction] -pub fn format_manifest(dataset: &PyAny) -> PyResult { +pub fn format_manifest(dataset: &Bound<'_, PyAny>) -> PyResult { let py = dataset.py(); let dataset = dataset.getattr("_ds")?.extract::>()?; - let dataset_ref = &dataset.as_ref(py).borrow().ds; + let dataset_ref = &dataset.bind(py).borrow().ds; let manifest = dataset_ref.manifest(); Ok(format!("{:#?}", manifest)) } @@ -81,17 +81,20 @@ impl PrettyPrintableFragment { /// Debug print a LanceFragment. #[pyfunction] -pub fn format_fragment(fragment: &PyAny, dataset: &PyAny) -> PyResult { +pub fn format_fragment( + fragment: &Bound<'_, PyAny>, + dataset: &Bound<'_, PyAny>, +) -> PyResult { let py = fragment.py(); let fragment = fragment .getattr("_metadata")? .extract::>()?; let dataset = dataset.getattr("_ds")?.extract::>()?; - let dataset_ref = &dataset.as_ref(py).borrow().ds; + let dataset_ref = &dataset.bind(py).borrow().ds; let schema = dataset_ref.schema(); - let meta = fragment.as_ref(py).borrow().inner.clone(); + let meta = fragment.bind(py).borrow().inner.clone(); let pp_meta = PrettyPrintableFragment::new(&meta, schema); Ok(format!("{:#?}", pp_meta)) } @@ -104,12 +107,12 @@ pub fn format_fragment(fragment: &PyAny, dataset: &PyAny) -> PyResult { #[pyfunction] #[pyo3(signature = (dataset, /, max_transactions = 10))] pub fn list_transactions( - dataset: &PyAny, + dataset: &Bound<'_, PyAny>, max_transactions: usize, ) -> PyResult>> { let py = dataset.py(); let dataset = dataset.getattr("_ds")?.extract::>()?; - let mut dataset = dataset.as_ref(py).borrow().ds.clone(); + let mut dataset = dataset.bind(py).borrow().ds.clone(); RT.block_on(Some(py), async move { let mut transactions = vec![]; diff --git a/python/src/file.rs b/python/src/file.rs index e6e3d237d1..ade9c825e5 100644 --- a/python/src/file.rs +++ b/python/src/file.rs @@ -213,6 +213,7 @@ impl LanceFileWriter { #[pymethods] impl LanceFileWriter { #[new] + #[pyo3(signature=(path, schema=None, data_cache_bytes=None, version=None, storage_options=None, keep_original_array=None))] pub fn new( path: String, schema: Option>, @@ -390,6 +391,7 @@ impl LanceFileReader { #[pymethods] impl LanceFileReader { #[new] + #[pyo3(signature=(path, storage_options=None))] pub fn new(path: String, storage_options: Option>) -> PyResult { RT.runtime.block_on(Self::open(path, storage_options)) } diff --git a/python/src/fragment.rs b/python/src/fragment.rs index bc46ce54ad..802d33039d 100644 --- a/python/src/fragment.rs +++ b/python/src/fragment.rs @@ -125,6 +125,7 @@ impl FileFragment { FragmentMetadata::new(self.fragment.metadata().clone()) } + #[pyo3(signature=(_filter=None))] fn count_rows(&self, _filter: Option) -> PyResult { RT.runtime.block_on(async { self.fragment @@ -134,6 +135,7 @@ impl FileFragment { }) } + #[pyo3(signature=(row_indices, columns=None))] fn take( self_: PyRef<'_, Self>, row_indices: Vec, @@ -159,6 +161,7 @@ impl FileFragment { } #[allow(clippy::too_many_arguments)] + #[pyo3(signature=(columns=None, columns_with_transform=None, batch_size=None, filter=None, limit=None, offset=None, with_row_id=None, batch_readahead=None))] fn scanner( self_: PyRef<'_, Self>, columns: Option>, @@ -215,6 +218,7 @@ impl FileFragment { Ok(Scanner::new(scn)) } + #[pyo3(signature=(reader, batch_size=None))] fn add_columns_from_reader( &mut self, reader: &Bound, @@ -234,9 +238,10 @@ impl FileFragment { Ok((FragmentMetadata::new(fragment), LanceSchema(schema))) } + #[pyo3(signature=(transforms, read_columns=None, batch_size=None))] fn add_columns( &mut self, - transforms: &PyAny, + transforms: &Bound<'_, PyAny>, read_columns: Option>, batch_size: Option, ) -> PyResult<(FragmentMetadata, LanceSchema)> { diff --git a/python/src/indices.rs b/python/src/indices.rs index 9b7b315e8f..e2db9d3943 100644 --- a/python/src/indices.rs +++ b/python/src/indices.rs @@ -13,6 +13,8 @@ use lance_index::vector::{ }; use lance_linalg::distance::DistanceType; use pyo3::exceptions::PyValueError; +use pyo3::types::PyModuleMethods; +use pyo3::Bound; use pyo3::{ pyfunction, types::{PyList, PyModule}, @@ -198,6 +200,7 @@ async fn do_transform_vectors( #[pyfunction] #[allow(clippy::too_many_arguments)] +#[pyo3(signature=(dataset, column, dimension, num_subvectors, distance_type, ivf_centroids, pq_codebook, dst_uri, fragments, partitions_ds_uri=None))] pub fn transform_vectors( py: Python<'_>, dataset: &Dataset, @@ -285,7 +288,7 @@ pub fn shuffle_transformed_vectors( match result { Ok(partition_files) => { - let py_list = PyList::new(py, partition_files); + let py_list = PyList::new_bound(py, partition_files); Ok(py_list.into()) } Err(e) => Err(pyo3::exceptions::PyRuntimeError::new_err(e.to_string())), @@ -329,6 +332,7 @@ async fn do_load_shuffled_vectors( } #[pyfunction] +#[pyo3(signature=(filenames, dir_path, dataset, column, ivf_centroids, pq_codebook, pq_dimension, num_subvectors, distance_type, index_name=None))] #[allow(clippy::too_many_arguments)] pub fn load_shuffled_vectors( filenames: Vec, @@ -375,13 +379,13 @@ pub fn load_shuffled_vectors( )? } -pub fn register_indices(py: Python, m: &PyModule) -> PyResult<()> { - let indices = PyModule::new(py, "indices")?; +pub fn register_indices(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { + let indices = PyModule::new_bound(py, "indices")?; indices.add_wrapped(wrap_pyfunction!(train_ivf_model))?; indices.add_wrapped(wrap_pyfunction!(train_pq_model))?; indices.add_wrapped(wrap_pyfunction!(transform_vectors))?; indices.add_wrapped(wrap_pyfunction!(shuffle_transformed_vectors))?; indices.add_wrapped(wrap_pyfunction!(load_shuffled_vectors))?; - m.add_submodule(indices)?; + m.add_submodule(&indices)?; Ok(()) } diff --git a/python/src/lib.rs b/python/src/lib.rs index ec39c834fd..7bce47fba3 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -102,7 +102,7 @@ lazy_static! { } #[pymodule] -fn lance(py: Python, m: &PyModule) -> PyResult<()> { +fn lance(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { let env = Env::new() .filter_or("LANCE_LOG", "warn") .write_style("LANCE_LOG_STYLE"); @@ -297,10 +297,10 @@ fn read_tfrecord( #[pyfunction] #[pyo3(signature = (dataset,))] -fn manifest_needs_migration(dataset: &PyAny) -> PyResult { +fn manifest_needs_migration(dataset: &Bound<'_, PyAny>) -> PyResult { let py = dataset.py(); let dataset = dataset.getattr("_ds")?.extract::>()?; - let dataset_ref = &dataset.as_ref(py).borrow().ds; + let dataset_ref = &dataset.bind(py).borrow().ds; let indices = RT .block_on(Some(py), dataset_ref.load_indices())? .map_err(|err| PyIOError::new_err(format!("Could not read dataset metadata: {}", err)))?; diff --git a/python/src/schema.rs b/python/src/schema.rs index 9670b48257..5e345a81d2 100644 --- a/python/src/schema.rs +++ b/python/src/schema.rs @@ -76,8 +76,8 @@ impl LanceSchema { states.push(field.encode_to_vec().into_py(py)); } - let state = PyTuple::new(py, states).extract()?; - let from_protos = PyModule::import(py, "lance.schema")? + let state = PyTuple::new_bound(py, states).extract()?; + let from_protos = PyModule::import_bound(py, "lance.schema")? .getattr("LanceSchema")? .getattr("_from_protos")? .extract()?; diff --git a/python/src/tracing.rs b/python/src/tracing.rs index a9373c3e50..8904fee140 100644 --- a/python/src/tracing.rs +++ b/python/src/tracing.rs @@ -55,6 +55,7 @@ fn get_filter(level: Option<&str>) -> PyResult { } #[pyfunction] +#[pyo3(signature=(path=None, level=None))] pub fn trace_to_chrome(path: Option<&str>, level: Option<&str>) -> PyResult { let mut builder = ChromeLayerBuilder::new() .trace_style(TraceStyle::Async) diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index 57cffb1261..d7ba8cf394 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -112,9 +112,15 @@ fn remove_extension_types( field_counter += 1; } } + let mut names = vec![String::new(); index_mapping.len()]; + for (old_idx, old_name) in substrait_schema.names.iter().enumerate() { + if let Some(new_idx) = index_mapping.get(&old_idx) { + names[*new_idx] = old_name.clone(); + } + } let new_arrow_schema = Arc::new(Schema::new(kept_arrow_fields)); let new_substrait_schema = NamedStruct { - names: vec![], + names, r#struct: Some(Struct { nullability: fields.nullability, type_variation_reference: fields.type_variation_reference, diff --git a/rust/lance-encoding-datafusion/src/zone.rs b/rust/lance-encoding-datafusion/src/zone.rs index 03b8e5278a..7f6a5e2dc2 100644 --- a/rust/lance-encoding-datafusion/src/zone.rs +++ b/rust/lance-encoding-datafusion/src/zone.rs @@ -146,7 +146,7 @@ pub(crate) fn extract_zone_info( let mut zone_index = zone_index.clone(); let inner = zone_index.inner.take().unwrap(); let rows_per_zone = zone_index.rows_per_zone; - let zone_map_buffer = zone_index.zone_map_buffer.as_ref().unwrap().clone(); + let zone_map_buffer = *zone_index.zone_map_buffer.as_ref().unwrap(); assert_eq!( zone_map_buffer.buffer_type, i32::from(pb::buffer::BufferType::Column) diff --git a/rust/lance/src/io/exec/rowids.rs b/rust/lance/src/io/exec/rowids.rs index 90d36532c7..3c6af04083 100644 --- a/rust/lance/src/io/exec/rowids.rs +++ b/rust/lance/src/io/exec/rowids.rs @@ -240,8 +240,8 @@ impl ExecutionPlan for AddRowAddrExec { DataFusionError::Internal("RowAddrExec: rowid column stats not found".into()) })?; let row_addr_col_stats = ColumnStatistics { - null_count: row_id_col_stats.null_count.clone(), - distinct_count: row_id_col_stats.distinct_count.clone(), + null_count: row_id_col_stats.null_count, + distinct_count: row_id_col_stats.distinct_count, max_value: Precision::Absent, min_value: Precision::Absent, }; @@ -251,7 +251,6 @@ impl ExecutionPlan for AddRowAddrExec { // is a minimum size of 64 bytes. let mut added_byte_size = stats .num_rows - .clone() .map(|n| (n * 8).max(64)) .add(&Precision::Exact(base_size)); if row_id_col_stats @@ -261,8 +260,7 @@ impl ExecutionPlan for AddRowAddrExec { .unwrap_or_default() { // Account for null buffer. - added_byte_size = - added_byte_size.add(&stats.num_rows.clone().map(|n| n.div_ceil(8).max(64))); + added_byte_size = added_byte_size.add(&stats.num_rows.map(|n| n.div_ceil(8).max(64))); } stats.total_byte_size = stats.total_byte_size.add(&added_byte_size); stats From a982b49dd3f2dc2ce3743d1e5631d618fe734a56 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 2 Dec 2024 18:22:39 -0800 Subject: [PATCH 18/24] Fix a few more python lint errors hidden by feature flags I didn't have on earlier --- python/src/dataset.rs | 4 ++-- python/src/lib.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 65fb933270..99b509fe69 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -1509,8 +1509,8 @@ impl Dataset { }); let py = dest.py(); - let dest = if dest.is_instance_of::() { - let dataset: Dataset = dest.extract()?; + let dest = if dest.is_instance_of::() { + let dataset: Self = dest.extract()?; WriteDestination::Dataset(dataset.ds.clone()) } else { WriteDestination::Uri(dest.extract()?) diff --git a/python/src/lib.rs b/python/src/lib.rs index 7bce47fba3..9b82ff2a53 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -89,10 +89,10 @@ pub fn is_datagen_supported() -> bool { // A fallback module for when datagen is not enabled #[cfg(not(feature = "datagen"))] -fn register_datagen(py: Python, m: &PyModule) -> PyResult<()> { - let datagen = PyModule::new(py, "datagen")?; +fn register_datagen(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { + let datagen = PyModule::new_bound(py, "datagen")?; datagen.add_wrapped(wrap_pyfunction!(is_datagen_supported))?; - m.add_submodule(datagen)?; + m.add_submodule(&datagen)?; Ok(()) } From b5d80607471ebac8f7177688e02f563ca1da642b Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Wed, 4 Dec 2024 10:23:35 -0800 Subject: [PATCH 19/24] fix warnings --- rust/lance-arrow/src/bfloat16.rs | 2 +- rust/lance-file/src/writer/statistics.rs | 2 +- rust/lance-index/src/scalar/label_list.rs | 2 +- rust/lance-io/src/object_store.rs | 14 +++++++------- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/rust/lance-arrow/src/bfloat16.rs b/rust/lance-arrow/src/bfloat16.rs index 467da00a5a..06079d9baa 100644 --- a/rust/lance-arrow/src/bfloat16.rs +++ b/rust/lance-arrow/src/bfloat16.rs @@ -90,7 +90,7 @@ impl BFloat16Array { } } -impl<'a> ArrayAccessor for &'a BFloat16Array { +impl ArrayAccessor for &BFloat16Array { type Item = bf16; fn value(&self, index: usize) -> Self::Item { diff --git a/rust/lance-file/src/writer/statistics.rs b/rust/lance-file/src/writer/statistics.rs index 2ef4d081d4..7ba7c4a909 100644 --- a/rust/lance-file/src/writer/statistics.rs +++ b/rust/lance-file/src/writer/statistics.rs @@ -486,7 +486,7 @@ fn get_boolean_statistics(arrays: &[&ArrayRef]) -> StatisticsRow { fn cast_dictionary_arrays<'a, T: ArrowDictionaryKeyType + 'static>( arrays: &'a [&'a ArrayRef], -) -> Vec<&Arc> { +) -> Vec<&'a Arc> { arrays .iter() .map(|x| x.as_dictionary::().values()) diff --git a/rust/lance-index/src/scalar/label_list.rs b/rust/lance-index/src/scalar/label_list.rs index 0d487e5936..a54fcf9ed5 100644 --- a/rust/lance-index/src/scalar/label_list.rs +++ b/rust/lance-index/src/scalar/label_list.rs @@ -78,7 +78,7 @@ impl LabelListIndex { fn search_values<'a>( &'a self, values: &'a Vec, - ) -> BoxStream> { + ) -> BoxStream<'a, Result> { futures::stream::iter(values) .then(move |value| { let value_query = SargableQuery::Equals(value.clone()); diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index f668cdfaae..80bfea8726 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -58,20 +58,20 @@ pub trait ObjectStoreExt { /// Read all files (start from base directory) recursively /// /// unmodified_since can be specified to only return files that have not been modified since the given time. - async fn read_dir_all( - &self, + async fn read_dir_all<'a>( + &'a self, dir_path: impl Into<&Path> + Send, unmodified_since: Option>, - ) -> Result>>; + ) -> Result>>; } #[async_trait] impl ObjectStoreExt for O { - async fn read_dir_all( - &self, + async fn read_dir_all<'a>( + &'a self, dir_path: impl Into<&Path> + Send, unmodified_since: Option>, - ) -> Result>> { + ) -> Result>> { let mut output = self.list(Some(dir_path.into())); if let Some(unmodified_since_val) = unmodified_since { output = output @@ -652,7 +652,7 @@ impl ObjectStore { pub fn remove_stream<'a>( &'a self, locations: BoxStream<'a, Result>, - ) -> BoxStream> { + ) -> BoxStream<'a, Result> { self.inner .delete_stream(locations.err_into::().boxed()) .err_into::() From 5541a86d8187045fd1f32774628f2c2f974dca76 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Wed, 4 Dec 2024 10:49:48 -0800 Subject: [PATCH 20/24] use cargo clippy --fix --- rust/lance-core/src/utils/deletion.rs | 1 - rust/lance-core/src/utils/futures.rs | 2 +- rust/lance-core/src/utils/hash.rs | 4 ++-- rust/lance-core/src/utils/testing.rs | 4 ++-- .../src/encodings/logical/binary.rs | 2 +- .../src/encodings/logical/blob.rs | 2 +- .../src/encodings/logical/list.rs | 2 +- .../src/encodings/logical/primitive.rs | 4 ++-- .../src/encodings/logical/struct.rs | 20 +++++++++---------- .../encodings/physical/bitpack_fastlanes.rs | 2 +- rust/lance-index/src/vector/graph.rs | 4 ++-- rust/lance-index/src/vector/hnsw/builder.rs | 6 +++--- rust/lance-index/src/vector/ivf/shuffler.rs | 6 +----- rust/lance-index/src/vector/sq/storage.rs | 2 +- rust/lance-io/src/encodings/binary.rs | 16 +++++++-------- rust/lance-io/src/encodings/dictionary.rs | 8 ++++---- rust/lance-io/src/encodings/plain.rs | 2 +- rust/lance-io/src/scheduler.rs | 2 +- rust/lance-io/src/utils.rs | 6 +----- rust/lance-linalg/src/simd.rs | 1 - rust/lance-linalg/src/simd/f32.rs | 2 -- rust/lance-table/src/rowids.rs | 2 +- rust/lance-table/src/rowids/bitmap.rs | 4 ++-- rust/lance/src/dataset/optimize/remapping.rs | 2 +- rust/lance/src/index/scalar.rs | 1 - rust/lance/src/io/exec/scalar_index.rs | 2 +- 26 files changed, 48 insertions(+), 61 deletions(-) diff --git a/rust/lance-core/src/utils/deletion.rs b/rust/lance-core/src/utils/deletion.rs index 1735f90b8c..44e1b79a19 100644 --- a/rust/lance-core/src/utils/deletion.rs +++ b/rust/lance-core/src/utils/deletion.rs @@ -194,7 +194,6 @@ impl Extend for DeletionVector { /// pub fn get(i: u32) -> bool { ... } /// } /// impl BitAnd for DeletionVector { ... } - impl IntoIterator for DeletionVector { type IntoIter = Box + Send>; type Item = u32; diff --git a/rust/lance-core/src/utils/futures.rs b/rust/lance-core/src/utils/futures.rs index 9acce93ce2..2267f600e7 100644 --- a/rust/lance-core/src/utils/futures.rs +++ b/rust/lance-core/src/utils/futures.rs @@ -74,7 +74,7 @@ impl<'a, T: Clone> SharedStream<'a, T> { } } -impl<'a, T: Clone> Stream for SharedStream<'a, T> { +impl Stream for SharedStream<'_, T> { type Item = T; fn poll_next( diff --git a/rust/lance-core/src/utils/hash.rs b/rust/lance-core/src/utils/hash.rs index 58e6fd47bf..14ef805a58 100644 --- a/rust/lance-core/src/utils/hash.rs +++ b/rust/lance-core/src/utils/hash.rs @@ -7,13 +7,13 @@ use std::hash::Hasher; // the equality for this `U8SliceKey` means that the &[u8] contents are equal. #[derive(Eq)] pub struct U8SliceKey<'a>(pub &'a [u8]); -impl<'a> PartialEq for U8SliceKey<'a> { +impl PartialEq for U8SliceKey<'_> { fn eq(&self, other: &Self) -> bool { self.0 == other.0 } } -impl<'a> std::hash::Hash for U8SliceKey<'a> { +impl std::hash::Hash for U8SliceKey<'_> { fn hash(&self, state: &mut H) { self.0.hash(state); } diff --git a/rust/lance-core/src/utils/testing.rs b/rust/lance-core/src/utils/testing.rs index 9746787f71..f111236486 100644 --- a/rust/lance-core/src/utils/testing.rs +++ b/rust/lance-core/src/utils/testing.rs @@ -218,7 +218,7 @@ impl Default for MockClock<'_> { } } -impl<'a> MockClock<'a> { +impl MockClock<'_> { pub fn new() -> Self { Default::default() } @@ -228,7 +228,7 @@ impl<'a> MockClock<'a> { } } -impl<'a> Drop for MockClock<'a> { +impl Drop for MockClock<'_> { fn drop(&mut self) { // Reset the clock to the epoch mock_instant::MockClock::set_system_time(TimeDelta::try_days(0).unwrap().to_std().unwrap()); diff --git a/rust/lance-encoding/src/encodings/logical/binary.rs b/rust/lance-encoding/src/encodings/logical/binary.rs index 1791f31b15..a08d6d8af6 100644 --- a/rust/lance-encoding/src/encodings/logical/binary.rs +++ b/rust/lance-encoding/src/encodings/logical/binary.rs @@ -27,7 +27,7 @@ pub struct BinarySchedulingJob<'a> { inner: Box, } -impl<'a> SchedulingJob for BinarySchedulingJob<'a> { +impl SchedulingJob for BinarySchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/blob.rs b/rust/lance-encoding/src/encodings/logical/blob.rs index ea26cb84e2..77ba8c48e4 100644 --- a/rust/lance-encoding/src/encodings/logical/blob.rs +++ b/rust/lance-encoding/src/encodings/logical/blob.rs @@ -57,7 +57,7 @@ struct BlobFieldSchedulingJob<'a> { descriptions_job: Box, } -impl<'a> SchedulingJob for BlobFieldSchedulingJob<'a> { +impl SchedulingJob for BlobFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/list.rs b/rust/lance-encoding/src/encodings/logical/list.rs index cfe6a7b143..8522c217d2 100644 --- a/rust/lance-encoding/src/encodings/logical/list.rs +++ b/rust/lance-encoding/src/encodings/logical/list.rs @@ -424,7 +424,7 @@ impl<'a> ListFieldSchedulingJob<'a> { } } -impl<'a> SchedulingJob for ListFieldSchedulingJob<'a> { +impl SchedulingJob for ListFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs index e73cd2b282..ca30e03dd7 100644 --- a/rust/lance-encoding/src/encodings/logical/primitive.rs +++ b/rust/lance-encoding/src/encodings/logical/primitive.rs @@ -141,7 +141,7 @@ impl<'a> PrimitiveFieldSchedulingJob<'a> { } } -impl<'a> SchedulingJob for PrimitiveFieldSchedulingJob<'a> { +impl SchedulingJob for PrimitiveFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, @@ -1148,7 +1148,7 @@ impl<'a> StructuralPrimitiveFieldSchedulingJob<'a> { } } -impl<'a> StructuralSchedulingJob for StructuralPrimitiveFieldSchedulingJob<'a> { +impl StructuralSchedulingJob for StructuralPrimitiveFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/struct.rs b/rust/lance-encoding/src/encodings/logical/struct.rs index a4cc44afc7..1416b88ede 100644 --- a/rust/lance-encoding/src/encodings/logical/struct.rs +++ b/rust/lance-encoding/src/encodings/logical/struct.rs @@ -42,21 +42,21 @@ struct SchedulingJobWithStatus<'a> { rows_remaining: u64, } -impl<'a> PartialEq for SchedulingJobWithStatus<'a> { +impl PartialEq for SchedulingJobWithStatus<'_> { fn eq(&self, other: &Self) -> bool { self.col_idx == other.col_idx } } -impl<'a> Eq for SchedulingJobWithStatus<'a> {} +impl Eq for SchedulingJobWithStatus<'_> {} -impl<'a> PartialOrd for SchedulingJobWithStatus<'a> { +impl PartialOrd for SchedulingJobWithStatus<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl<'a> Ord for SchedulingJobWithStatus<'a> { +impl Ord for SchedulingJobWithStatus<'_> { fn cmp(&self, other: &Self) -> std::cmp::Ordering { // Note this is reversed to make it min-heap other.rows_scheduled.cmp(&self.rows_scheduled) @@ -106,7 +106,7 @@ impl<'a> SimpleStructSchedulerJob<'a> { } } -impl<'a> SchedulingJob for SimpleStructSchedulerJob<'a> { +impl SchedulingJob for SimpleStructSchedulerJob<'_> { fn schedule_next( &mut self, mut context: &mut SchedulerContext, @@ -239,21 +239,21 @@ struct StructuralSchedulingJobWithStatus<'a> { rows_remaining: u64, } -impl<'a> PartialEq for StructuralSchedulingJobWithStatus<'a> { +impl PartialEq for StructuralSchedulingJobWithStatus<'_> { fn eq(&self, other: &Self) -> bool { self.col_idx == other.col_idx } } -impl<'a> Eq for StructuralSchedulingJobWithStatus<'a> {} +impl Eq for StructuralSchedulingJobWithStatus<'_> {} -impl<'a> PartialOrd for StructuralSchedulingJobWithStatus<'a> { +impl PartialOrd for StructuralSchedulingJobWithStatus<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl<'a> Ord for StructuralSchedulingJobWithStatus<'a> { +impl Ord for StructuralSchedulingJobWithStatus<'_> { fn cmp(&self, other: &Self) -> std::cmp::Ordering { // Note this is reversed to make it min-heap other.rows_scheduled.cmp(&self.rows_scheduled) @@ -297,7 +297,7 @@ impl<'a> RepDefStructSchedulingJob<'a> { } } -impl<'a> StructuralSchedulingJob for RepDefStructSchedulingJob<'a> { +impl StructuralSchedulingJob for RepDefStructSchedulingJob<'_> { fn schedule_next( &mut self, mut context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/physical/bitpack_fastlanes.rs b/rust/lance-encoding/src/encodings/physical/bitpack_fastlanes.rs index 72f540249f..9449360083 100644 --- a/rust/lance-encoding/src/encodings/physical/bitpack_fastlanes.rs +++ b/rust/lance-encoding/src/encodings/physical/bitpack_fastlanes.rs @@ -204,7 +204,7 @@ pub fn compute_compressed_bit_width_for_non_neg(arrays: &[ArrayRef]) -> u64 { // It outputs an fastlanes bitpacked EncodedArray macro_rules! encode_fixed_width { ($self:expr, $unpacked:expr, $data_type:ty, $buffer_index:expr) => {{ - let num_chunks = ($unpacked.num_values + ELEMS_PER_CHUNK - 1) / ELEMS_PER_CHUNK; + let num_chunks = $unpacked.num_values.div_ceil(ELEMS_PER_CHUNK); let num_full_chunks = $unpacked.num_values / ELEMS_PER_CHUNK; let uncompressed_bit_width = std::mem::size_of::<$data_type>() as u64 * 8; diff --git a/rust/lance-index/src/vector/graph.rs b/rust/lance-index/src/vector/graph.rs index 9e79dc231a..e31ab4d344 100644 --- a/rust/lance-index/src/vector/graph.rs +++ b/rust/lance-index/src/vector/graph.rs @@ -152,7 +152,7 @@ pub struct Visited<'a> { recently_visited: Vec, } -impl<'a> Visited<'a> { +impl Visited<'_> { pub fn insert(&mut self, node_id: u32) { let node_id_usize = node_id as usize; if !self.visited[node_id_usize] { @@ -171,7 +171,7 @@ impl<'a> Visited<'a> { } } -impl<'a> Drop for Visited<'a> { +impl Drop for Visited<'_> { fn drop(&mut self) { for node_id in self.recently_visited.iter() { self.visited.set(*node_id as usize, false); diff --git a/rust/lance-index/src/vector/hnsw/builder.rs b/rust/lance-index/src/vector/hnsw/builder.rs index fc5e43a1b8..a30bbf993c 100644 --- a/rust/lance-index/src/vector/hnsw/builder.rs +++ b/rust/lance-index/src/vector/hnsw/builder.rs @@ -507,7 +507,7 @@ impl<'a> HnswLevelView<'a> { } } -impl<'a> Graph for HnswLevelView<'a> { +impl Graph for HnswLevelView<'_> { fn len(&self) -> usize { self.nodes.len() } @@ -528,7 +528,7 @@ impl<'a> HnswBottomView<'a> { } } -impl<'a> Graph for HnswBottomView<'a> { +impl Graph for HnswBottomView<'_> { fn len(&self) -> usize { self.nodes.len() } @@ -544,7 +544,7 @@ pub struct HnswQueryParams { pub ef: usize, } -impl<'a> From<&'a Query> for HnswQueryParams { +impl From<&Query> for HnswQueryParams { fn from(query: &Query) -> Self { let k = query.k * query.refine_factor.unwrap_or(1) as usize; Self { diff --git a/rust/lance-index/src/vector/ivf/shuffler.rs b/rust/lance-index/src/vector/ivf/shuffler.rs index 2f6d97ed7f..f1d0b16960 100644 --- a/rust/lance-index/src/vector/ivf/shuffler.rs +++ b/rust/lance-index/src/vector/ivf/shuffler.rs @@ -739,11 +739,7 @@ impl IvfShuffler { continue; } - let local_start = if start < cur_start { - 0 - } else { - start - cur_start - }; + let local_start = start.saturating_sub(cur_start); let local_end = std::cmp::min(end - cur_start, *partition_size); input.push(ShuffleInput { diff --git a/rust/lance-index/src/vector/sq/storage.rs b/rust/lance-index/src/vector/sq/storage.rs index 4428cfcd76..eaaa486f23 100644 --- a/rust/lance-index/src/vector/sq/storage.rs +++ b/rust/lance-index/src/vector/sq/storage.rs @@ -398,7 +398,7 @@ impl<'a> SQDistCalculator<'a> { } } -impl<'a> DistCalculator for SQDistCalculator<'a> { +impl DistCalculator for SQDistCalculator<'_> { fn distance(&self, id: u32) -> f32 { let (offset, chunk) = self.storage.chunk(id); let sq_code = chunk.sq_code_slice(id - offset); diff --git a/rust/lance-io/src/encodings/binary.rs b/rust/lance-io/src/encodings/binary.rs index f8187a3717..8eccf95532 100644 --- a/rust/lance-io/src/encodings/binary.rs +++ b/rust/lance-io/src/encodings/binary.rs @@ -88,7 +88,7 @@ impl<'a> BinaryEncoder<'a> { } #[async_trait] -impl<'a> Encoder for BinaryEncoder<'a> { +impl Encoder for BinaryEncoder<'_> { async fn encode(&mut self, arrs: &[&dyn Array]) -> Result { assert!(!arrs.is_empty()); let data_type = arrs[0].data_type(); @@ -286,7 +286,7 @@ fn plan_take_chunks( } #[async_trait] -impl<'a, T: ByteArrayType> Decoder for BinaryDecoder<'a, T> { +impl Decoder for BinaryDecoder<'_, T> { async fn decode(&self) -> Result { self.get(..).await } @@ -394,7 +394,7 @@ impl<'a, T: ByteArrayType> Decoder for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { +impl AsyncIndex for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: usize) -> Self::Output { @@ -403,7 +403,7 @@ impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { +impl AsyncIndex> for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: RangeFrom) -> Self::Output { @@ -412,7 +412,7 @@ impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { +impl AsyncIndex> for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: RangeTo) -> Self::Output { @@ -421,7 +421,7 @@ impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { +impl AsyncIndex for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, _: RangeFull) -> Self::Output { @@ -430,7 +430,7 @@ impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { +impl AsyncIndex for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, params: ReadBatchParams) -> Self::Output { @@ -445,7 +445,7 @@ impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { +impl AsyncIndex> for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: Range) -> Self::Output { diff --git a/rust/lance-io/src/encodings/dictionary.rs b/rust/lance-io/src/encodings/dictionary.rs index 72b150e023..494b439cfd 100644 --- a/rust/lance-io/src/encodings/dictionary.rs +++ b/rust/lance-io/src/encodings/dictionary.rs @@ -62,7 +62,7 @@ impl<'a> DictionaryEncoder<'a> { } #[async_trait] -impl<'a> Encoder for DictionaryEncoder<'a> { +impl Encoder for DictionaryEncoder<'_> { async fn encode(&mut self, array: &[&dyn Array]) -> Result { use DataType::*; @@ -171,7 +171,7 @@ impl<'a> DictionaryDecoder<'a> { } #[async_trait] -impl<'a> Decoder for DictionaryDecoder<'a> { +impl Decoder for DictionaryDecoder<'_> { async fn decode(&self) -> Result { self.decode_impl(..).await } @@ -182,7 +182,7 @@ impl<'a> Decoder for DictionaryDecoder<'a> { } #[async_trait] -impl<'a> AsyncIndex for DictionaryDecoder<'a> { +impl AsyncIndex for DictionaryDecoder<'_> { type Output = Result; async fn get(&self, _index: usize) -> Self::Output { @@ -196,7 +196,7 @@ impl<'a> AsyncIndex for DictionaryDecoder<'a> { } #[async_trait] -impl<'a> AsyncIndex for DictionaryDecoder<'a> { +impl AsyncIndex for DictionaryDecoder<'_> { type Output = Result; async fn get(&self, params: ReadBatchParams) -> Self::Output { diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 4f77fde5c7..9951e21374 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -401,7 +401,7 @@ fn make_chunked_requests( } #[async_trait] -impl<'a> Decoder for PlainDecoder<'a> { +impl Decoder for PlainDecoder<'_> { async fn decode(&self) -> Result { self.get(0..self.length).await } diff --git a/rust/lance-io/src/scheduler.rs b/rust/lance-io/src/scheduler.rs index b6cfff300a..7fc11da709 100644 --- a/rust/lance-io/src/scheduler.rs +++ b/rust/lance-io/src/scheduler.rs @@ -64,7 +64,7 @@ struct IopsReservation<'a> { value: Option>, } -impl<'a> IopsReservation<'a> { +impl IopsReservation<'_> { // Forget the reservation, so it won't be released on drop fn forget(&mut self) { if let Some(value) = self.value.take() { diff --git a/rust/lance-io/src/utils.rs b/rust/lance-io/src/utils.rs index 37253339a5..1f2f45b83c 100644 --- a/rust/lance-io/src/utils.rs +++ b/rust/lance-io/src/utils.rs @@ -118,11 +118,7 @@ pub async fn read_struct< pub async fn read_last_block(reader: &dyn Reader) -> object_store::Result { let file_size = reader.size().await?; let block_size = reader.block_size(); - let begin = if file_size < block_size { - 0 - } else { - file_size - block_size - }; + let begin = file_size.saturating_sub(block_size); reader.get_range(begin..file_size).await } diff --git a/rust/lance-linalg/src/simd.rs b/rust/lance-linalg/src/simd.rs index 74c3b56d3b..ff95164c75 100644 --- a/rust/lance-linalg/src/simd.rs +++ b/rust/lance-linalg/src/simd.rs @@ -42,7 +42,6 @@ pub trait SIMD: fn zeros() -> Self; /// Gather elements from the slice, using i32 indices. - /// Load aligned data from aligned memory. /// /// # Safety diff --git a/rust/lance-linalg/src/simd/f32.rs b/rust/lance-linalg/src/simd/f32.rs index 8deb50338b..8091bc83a1 100644 --- a/rust/lance-linalg/src/simd/f32.rs +++ b/rust/lance-linalg/src/simd/f32.rs @@ -485,7 +485,6 @@ impl<'a> From<&'a [f32; 16]> for f32x16 { impl SIMD for f32x16 { #[inline] - fn splat(val: f32) -> Self { #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] unsafe { @@ -602,7 +601,6 @@ impl SIMD for f32x16 { } #[inline] - unsafe fn store_unaligned(&self, ptr: *mut f32) { #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] unsafe { diff --git a/rust/lance-table/src/rowids.rs b/rust/lance-table/src/rowids.rs index 38ee381f8d..1375f0526f 100644 --- a/rust/lance-table/src/rowids.rs +++ b/rust/lance-table/src/rowids.rs @@ -343,7 +343,7 @@ pub struct RowIdSeqSlice<'a> { offset_last: usize, } -impl<'a> RowIdSeqSlice<'a> { +impl RowIdSeqSlice<'_> { pub fn iter(&self) -> impl Iterator + '_ { let mut known_size = self.segments.iter().map(|segment| segment.len()).sum(); known_size -= self.offset_start; diff --git a/rust/lance-table/src/rowids/bitmap.rs b/rust/lance-table/src/rowids/bitmap.rs index dc628ddcf8..97777af5be 100644 --- a/rust/lance-table/src/rowids/bitmap.rs +++ b/rust/lance-table/src/rowids/bitmap.rs @@ -92,7 +92,7 @@ pub struct BitmapSlice<'a> { len: usize, } -impl<'a> BitmapSlice<'a> { +impl BitmapSlice<'_> { pub fn count_ones(&self) -> usize { if self.len == 0 { return 0; @@ -138,7 +138,7 @@ impl<'a> BitmapSlice<'a> { } } -impl<'a> From> for Bitmap { +impl From> for Bitmap { fn from(slice: BitmapSlice) -> Self { let mut bitmap = Self::new_empty(slice.len); for i in 0..slice.len { diff --git a/rust/lance/src/dataset/optimize/remapping.rs b/rust/lance/src/dataset/optimize/remapping.rs index 026cbcc356..4b09bf7b3f 100644 --- a/rust/lance/src/dataset/optimize/remapping.rs +++ b/rust/lance/src/dataset/optimize/remapping.rs @@ -95,7 +95,7 @@ impl<'a, I: Iterator> MissingIds<'a, I> { } } -impl<'a, I: Iterator> Iterator for MissingIds<'a, I> { +impl> Iterator for MissingIds<'_, I> { type Item = u64; fn next(&mut self) -> Option { diff --git a/rust/lance/src/index/scalar.rs b/rust/lance/src/index/scalar.rs index 8efa3ec829..c0394bdb65 100644 --- a/rust/lance/src/index/scalar.rs +++ b/rust/lance/src/index/scalar.rs @@ -87,7 +87,6 @@ impl TrainingRequest { // to make index types "generic" and "pluggable". We will need to create some // kind of core proto for scalar indices that the scanner can read to determine // how and when to use a scalar index. - pub trait ScalarIndexDetails { fn get_type(&self) -> ScalarIndexType; } diff --git a/rust/lance/src/io/exec/scalar_index.rs b/rust/lance/src/io/exec/scalar_index.rs index 0f39ed6124..319b4870af 100644 --- a/rust/lance/src/io/exec/scalar_index.rs +++ b/rust/lance/src/io/exec/scalar_index.rs @@ -362,7 +362,7 @@ impl<'a> FragIdIter<'a> { } } -impl<'a> Iterator for FragIdIter<'a> { +impl Iterator for FragIdIter<'_> { type Item = u64; fn next(&mut self) -> Option { From aee77699f4ddd92b692791efd02db52e513ab884 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Wed, 4 Dec 2024 11:03:00 -0800 Subject: [PATCH 21/24] fix all clippy --- rust/lance-core/src/utils/bit.rs | 1 - rust/lance-encoding/benches/decoder.rs | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance-core/src/utils/bit.rs b/rust/lance-core/src/utils/bit.rs index 75a13e783a..7d69fee8da 100644 --- a/rust/lance-core/src/utils/bit.rs +++ b/rust/lance-core/src/utils/bit.rs @@ -60,7 +60,6 @@ pub fn log_2_ceil(val: u32) -> u32 { } #[cfg(test)] - pub mod tests { use crate::utils::bit::log_2_ceil; diff --git a/rust/lance-encoding/benches/decoder.rs b/rust/lance-encoding/benches/decoder.rs index c6a80538a8..500274fa34 100644 --- a/rust/lance-encoding/benches/decoder.rs +++ b/rust/lance-encoding/benches/decoder.rs @@ -299,6 +299,7 @@ fn bench_decode_packed_struct(c: &mut Criterion) { }); } +#[allow(dead_code)] fn bench_decode_str_with_fixed_size_binary_encoding(c: &mut Criterion) { let rt = tokio::runtime::Runtime::new().unwrap(); let mut group = c.benchmark_group("decode_primitive"); From 4a116895e24e8457f2666b6bc3ed32c6c3906618 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 4 Dec 2024 11:08:52 -0800 Subject: [PATCH 22/24] Addressing clippy --- rust/lance-arrow/src/bfloat16.rs | 2 +- rust/lance-core/src/utils/bit.rs | 1 - rust/lance-core/src/utils/deletion.rs | 1 - rust/lance-core/src/utils/futures.rs | 2 +- rust/lance-core/src/utils/hash.rs | 4 ++-- rust/lance-core/src/utils/testing.rs | 4 ++-- .../src/encodings/logical/binary.rs | 2 +- .../lance-encoding/src/encodings/logical/blob.rs | 2 +- .../lance-encoding/src/encodings/logical/list.rs | 2 +- .../src/encodings/logical/primitive.rs | 4 ++-- .../src/encodings/logical/struct.rs | 10 +++++----- rust/lance-io/src/encodings/binary.rs | 16 ++++++++-------- rust/lance-io/src/encodings/dictionary.rs | 8 ++++---- rust/lance-io/src/encodings/plain.rs | 2 +- rust/lance-io/src/object_store.rs | 6 +++--- rust/lance-io/src/scheduler.rs | 2 +- rust/lance-io/src/utils.rs | 6 +----- rust/lance-linalg/src/simd.rs | 2 -- rust/lance-linalg/src/simd/f32.rs | 2 -- rust/lance/src/dataset/scanner.rs | 1 - 20 files changed, 34 insertions(+), 45 deletions(-) diff --git a/rust/lance-arrow/src/bfloat16.rs b/rust/lance-arrow/src/bfloat16.rs index 467da00a5a..06079d9baa 100644 --- a/rust/lance-arrow/src/bfloat16.rs +++ b/rust/lance-arrow/src/bfloat16.rs @@ -90,7 +90,7 @@ impl BFloat16Array { } } -impl<'a> ArrayAccessor for &'a BFloat16Array { +impl ArrayAccessor for &BFloat16Array { type Item = bf16; fn value(&self, index: usize) -> Self::Item { diff --git a/rust/lance-core/src/utils/bit.rs b/rust/lance-core/src/utils/bit.rs index 75a13e783a..7d69fee8da 100644 --- a/rust/lance-core/src/utils/bit.rs +++ b/rust/lance-core/src/utils/bit.rs @@ -60,7 +60,6 @@ pub fn log_2_ceil(val: u32) -> u32 { } #[cfg(test)] - pub mod tests { use crate::utils::bit::log_2_ceil; diff --git a/rust/lance-core/src/utils/deletion.rs b/rust/lance-core/src/utils/deletion.rs index 1735f90b8c..44e1b79a19 100644 --- a/rust/lance-core/src/utils/deletion.rs +++ b/rust/lance-core/src/utils/deletion.rs @@ -194,7 +194,6 @@ impl Extend for DeletionVector { /// pub fn get(i: u32) -> bool { ... } /// } /// impl BitAnd for DeletionVector { ... } - impl IntoIterator for DeletionVector { type IntoIter = Box + Send>; type Item = u32; diff --git a/rust/lance-core/src/utils/futures.rs b/rust/lance-core/src/utils/futures.rs index 9acce93ce2..2267f600e7 100644 --- a/rust/lance-core/src/utils/futures.rs +++ b/rust/lance-core/src/utils/futures.rs @@ -74,7 +74,7 @@ impl<'a, T: Clone> SharedStream<'a, T> { } } -impl<'a, T: Clone> Stream for SharedStream<'a, T> { +impl Stream for SharedStream<'_, T> { type Item = T; fn poll_next( diff --git a/rust/lance-core/src/utils/hash.rs b/rust/lance-core/src/utils/hash.rs index 58e6fd47bf..14ef805a58 100644 --- a/rust/lance-core/src/utils/hash.rs +++ b/rust/lance-core/src/utils/hash.rs @@ -7,13 +7,13 @@ use std::hash::Hasher; // the equality for this `U8SliceKey` means that the &[u8] contents are equal. #[derive(Eq)] pub struct U8SliceKey<'a>(pub &'a [u8]); -impl<'a> PartialEq for U8SliceKey<'a> { +impl PartialEq for U8SliceKey<'_> { fn eq(&self, other: &Self) -> bool { self.0 == other.0 } } -impl<'a> std::hash::Hash for U8SliceKey<'a> { +impl std::hash::Hash for U8SliceKey<'_> { fn hash(&self, state: &mut H) { self.0.hash(state); } diff --git a/rust/lance-core/src/utils/testing.rs b/rust/lance-core/src/utils/testing.rs index 9746787f71..f111236486 100644 --- a/rust/lance-core/src/utils/testing.rs +++ b/rust/lance-core/src/utils/testing.rs @@ -218,7 +218,7 @@ impl Default for MockClock<'_> { } } -impl<'a> MockClock<'a> { +impl MockClock<'_> { pub fn new() -> Self { Default::default() } @@ -228,7 +228,7 @@ impl<'a> MockClock<'a> { } } -impl<'a> Drop for MockClock<'a> { +impl Drop for MockClock<'_> { fn drop(&mut self) { // Reset the clock to the epoch mock_instant::MockClock::set_system_time(TimeDelta::try_days(0).unwrap().to_std().unwrap()); diff --git a/rust/lance-encoding/src/encodings/logical/binary.rs b/rust/lance-encoding/src/encodings/logical/binary.rs index 1791f31b15..a08d6d8af6 100644 --- a/rust/lance-encoding/src/encodings/logical/binary.rs +++ b/rust/lance-encoding/src/encodings/logical/binary.rs @@ -27,7 +27,7 @@ pub struct BinarySchedulingJob<'a> { inner: Box, } -impl<'a> SchedulingJob for BinarySchedulingJob<'a> { +impl SchedulingJob for BinarySchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/blob.rs b/rust/lance-encoding/src/encodings/logical/blob.rs index ea26cb84e2..77ba8c48e4 100644 --- a/rust/lance-encoding/src/encodings/logical/blob.rs +++ b/rust/lance-encoding/src/encodings/logical/blob.rs @@ -57,7 +57,7 @@ struct BlobFieldSchedulingJob<'a> { descriptions_job: Box, } -impl<'a> SchedulingJob for BlobFieldSchedulingJob<'a> { +impl SchedulingJob for BlobFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/list.rs b/rust/lance-encoding/src/encodings/logical/list.rs index cfe6a7b143..8522c217d2 100644 --- a/rust/lance-encoding/src/encodings/logical/list.rs +++ b/rust/lance-encoding/src/encodings/logical/list.rs @@ -424,7 +424,7 @@ impl<'a> ListFieldSchedulingJob<'a> { } } -impl<'a> SchedulingJob for ListFieldSchedulingJob<'a> { +impl SchedulingJob for ListFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/primitive.rs b/rust/lance-encoding/src/encodings/logical/primitive.rs index e73cd2b282..ca30e03dd7 100644 --- a/rust/lance-encoding/src/encodings/logical/primitive.rs +++ b/rust/lance-encoding/src/encodings/logical/primitive.rs @@ -141,7 +141,7 @@ impl<'a> PrimitiveFieldSchedulingJob<'a> { } } -impl<'a> SchedulingJob for PrimitiveFieldSchedulingJob<'a> { +impl SchedulingJob for PrimitiveFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, @@ -1148,7 +1148,7 @@ impl<'a> StructuralPrimitiveFieldSchedulingJob<'a> { } } -impl<'a> StructuralSchedulingJob for StructuralPrimitiveFieldSchedulingJob<'a> { +impl StructuralSchedulingJob for StructuralPrimitiveFieldSchedulingJob<'_> { fn schedule_next( &mut self, context: &mut SchedulerContext, diff --git a/rust/lance-encoding/src/encodings/logical/struct.rs b/rust/lance-encoding/src/encodings/logical/struct.rs index a4cc44afc7..196b26f471 100644 --- a/rust/lance-encoding/src/encodings/logical/struct.rs +++ b/rust/lance-encoding/src/encodings/logical/struct.rs @@ -42,21 +42,21 @@ struct SchedulingJobWithStatus<'a> { rows_remaining: u64, } -impl<'a> PartialEq for SchedulingJobWithStatus<'a> { +impl PartialEq for SchedulingJobWithStatus<'_> { fn eq(&self, other: &Self) -> bool { self.col_idx == other.col_idx } } -impl<'a> Eq for SchedulingJobWithStatus<'a> {} +impl Eq for SchedulingJobWithStatus<'_> {} -impl<'a> PartialOrd for SchedulingJobWithStatus<'a> { +impl PartialOrd for SchedulingJobWithStatus<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl<'a> Ord for SchedulingJobWithStatus<'a> { +impl Ord for SchedulingJobWithStatus<'_> { fn cmp(&self, other: &Self) -> std::cmp::Ordering { // Note this is reversed to make it min-heap other.rows_scheduled.cmp(&self.rows_scheduled) @@ -106,7 +106,7 @@ impl<'a> SimpleStructSchedulerJob<'a> { } } -impl<'a> SchedulingJob for SimpleStructSchedulerJob<'a> { +impl SchedulingJob for SimpleStructSchedulerJob<'_> { fn schedule_next( &mut self, mut context: &mut SchedulerContext, diff --git a/rust/lance-io/src/encodings/binary.rs b/rust/lance-io/src/encodings/binary.rs index d7c4c2ba11..edb2b03471 100644 --- a/rust/lance-io/src/encodings/binary.rs +++ b/rust/lance-io/src/encodings/binary.rs @@ -89,7 +89,7 @@ impl<'a> BinaryEncoder<'a> { } #[async_trait] -impl<'a> Encoder for BinaryEncoder<'a> { +impl Encoder for BinaryEncoder<'_> { async fn encode(&mut self, arrs: &[&dyn Array]) -> Result { assert!(!arrs.is_empty()); let data_type = arrs[0].data_type(); @@ -287,7 +287,7 @@ fn plan_take_chunks( } #[async_trait] -impl<'a, T: ByteArrayType> Decoder for BinaryDecoder<'a, T> { +impl Decoder for BinaryDecoder<'_, T> { async fn decode(&self) -> Result { self.get(..).await } @@ -395,7 +395,7 @@ impl<'a, T: ByteArrayType> Decoder for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { +impl AsyncIndex for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: usize) -> Self::Output { @@ -404,7 +404,7 @@ impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { +impl AsyncIndex> for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: RangeFrom) -> Self::Output { @@ -413,7 +413,7 @@ impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { +impl AsyncIndex> for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: RangeTo) -> Self::Output { @@ -422,7 +422,7 @@ impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { +impl AsyncIndex for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, _: RangeFull) -> Self::Output { @@ -431,7 +431,7 @@ impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> { +impl AsyncIndex for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, params: ReadBatchParams) -> Self::Output { @@ -446,7 +446,7 @@ impl<'a, T: ByteArrayType> AsyncIndex for BinaryDecoder<'a, T> } #[async_trait] -impl<'a, T: ByteArrayType> AsyncIndex> for BinaryDecoder<'a, T> { +impl AsyncIndex> for BinaryDecoder<'_, T> { type Output = Result; async fn get(&self, index: Range) -> Self::Output { diff --git a/rust/lance-io/src/encodings/dictionary.rs b/rust/lance-io/src/encodings/dictionary.rs index 72b150e023..494b439cfd 100644 --- a/rust/lance-io/src/encodings/dictionary.rs +++ b/rust/lance-io/src/encodings/dictionary.rs @@ -62,7 +62,7 @@ impl<'a> DictionaryEncoder<'a> { } #[async_trait] -impl<'a> Encoder for DictionaryEncoder<'a> { +impl Encoder for DictionaryEncoder<'_> { async fn encode(&mut self, array: &[&dyn Array]) -> Result { use DataType::*; @@ -171,7 +171,7 @@ impl<'a> DictionaryDecoder<'a> { } #[async_trait] -impl<'a> Decoder for DictionaryDecoder<'a> { +impl Decoder for DictionaryDecoder<'_> { async fn decode(&self) -> Result { self.decode_impl(..).await } @@ -182,7 +182,7 @@ impl<'a> Decoder for DictionaryDecoder<'a> { } #[async_trait] -impl<'a> AsyncIndex for DictionaryDecoder<'a> { +impl AsyncIndex for DictionaryDecoder<'_> { type Output = Result; async fn get(&self, _index: usize) -> Self::Output { @@ -196,7 +196,7 @@ impl<'a> AsyncIndex for DictionaryDecoder<'a> { } #[async_trait] -impl<'a> AsyncIndex for DictionaryDecoder<'a> { +impl AsyncIndex for DictionaryDecoder<'_> { type Output = Result; async fn get(&self, params: ReadBatchParams) -> Self::Output { diff --git a/rust/lance-io/src/encodings/plain.rs b/rust/lance-io/src/encodings/plain.rs index 50fd1926dc..844a4c516c 100644 --- a/rust/lance-io/src/encodings/plain.rs +++ b/rust/lance-io/src/encodings/plain.rs @@ -393,7 +393,7 @@ fn make_chunked_requests( } #[async_trait] -impl<'a> Decoder for PlainDecoder<'a> { +impl Decoder for PlainDecoder<'_> { async fn decode(&self) -> Result { self.get(0..self.length).await } diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index f668cdfaae..7f9064d828 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -62,7 +62,7 @@ pub trait ObjectStoreExt { &self, dir_path: impl Into<&Path> + Send, unmodified_since: Option>, - ) -> Result>>; + ) -> Result>>; } #[async_trait] @@ -71,7 +71,7 @@ impl ObjectStoreExt for O { &self, dir_path: impl Into<&Path> + Send, unmodified_since: Option>, - ) -> Result>> { + ) -> Result>> { let mut output = self.list(Some(dir_path.into())); if let Some(unmodified_since_val) = unmodified_since { output = output @@ -652,7 +652,7 @@ impl ObjectStore { pub fn remove_stream<'a>( &'a self, locations: BoxStream<'a, Result>, - ) -> BoxStream> { + ) -> BoxStream<'a, Result> { self.inner .delete_stream(locations.err_into::().boxed()) .err_into::() diff --git a/rust/lance-io/src/scheduler.rs b/rust/lance-io/src/scheduler.rs index b6cfff300a..7fc11da709 100644 --- a/rust/lance-io/src/scheduler.rs +++ b/rust/lance-io/src/scheduler.rs @@ -64,7 +64,7 @@ struct IopsReservation<'a> { value: Option>, } -impl<'a> IopsReservation<'a> { +impl IopsReservation<'_> { // Forget the reservation, so it won't be released on drop fn forget(&mut self) { if let Some(value) = self.value.take() { diff --git a/rust/lance-io/src/utils.rs b/rust/lance-io/src/utils.rs index 37253339a5..1f2f45b83c 100644 --- a/rust/lance-io/src/utils.rs +++ b/rust/lance-io/src/utils.rs @@ -118,11 +118,7 @@ pub async fn read_struct< pub async fn read_last_block(reader: &dyn Reader) -> object_store::Result { let file_size = reader.size().await?; let block_size = reader.block_size(); - let begin = if file_size < block_size { - 0 - } else { - file_size - block_size - }; + let begin = file_size.saturating_sub(block_size); reader.get_range(begin..file_size).await } diff --git a/rust/lance-linalg/src/simd.rs b/rust/lance-linalg/src/simd.rs index 74c3b56d3b..da4429a251 100644 --- a/rust/lance-linalg/src/simd.rs +++ b/rust/lance-linalg/src/simd.rs @@ -41,8 +41,6 @@ pub trait SIMD: /// Create a new instance with all lanes set to zero. fn zeros() -> Self; - /// Gather elements from the slice, using i32 indices. - /// Load aligned data from aligned memory. /// /// # Safety diff --git a/rust/lance-linalg/src/simd/f32.rs b/rust/lance-linalg/src/simd/f32.rs index 8deb50338b..8091bc83a1 100644 --- a/rust/lance-linalg/src/simd/f32.rs +++ b/rust/lance-linalg/src/simd/f32.rs @@ -485,7 +485,6 @@ impl<'a> From<&'a [f32; 16]> for f32x16 { impl SIMD for f32x16 { #[inline] - fn splat(val: f32) -> Self { #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] unsafe { @@ -602,7 +601,6 @@ impl SIMD for f32x16 { } #[inline] - unsafe fn store_unaligned(&self, ptr: *mut f32) { #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] unsafe { diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index 427a18de76..9c7de8ba6f 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -15,7 +15,6 @@ use datafusion::functions_aggregate; use datafusion::functions_aggregate::count::count_udaf; use datafusion::logical_expr::Expr; use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_expr_common::aggregate::AggregateExprBuilder; use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion::physical_plan::empty::EmptyExec; use datafusion::physical_plan::expressions; From 2eb07a0991384ed2eaefd9aeaf4fc4668335e7ad Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 4 Dec 2024 11:54:39 -0800 Subject: [PATCH 23/24] Rework the substrait logic a bit so we are tracking names correctly if the schema has structs --- python/src/dataset.rs | 29 +++++++-------- rust/lance-datafusion/src/substrait.rs | 49 ++++++++++++++++---------- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 331c988d86..1a0ef1f27f 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -140,7 +140,7 @@ pub struct MergeInsertBuilder { #[pymethods] impl MergeInsertBuilder { #[new] - pub fn new(dataset: Bound<'_, PyAny>, on: Bound<'_, PyAny>) -> PyResult { + pub fn new(dataset: &Bound<'_, PyAny>, on: &Bound<'_, PyAny>) -> PyResult { let dataset: Py = dataset.extract()?; let ds = dataset.borrow(on.py()).ds.clone(); // Either a single string, which we put in a vector or an iterator @@ -347,7 +347,7 @@ impl Operation { name: String, fields: Vec, dataset_version: u64, - fragment_ids: Bound<'_, PySet>, + fragment_ids: &Bound<'_, PySet>, ) -> PyResult { let fragment_ids: Vec = fragment_ids .iter() @@ -635,7 +635,7 @@ impl Dataset { use_stats: Option, substrait_filter: Option>, fast_search: Option, - full_text_query: Option>, + full_text_query: Option<&Bound<'_, PyDict>>, late_materialization: Option, use_scalar_index: Option, ) -> PyResult { @@ -991,7 +991,7 @@ impl Dataset { Ok(PyArrowType(Box::new(LanceReader::from_stream(stream)))) } - fn alter_columns(&mut self, alterations: Bound<'_, PyList>) -> PyResult<()> { + fn alter_columns(&mut self, alterations: &Bound<'_, PyList>) -> PyResult<()> { let alterations = alterations .iter() .map(|obj| { @@ -1079,7 +1079,7 @@ impl Dataset { #[pyo3(signature=(updates, predicate=None))] fn update( &mut self, - updates: Bound<'_, PyDict>, + updates: &Bound<'_, PyDict>, predicate: Option<&str>, ) -> PyResult { let mut builder = UpdateBuilder::new(self.ds.clone()); @@ -1089,7 +1089,7 @@ impl Dataset { .map_err(|err| PyValueError::new_err(err.to_string()))?; } - for (key, value) in &updates { + for (key, value) in updates { let column: &str = key.extract()?; let expr: &str = value.extract()?; @@ -1426,6 +1426,7 @@ impl Dataset { } #[staticmethod] + #[pyo3(signature = (dest, storage_options = None))] fn drop(dest: String, storage_options: Option>) -> PyResult<()> { RT.spawn(None, async move { let (object_store, path) = @@ -1444,7 +1445,7 @@ impl Dataset { dest: &Bound, operation: Operation, read_version: Option, - commit_lock: Option>, + commit_lock: Option<&Bound<'_, PyAny>>, storage_options: Option>, enable_v2_manifest_paths: Option, detached: Option, @@ -1502,7 +1503,7 @@ impl Dataset { fn commit_batch<'py>( dest: &Bound<'py, PyAny>, transactions: Vec>, - commit_lock: Option>, + commit_lock: Option<&Bound<'py, PyAny>>, storage_options: Option>, enable_v2_manifest_paths: Option, detached: Option, @@ -1589,10 +1590,10 @@ impl Dataset { #[pyo3(signature = (reader, batch_size = None))] fn add_columns_from_reader( &mut self, - reader: Bound<'_, PyAny>, + reader: &Bound<'_, PyAny>, batch_size: Option, ) -> PyResult<()> { - let batches = ArrowArrayStreamReader::from_pyarrow_bound(&reader)?; + let batches = ArrowArrayStreamReader::from_pyarrow_bound(reader)?; let transforms = NewColumnTransform::Reader(Box::new(batches)); @@ -1658,9 +1659,9 @@ impl Dataset { #[pyfunction(name = "_write_dataset")] pub fn write_dataset( - reader: Bound<'_, PyAny>, - dest: Bound<'_, PyAny>, - options: Bound<'_, PyDict>, + reader: &Bound<'_, PyAny>, + dest: &Bound<'_, PyAny>, + options: &Bound<'_, PyDict>, ) -> PyResult { let params = get_write_params(options.as_gil_ref())?; let py = options.py(); @@ -1679,7 +1680,7 @@ pub fn write_dataset( RT.block_on(Some(py), LanceDataset::write(batches, dest, params))? .map_err(|err| PyIOError::new_err(err.to_string()))? } else { - let batches = ArrowArrayStreamReader::from_pyarrow_bound(&reader)?; + let batches = ArrowArrayStreamReader::from_pyarrow_bound(reader)?; RT.block_on(Some(py), LanceDataset::write(batches, dest, params))? .map_err(|err| PyIOError::new_err(err.to_string()))? }; diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index d7ba8cf394..6f835a85f5 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use arrow_schema::Schema; +use arrow_schema::Schema as ArrowSchema; use datafusion::{ datasource::empty::EmptyTable, execution::context::SessionContext, logical_expr::Expr, }; @@ -20,7 +20,7 @@ use datafusion_substrait::substrait::proto::{ r#type::{Kind, Struct}, read_rel::{NamedTable, ReadType}, rel, Expression, ExtendedExpression, NamedStruct, Plan, PlanRel, ProjectRel, ReadRel, Rel, - RelRoot, + RelRoot, Type, }; use lance_core::{Error, Result}; use prost::Message; @@ -29,7 +29,7 @@ use std::collections::HashMap; use std::sync::Arc; /// Convert a DF Expr into a Substrait ExtendedExpressions message -pub fn encode_substrait(expr: Expr, schema: Arc) -> Result> { +pub fn encode_substrait(expr: Expr, schema: Arc) -> Result> { use datafusion::logical_expr::{builder::LogicalTableSource, logical_plan, LogicalPlan}; use datafusion_substrait::substrait::proto::{plan_rel, ExpressionReference, NamedStruct}; @@ -81,10 +81,17 @@ pub fn encode_substrait(expr: Expr, schema: Arc) -> Result> { } } +fn count_fields(dtype: &Type) -> usize { + match dtype.kind.as_ref().unwrap() { + Kind::Struct(struct_type) => struct_type.types.iter().map(count_fields).sum::() + 1, + _ => 1, + } +} + fn remove_extension_types( substrait_schema: &NamedStruct, - arrow_schema: Arc, -) -> Result<(NamedStruct, Arc, HashMap)> { + arrow_schema: Arc, +) -> Result<(NamedStruct, Arc, HashMap)> { let fields = substrait_schema.r#struct.as_ref().unwrap(); if fields.types.len() != arrow_schema.fields.len() { return Err(Error::InvalidInput { @@ -96,21 +103,25 @@ fn remove_extension_types( let mut kept_arrow_fields = Vec::with_capacity(arrow_schema.fields.len()); let mut index_mapping = HashMap::with_capacity(arrow_schema.fields.len()); let mut field_counter = 0; - for (field_index, (substrait_field, arrow_field)) in fields - .types - .iter() - .zip(arrow_schema.fields.iter()) - .enumerate() - { - if !matches!( - substrait_field.kind.as_ref().unwrap(), - Kind::UserDefined(_) | Kind::UserDefinedTypeReference(_) - ) { + let mut field_index = 0; + // TODO: this logic doesn't catch user defined fields inside of struct fields + for (substrait_field, arrow_field) in fields.types.iter().zip(arrow_schema.fields.iter()) { + let num_fields = count_fields(substrait_field); + + if !substrait_schema.names[field_index].starts_with("__unlikely_name_placeholder") + && !matches!( + substrait_field.kind.as_ref().unwrap(), + Kind::UserDefined(_) | Kind::UserDefinedTypeReference(_) + ) + { kept_substrait_fields.push(substrait_field.clone()); kept_arrow_fields.push(arrow_field.clone()); - index_mapping.insert(field_index, field_counter); - field_counter += 1; + for i in 0..num_fields { + index_mapping.insert(field_index + i, field_counter + i); + } + field_counter += num_fields; } + field_index += num_fields; } let mut names = vec![String::new(); index_mapping.len()]; for (old_idx, old_name) in substrait_schema.names.iter().enumerate() { @@ -118,7 +129,7 @@ fn remove_extension_types( names[*new_idx] = old_name.clone(); } } - let new_arrow_schema = Arc::new(Schema::new(kept_arrow_fields)); + let new_arrow_schema = Arc::new(ArrowSchema::new(kept_arrow_fields)); let new_substrait_schema = NamedStruct { names, r#struct: Some(Struct { @@ -247,7 +258,7 @@ fn remap_expr_references(expr: &mut Expression, mapping: &HashMap) /// Convert a Substrait ExtendedExpressions message into a DF Expr /// /// The ExtendedExpressions message must contain a single scalar expression -pub async fn parse_substrait(expr: &[u8], input_schema: Arc) -> Result { +pub async fn parse_substrait(expr: &[u8], input_schema: Arc) -> Result { let envelope = ExtendedExpression::decode(expr)?; if envelope.referred_expr.is_empty() { return Err(Error::InvalidInput { From 2a63d5e8d86d2c1b28a8b9c681d7a6c71789b5ed Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 4 Dec 2024 12:44:04 -0800 Subject: [PATCH 24/24] Update Java CI to use ubuntu 24.04 --- .github/workflows/java.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index aac412df33..b8ee97da2c 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -25,7 +25,7 @@ env: jobs: rust-clippy-fmt: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 name: Rust Clippy and Fmt Check defaults: run: @@ -46,7 +46,7 @@ jobs: run: cargo clippy --all-targets -- -D warnings build-and-test-java: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 strategy: matrix: java-version: [8, 11, 17]