From 80d52e956cbd719bad9ba155e896388764d9bf58 Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Tue, 23 Jan 2024 18:24:18 +0100
Subject: [PATCH] Datatype deduplication 1: remove polars (#4880)

All the grunt work left to get rid of polars.

- Remove all helpers and APIs built specifically for polars'
  `DataFrame`.
- Refactor tests that rely on dataframe joins to not require join
  semantics in the first place (`re_data_store` has no knowledge of
  those anyway).
- The one test that does require join semantics has moved over to
  `re_query`, where join semantics belong.
- All `polars-*` deps have been removed.

Don't look at the commit log, as it makes no sense: I changed
strategies a bunch of times along the way.

---

- Part of #4789
- DNR: requires #4856

---

Part of the tiny datatype deduplication PR series:
- #4880
- #4883

---
 Cargo.lock                                    | 400 +--------
 Cargo.toml                                    |   3 -
 crates/re_data_store/Cargo.toml               |  51 +-
 .../re_data_store/examples/dump_dataframe.rs  |  77 ---
 .../examples/latest_component.rs              |  57 ---
 .../examples/latest_components.rs             |  46 --
 .../examples/range_components.rs              | 100 ----
 crates/re_data_store/src/arrow_util.rs        | 235 +--------
 crates/re_data_store/src/lib.rs               |   6 -
 crates/re_data_store/src/polars_util.rs       | 291 -----------
 crates/re_data_store/src/store.rs             |   3 -
 crates/re_data_store/src/store_polars.rs      | 383 --------------
 crates/re_data_store/src/store_read.rs        | 121 -----
 crates/re_data_store/src/test_util.rs         |  34 +-
 crates/re_data_store/tests/correctness.rs     |  82 ---
 crates/re_data_store/tests/data_store.rs      | 478 +++++-------------
 crates/re_data_store/tests/dump.rs            |  35 +-
 crates/re_data_store/tests/internals.rs       |  12 +-
 crates/re_query/Cargo.toml                    |   3 +
 crates/re_query/tests/store.rs                |  76 +++
 deny.toml                                     |   4 +-
 21 files changed, 267 insertions(+), 2230 deletions(-)
 delete mode 100644 crates/re_data_store/examples/dump_dataframe.rs
 delete mode 100644 crates/re_data_store/examples/latest_component.rs
 delete mode 100644 crates/re_data_store/examples/latest_components.rs
 delete mode 100644 crates/re_data_store/examples/range_components.rs
 delete mode 100644 crates/re_data_store/src/polars_util.rs
 delete mode 100644 crates/re_data_store/src/store_polars.rs
 create mode 100644 crates/re_query/tests/store.rs

diff --git a/Cargo.lock b/Cargo.lock
index c0edd9a1455b..05782332d7e2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -247,7 +247,7 @@ dependencies = [
  "objc",
  "objc-foundation",
  "objc_id",
- "parking_lot 0.12.1",
+ "parking_lot",
  "thiserror",
  "winapi",
  "x11rb 0.12.0",
@@ -284,15 +284,6 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "argminmax"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "202108b46429b765ef483f8a24d5c46f48c14acfdacc086dd4ab6dddf6bcdbd2"
-dependencies = [
- "num-traits",
-]
-
 [[package]]
 name = "array-init"
 version = "2.1.0"
@@ -338,12 +329,9 @@ dependencies = [
  "foreign_vec",
  "getrandom",
  "hash_hasher",
- "lexical-core",
- "multiversion",
  "num-traits",
  "rustc_version",
  "simdutf8",
- "strength_reduce",
 ]
 
@@ -1195,7 +1183,6 @@
 version = "6.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d"
 dependencies = [
- "crossterm",
  "strum",
  "strum_macros",
  "unicode-width",
@@ -1416,31 +1403,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "crossterm"
-version = "0.25.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
-dependencies = [ - "bitflags 1.3.2", - "crossterm_winapi", - "libc", - "mio", - "parking_lot 0.12.1", - "signal-hook", - "signal-hook-mio", - "winapi", -] - -[[package]] -name = "crossterm_winapi" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" -dependencies = [ - "winapi", -] - [[package]] name = "crunchy" version = "0.2.2" @@ -1653,7 +1615,7 @@ dependencies = [ "js-sys", "log", "objc", - "parking_lot 0.12.1", + "parking_lot", "percent-encoding", "pollster", "puffin", @@ -1919,7 +1881,7 @@ dependencies = [ "emath", "log", "nohash-hasher", - "parking_lot 0.12.1", + "parking_lot", "serde", ] @@ -2181,21 +2143,6 @@ dependencies = [ "libc", ] -[[package]] -name = "futures" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - [[package]] name = "futures-channel" version = "0.3.28" @@ -2203,7 +2150,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", - "futures-sink", ] [[package]] @@ -2212,17 +2158,6 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" -[[package]] -name = "futures-executor" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - [[package]] name = "futures-io" version = "0.3.28" @@ -2286,7 +2221,6 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ - "futures-channel", "futures-core", "futures-io", "futures-macro", @@ -2544,16 +2478,6 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash", - "rayon", -] - [[package]] name = "hashbrown" version = "0.14.2" @@ -2987,70 +2911,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" -[[package]] -name = "lexical-core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = 
"0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util", - "static_assertions", -] - [[package]] name = "libc" version = "0.2.150" @@ -3348,28 +3208,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "multiversion" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cda45dade5144c2c929bf2ed6c24bebbba784e9198df049ec87d722b9462bd1" -dependencies = [ - "multiversion-macros", - "target-features", -] - -[[package]] -name = "multiversion-macros" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04bffdccbd4798b61dce08c97ce8c66a68976f95541aaf284a6e90c1d1c306e1" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "target-features", -] - [[package]] name = "naga" version = "0.19.0" @@ -3765,17 +3603,6 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -3783,21 +3610,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.7", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -3968,97 +3781,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "polars-arrow" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d2402d68b447ece4afca6bcac435ff0aa82d70d41c9dd3f0ed249396b41ed5b" -dependencies = [ - "arrow2", - "hashbrown 0.13.2", - "multiversion", - "num-traits", - "polars-error", - "thiserror", -] - -[[package]] -name = "polars-core" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caa1f5af0505c67333487c1287f29644c172afbab7374e48a77843646064411c" 
-dependencies = [ - "ahash", - "arrow2", - "bitflags 1.3.2", - "chrono", - "comfy-table", - "either", - "hashbrown 0.13.2", - "indexmap 1.9.3", - "num-traits", - "once_cell", - "polars-arrow", - "polars-error", - "polars-row", - "polars-utils", - "rayon", - "regex", - "smartstring", - "thiserror", - "wasm-timer", - "xxhash-rust", -] - -[[package]] -name = "polars-error" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73c760a2564a930a0b139d08045b7f2a3c0c86ce2668710b44561091d8d1e019" -dependencies = [ - "arrow2", - "regex", - "thiserror", -] - -[[package]] -name = "polars-ops" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "192e1871522f2c5b1161f7a51432f27389da751132dc3a472ac77ca213186ae1" -dependencies = [ - "argminmax", - "arrow2", - "either", - "memchr", - "polars-arrow", - "polars-core", - "polars-utils", - "smartstring", -] - -[[package]] -name = "polars-row" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe1bda56117570fc1efd79b23fe40ab2375939387c6aaa048d181f0c14fb6b8" -dependencies = [ - "arrow2", - "polars-error", - "polars-utils", -] - -[[package]] -name = "polars-utils" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1719b022df1b19866b3021ca8c4c1acf860b853f5fc17388e91c8f41064974dc" -dependencies = [ - "once_cell", - "rayon", - "smartstring", -] - [[package]] name = "poll-promise" version = "0.3.0" @@ -4250,7 +3972,7 @@ dependencies = [ "cfg-if", "lz4_flex", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "serde", ] @@ -4287,7 +4009,7 @@ dependencies = [ "indoc", "libc", "memoffset 0.9.0", - "parking_lot 0.12.1", + "parking_lot", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", @@ -4522,7 +4244,7 @@ dependencies = [ "backtrace", "itertools 0.12.0", "libc", - "parking_lot 0.12.1", + "parking_lot", "re_analytics", "re_build_info", ] @@ -4536,7 +4258,7 @@ dependencies = [ "image", "itertools 0.12.0", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "rayon", "re_build_tools", "re_log", @@ -4564,9 +4286,7 @@ dependencies = [ "mimalloc", "nohash-hasher", "once_cell", - "parking_lot 0.12.1", - "polars-core", - "polars-ops", + "parking_lot", "rand", "re_error", "re_format", @@ -4623,7 +4343,7 @@ dependencies = [ "itertools 0.12.0", "mimalloc", "nohash-hasher", - "parking_lot 0.12.1", + "parking_lot", "rand", "re_data_store", "re_format", @@ -4680,7 +4400,7 @@ dependencies = [ "js-sys", "log", "log-once", - "parking_lot 0.12.1", + "parking_lot", "tracing", "wasm-bindgen", ] @@ -4694,7 +4414,7 @@ dependencies = [ "js-sys", "lz4_flex", "mimalloc", - "parking_lot 0.12.1", + "parking_lot", "re_build_info", "re_log", "re_log_types", @@ -4759,7 +4479,7 @@ dependencies = [ "memory-stats", "nohash-hasher", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "re_format", "re_log", "re_tracing", @@ -4789,6 +4509,7 @@ dependencies = [ "re_types_core", "rmp-serde", "serde", + "similar-asserts", "smallvec", "thiserror", ] @@ -4806,7 +4527,7 @@ dependencies = [ "mimalloc", "nohash-hasher", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "paste", "rand", "re_data_store", @@ -4848,7 +4569,7 @@ dependencies = [ "never", "notify", "ordered-float", - "parking_lot 0.12.1", + "parking_lot", "pathdiff", "profiling", "re_build_tools", @@ -4906,7 +4627,7 @@ dependencies = [ "ndarray", "ndarray-rand", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "rand", "re_build_info", "re_build_tools", 
@@ -4946,7 +4667,7 @@ name = "re_smart_channel" version = "0.13.0-alpha.2" dependencies = [ "crossbeam", - "parking_lot 0.12.1", + "parking_lot", "re_tracing", "web-time", ] @@ -5023,7 +4744,7 @@ dependencies = [ "mimalloc", "nohash-hasher", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "rayon", "re_data_store", "re_data_ui", @@ -5134,7 +4855,7 @@ dependencies = [ "ahash", "nohash-hasher", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "serde", ] @@ -5271,7 +4992,7 @@ dependencies = [ "egui_commonmark", "egui_extras", "egui_tiles", - "parking_lot 0.12.1", + "parking_lot", "re_entity_db", "re_log", "re_log_types", @@ -5362,7 +5083,7 @@ dependencies = [ "ndarray", "nohash-hasher", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "re_data_source", "re_data_store", "re_entity_db", @@ -5442,7 +5163,7 @@ dependencies = [ "ewebsock", "futures-channel", "futures-util", - "parking_lot 0.12.1", + "parking_lot", "re_format", "re_log", "re_log_types", @@ -5589,7 +5310,7 @@ dependencies = [ "ahash", "arrow2", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "re_log", "re_sdk", ] @@ -5604,7 +5325,7 @@ dependencies = [ "itertools 0.12.0", "mimalloc", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "pyo3", "pyo3-build-config", "rand", @@ -6140,27 +5861,6 @@ dependencies = [ "rerun", ] -[[package]] -name = "signal-hook" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" -dependencies = [ - "libc", - "signal-hook-registry", -] - -[[package]] -name = "signal-hook-mio" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" -dependencies = [ - "libc", - "mio", - "signal-hook", -] - [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -6245,17 +5945,6 @@ dependencies = [ "serde", ] -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "static_assertions", - "version_check", -] - [[package]] name = "smithay-client-toolkit" version = "0.18.0" @@ -6358,12 +6047,6 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" -[[package]] -name = "strength_reduce" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" - [[package]] name = "strsim" version = "0.10.0" @@ -6434,12 +6117,6 @@ dependencies = [ "windows 0.51.1", ] -[[package]] -name = "target-features" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f6b473c37f9add4cf1df5b4d66a8ef58ab6c895f1a3b3f949cf3e21230140e" - [[package]] name = "target-lexicon" version = "0.12.7" @@ -7156,21 +6833,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "wasm-timer" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be0ecb0db480561e9a7642b5d3e4187c128914e58aa84330b9493e3eb68c5e7f" -dependencies = [ - "futures", - "js-sys", - "parking_lot 0.11.2", - "pin-utils", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "wasmparser" version = "0.80.2" @@ -7361,7 +7023,7 @@ dependencies = [ "js-sys", "log", 
"naga", - "parking_lot 0.12.1", + "parking_lot", "profiling", "raw-window-handle 0.6.0", "smallvec", @@ -7389,7 +7051,7 @@ dependencies = [ "log", "naga", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "profiling", "raw-window-handle 0.6.0", "rustc-hash", @@ -7430,7 +7092,7 @@ dependencies = [ "naga", "objc", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "profiling", "range-alloc", "raw-window-handle 0.6.0", @@ -7929,12 +7591,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dbabb1cbd15a1d6d12d9ed6b35cc6777d4af87ab3ba155ea37215f20beab80c" -[[package]] -name = "xxhash-rust" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "735a71d46c4d68d71d4b24d03fdc2b98e38cea81730595801db779c04fe80d70" - [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index 5c7519f26eb1..728b89cc878d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -173,9 +173,6 @@ paste = "1.0" pathdiff = "0.2" pico-args = "0.5" ply-rs = { version = "0.1", default-features = false } -polars-core = "0.29" -polars-lazy = "0.29" -polars-ops = "0.29" poll-promise = "0.3" pollster = "0.3" prettyplease = "0.2" diff --git a/crates/re_data_store/Cargo.toml b/crates/re_data_store/Cargo.toml index 314fcb452282..6a7ce969b98e 100644 --- a/crates/re_data_store/Cargo.toml +++ b/crates/re_data_store/Cargo.toml @@ -22,16 +22,13 @@ default = [] ## Enables `parking_lot`'s deadlock detection background thread. deadlock_detection = ["parking_lot/deadlock_detection"] -## Integration with `polars`, to efficiently use the datastore with dataframes. -polars = ["dep:polars-core", "dep:polars-ops"] - [dependencies] # Rerun dependencies: re_error.workspace = true re_format = { workspace = true, features = ["arrow"] } re_log_types.workspace = true -re_log.workspace = true +re_log = { workspace = true, features = ["setup"] } re_tracing.workspace = true re_types_core.workspace = true @@ -48,22 +45,6 @@ smallvec.workspace = true thiserror.workspace = true web-time.workspace = true -# Optional dependencies: -polars-core = { workspace = true, optional = true, features = [ - "diagonal_concat", - "dtype-date", - "dtype-datetime", - "dtype-time", - "dtype-struct", - "sort_multiple", -] } -polars-ops = { workspace = true, optional = true, features = [ - "dtype-date", - "dtype-datetime", - "dtype-time", - "dtype-struct", -] } - [dev-dependencies] re_types = { workspace = true, features = ["datagen", "testing"] } @@ -71,15 +52,6 @@ re_types = { workspace = true, features = ["datagen", "testing"] } anyhow.workspace = true criterion.workspace = true mimalloc.workspace = true -polars-core = { workspace = true, features = [ - "diagonal_concat", - "dtype-date", - "dtype-datetime", - "dtype-struct", - "dtype-time", - "fmt", - "sort_multiple", -] } rand.workspace = true similar-asserts.workspace = true tinyvec.workspace = true @@ -88,27 +60,6 @@ tinyvec.workspace = true bench = false -[[example]] -name = "dump_dataframe" -path = "examples/dump_dataframe.rs" -required-features = ["polars"] - -[[example]] -name = "latest_component" -path = "examples/latest_component.rs" -required-features = ["polars"] - -[[example]] -name = "latest_components" -path = "examples/latest_components.rs" -required-features = ["polars"] - -[[example]] -name = "range_components" -path = "examples/range_components.rs" -required-features = ["polars"] - - [[bench]] name = "arrow2" harness = false diff --git a/crates/re_data_store/examples/dump_dataframe.rs 
b/crates/re_data_store/examples/dump_dataframe.rs deleted file mode 100644 index c4cbaba7aef2..000000000000 --- a/crates/re_data_store/examples/dump_dataframe.rs +++ /dev/null @@ -1,77 +0,0 @@ -//! Demonstrates usage of [`DataStore::to_dataframe`]. -//! -//! ```text -//! POLARS_FMT_MAX_ROWS=100 cargo r -p re_data_store --example dump_dataframe -//! ``` - -use re_data_store::{test_row, DataStore}; -use re_log_types::{build_frame_nr, build_log_time, EntityPath, Time}; -use re_types::datagen::{build_some_instances, build_some_instances_from, build_some_positions2d}; -use re_types::{components::InstanceKey, testing::build_some_large_structs}; -use re_types_core::Loggable as _; - -// --- - -fn main() { - let mut store = DataStore::new( - re_log_types::StoreId::random(re_log_types::StoreKind::Recording), - InstanceKey::name(), - Default::default(), - ); - - let ent_paths = [ - EntityPath::from("this/that"), - EntityPath::from("and/this/other/thing"), - ]; - - for ent_path in &ent_paths { - let row1 = test_row!(ent_path @ [ - build_frame_nr(1.into()), build_log_time(Time::now()), - ] => 2; [build_some_instances(2), build_some_large_structs(2)]); - store.insert_row(&row1).unwrap(); - } - - for ent_path in &ent_paths { - let row2 = test_row!(ent_path @ [ - build_frame_nr(2.into()) - ] => 2; [build_some_instances(2), build_some_positions2d(2)]); - store.insert_row(&row2).unwrap(); - // Insert timelessly too! - let row2 = - test_row!(ent_path @ [] => 2; [build_some_instances(2), build_some_positions2d(2)]); - store.insert_row(&row2).unwrap(); - - let row3 = test_row!(ent_path @ [ - build_frame_nr(3.into()), build_log_time(Time::now()), - ] => 4; [build_some_instances_from(25..29), build_some_positions2d(4)]); - store.insert_row(&row3).unwrap(); - // Insert timelessly too! - let row3 = test_row!(ent_path @ [] => 4; [build_some_instances_from(25..29), build_some_positions2d(4)]); - store.insert_row(&row3).unwrap(); - } - - for ent_path in &ent_paths { - let row4_1 = test_row!(ent_path @ [ - build_frame_nr(4.into()), build_log_time(Time::now()), - ] => 3; [build_some_instances_from(20..23), build_some_large_structs(3)]); - store.insert_row(&row4_1).unwrap(); - - let row4_15 = test_row!(ent_path @ [ - build_frame_nr(4.into()), - ] => 3; [build_some_instances_from(20..23), build_some_positions2d(3)]); - store.insert_row(&row4_15).unwrap(); - - let row4_2 = test_row!(ent_path @ [ - build_frame_nr(4.into()), build_log_time(Time::now()), - ] => 3; [build_some_instances_from(25..28), build_some_large_structs(3)]); - store.insert_row(&row4_2).unwrap(); - - let row4_25 = test_row!(ent_path @ [ - build_frame_nr(4.into()), build_log_time(Time::now()), - ] => 3; [build_some_instances_from(25..28), build_some_positions2d(3)]); - store.insert_row(&row4_25).unwrap(); - } - - let df = store.to_dataframe(); - println!("{df}"); -} diff --git a/crates/re_data_store/examples/latest_component.rs b/crates/re_data_store/examples/latest_component.rs deleted file mode 100644 index 4e7f35048cb9..000000000000 --- a/crates/re_data_store/examples/latest_component.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! Demonstrates usage of [`re_data_store::polars_util::latest_component`]. -//! -//! ```text -//! POLARS_FMT_MAX_ROWS=100 cargo r -p re_data_store --example latest_component -//! 
``` - -use re_data_store::polars_util::latest_component; -use re_data_store::{test_row, DataStore, LatestAtQuery, TimeType, Timeline}; -use re_log_types::{build_frame_nr, EntityPath}; -use re_types::datagen::build_some_positions2d; -use re_types::{ - components::{InstanceKey, Position2D}, - testing::{build_some_large_structs, LargeStruct}, -}; -use re_types_core::Loggable as _; - -fn main() { - let mut store = DataStore::new( - re_log_types::StoreId::random(re_log_types::StoreKind::Recording), - InstanceKey::name(), - Default::default(), - ); - - let ent_path = EntityPath::from("my/entity"); - - let row = test_row!(ent_path @ [build_frame_nr(2.into())] => 4; [build_some_large_structs(4)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(3.into())] => 2; [build_some_positions2d(2)]); - store.insert_row(&row).unwrap(); - - let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - - println!("Store contents:\n{}", store.to_dataframe()); - - println!("\n-----\n"); - - let df = latest_component( - &store, - &LatestAtQuery::new(timeline_frame_nr, 10.into()), - &ent_path, - LargeStruct::name(), - ) - .unwrap(); - println!("Query results from {:?}'s PoV:\n{df}", LargeStruct::name()); - - println!("\n-----\n"); - - let df = latest_component( - &store, - &LatestAtQuery::new(timeline_frame_nr, 10.into()), - &ent_path, - Position2D::name(), - ) - .unwrap(); - println!("Query results from {:?}'s PoV:\n{df}", Position2D::name()); -} diff --git a/crates/re_data_store/examples/latest_components.rs b/crates/re_data_store/examples/latest_components.rs deleted file mode 100644 index 99adfe54a37c..000000000000 --- a/crates/re_data_store/examples/latest_components.rs +++ /dev/null @@ -1,46 +0,0 @@ -//! Demonstrates usage of [`re_data_store::polars_util::latest_components`]. -//! -//! ```text -//! POLARS_FMT_MAX_ROWS=100 cargo r -p re_data_store --example latest_components -//! ``` - -use polars_core::prelude::*; - -use re_data_store::polars_util::latest_components; -use re_data_store::{test_row, DataStore, LatestAtQuery, TimeType, Timeline}; -use re_log_types::{build_frame_nr, EntityPath}; -use re_types::datagen::build_some_positions2d; -use re_types::{ - components::{InstanceKey, Position2D}, - testing::{build_some_large_structs, LargeStruct}, -}; -use re_types_core::Loggable as _; - -fn main() { - let mut store = DataStore::new( - re_log_types::StoreId::random(re_log_types::StoreKind::Recording), - InstanceKey::name(), - Default::default(), - ); - - let ent_path = EntityPath::from("my/entity"); - - let row = test_row!(ent_path @ [build_frame_nr(2.into())] => 4; [build_some_large_structs(4)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(3.into())] => 2; [build_some_positions2d(2)]); - store.insert_row(&row).unwrap(); - - let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let df = latest_components( - &store, - &LatestAtQuery::new(timeline_frame_nr, 10.into()), - &ent_path, - &[Position2D::name(), LargeStruct::name()], - &JoinType::Outer, - ) - .unwrap(); - - println!("Store contents:\n{}", store.to_dataframe()); - println!("Query results:\n{df}"); -} diff --git a/crates/re_data_store/examples/range_components.rs b/crates/re_data_store/examples/range_components.rs deleted file mode 100644 index 002a74c94dd6..000000000000 --- a/crates/re_data_store/examples/range_components.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Demonstrates usage of [`re_data_store::polars_util::range_components`]. -//! -//! 
```text -//! POLARS_FMT_MAX_ROWS=100 cargo r -p re_data_store --all-features --example range_components -//! ``` - -use polars_core::prelude::JoinType; -use re_data_store::{polars_util, test_row, DataStore, RangeQuery, TimeRange}; -use re_log_types::{build_frame_nr, EntityPath, TimeType, Timeline}; -use re_types::datagen::build_some_positions2d; -use re_types::{ - components::{InstanceKey, Position2D}, - testing::{build_some_large_structs, LargeStruct}, -}; -use re_types_core::Loggable as _; - -fn main() { - let mut store = DataStore::new( - re_log_types::StoreId::random(re_log_types::StoreKind::Recording), - InstanceKey::name(), - Default::default(), - ); - - let ent_path = EntityPath::from("this/that"); - - let frame1 = 1.into(); - let frame2 = 2.into(); - let frame3 = 3.into(); - let frame4 = 4.into(); - - let row = test_row!(ent_path @ [build_frame_nr(frame1)] => 2; [build_some_large_structs(2)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(frame2)] => 2; [build_some_positions2d(2)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(frame3)] => 4; [build_some_positions2d(4)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(frame4)] => 3; [build_some_large_structs(3)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(frame4)] => 1; [build_some_positions2d(1)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(frame4)] => 3; [build_some_large_structs(3)]); - store.insert_row(&row).unwrap(); - - let row = test_row!(ent_path @ [build_frame_nr(frame4)] => 3; [build_some_positions2d(3)]); - store.insert_row(&row).unwrap(); - - let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let query = RangeQuery::new(timeline_frame_nr, TimeRange::new(2.into(), 4.into())); - - println!("Store contents:\n{}", store.to_dataframe()); - - println!("\n-----\n"); - - let dfs = polars_util::range_components( - &store, - &query, - &ent_path, - LargeStruct::name(), - [InstanceKey::name(), LargeStruct::name(), Position2D::name()], - &JoinType::Outer, - ); - for (time, df) in dfs.map(Result::unwrap) { - eprintln!( - "Found data at time {} from {}'s PoV (outer-joining):\n{}", - time.map_or_else( - || "".into(), - |time| TimeType::Sequence.format_utc(time) - ), - LargeStruct::name(), - df, - ); - } - - println!("\n-----\n"); - - let dfs = polars_util::range_components( - &store, - &query, - &ent_path, - Position2D::name(), - [InstanceKey::name(), LargeStruct::name(), Position2D::name()], - &JoinType::Outer, - ); - for (time, df) in dfs.map(Result::unwrap) { - eprintln!( - "Found data at time {} from {}'s PoV (outer-joining):\n{}", - time.map_or_else( - || "".into(), - |time| TimeType::Sequence.format_utc(time) - ), - Position2D::name(), - df, - ); - } -} diff --git a/crates/re_data_store/src/arrow_util.rs b/crates/re_data_store/src/arrow_util.rs index dd2fa79a8476..898025d49845 100644 --- a/crates/re_data_store/src/arrow_util.rs +++ b/crates/re_data_store/src/arrow_util.rs @@ -1,12 +1,4 @@ -use arrow2::{ - array::{ - growable::make_growable, Array, FixedSizeListArray, ListArray, StructArray, UnionArray, - }, - bitmap::Bitmap, - datatypes::{DataType, Field, UnionMode}, - offset::Offsets, -}; -use itertools::Itertools; +use arrow2::array::{Array, ListArray}; // --- @@ -16,15 +8,6 @@ pub trait ArrayExt: Array { /// * Panics if `self` is not a `ListArray`. /// * Panics if `child_nr` is out of bounds. 
fn get_child_length(&self, child_nr: usize) -> usize; - - /// Create a new `Array` which avoids problematic types for polars. - /// - /// This does the following conversion: - /// - `FixedSizeList` -> `List` - /// - `Union` -> `Struct` - /// - /// Nested types are expanded and cleaned recursively - fn clean_for_polars(&self) -> Box; } impl ArrayExt for dyn Array { @@ -42,220 +25,4 @@ impl ArrayExt for dyn Array { .nth(child_nr) .unwrap() } - - /// Create a new `Array` which avoids problematic types for polars. - /// - /// This does the following conversion: - /// - `FixedSizeList` -> `List` - /// - `Union` -> `Struct` - /// - /// Nested types are expanded and cleaned recursively - fn clean_for_polars(&self) -> Box { - let datatype = self.data_type(); - let datatype = if let DataType::Extension(_, inner, _) = datatype { - (**inner).clone() - } else { - datatype.clone() - }; - - match &datatype { - DataType::List(field) => { - // Recursively clean the contents - let typed_arr = self.as_any().downcast_ref::>().unwrap(); - let clean_vals = typed_arr.values().as_ref().clean_for_polars(); - let clean_data = DataType::List(Box::new(Field::new( - &field.name, - clean_vals.data_type().clone(), - field.is_nullable, - ))); - ListArray::::try_new( - clean_data, - typed_arr.offsets().clone(), - clean_vals, - typed_arr.validity().cloned(), - ) - .unwrap() - .boxed() - } - DataType::LargeList(field) => { - // Recursively clean the contents - let typed_arr = self.as_any().downcast_ref::>().unwrap(); - let clean_vals = typed_arr.values().as_ref().clean_for_polars(); - let clean_data = DataType::LargeList(Box::new(Field::new( - &field.name, - clean_vals.data_type().clone(), - field.is_nullable, - ))); - ListArray::::try_new( - clean_data, - typed_arr.offsets().clone(), - clean_vals, - typed_arr.validity().cloned(), - ) - .unwrap() - .boxed() - } - DataType::FixedSizeList(field, len) => { - // Recursively clean the contents and convert `FixedSizeListArray` -> `ListArray` - let typed_arr = self.as_any().downcast_ref::().unwrap(); - let clean_vals = typed_arr.values().as_ref().clean_for_polars(); - let clean_data = DataType::List(Box::new(Field::new( - &field.name, - clean_vals.data_type().clone(), - field.is_nullable, - ))); - let lengths = std::iter::repeat(len).take(typed_arr.len()).cloned(); - let offsets = Offsets::try_from_lengths(lengths).unwrap(); - ListArray::::try_new( - clean_data, - offsets.into(), - clean_vals, - typed_arr.validity().cloned(), - ) - .unwrap() - .boxed() - } - DataType::Struct(fields) => { - // Recursively clean the contents - let typed_arr = self.as_any().downcast_ref::().unwrap(); - let clean_vals = typed_arr - .values() - .iter() - .map(|v| v.as_ref().clean_for_polars()) - .collect_vec(); - let clean_fields = itertools::izip!(fields, &clean_vals) - .map(|(f, v)| Field::new(&f.name, v.data_type().clone(), f.is_nullable)) - .collect_vec(); - let clean_data = DataType::Struct(clean_fields); - StructArray::try_new(clean_data, clean_vals, typed_arr.validity().cloned()) - .unwrap() - .boxed() - } - DataType::Union(fields, ids, UnionMode::Dense) => { - // Recursively clean the contents and convert `UnionArray` -> `StructArray` - let typed_arr = self.as_any().downcast_ref::().unwrap(); - - // Note: Union calls its stored value-arrays "fields" - let clean_vals = typed_arr - .fields() - .iter() - .map(|v| v.as_ref().clean_for_polars()) - .collect_vec(); - - let ids = ids - .clone() - .unwrap_or_else(|| (0i32..(clean_vals.len() as i32)).collect_vec()); - - // For Dense Unions, the 
value-arrays need to be padded to the - // correct length, which we do by growing using the existing type - // table. - let padded_vals = itertools::izip!(&clean_vals, &ids) - .map(|(dense, id)| { - let mut next = 0; - let mut grow = make_growable(&[dense.as_ref()], true, self.len()); - typed_arr.types().iter().for_each(|t| { - if *t == *id as i8 { - grow.extend(0, next, 1); - next += 1; - } else { - grow.extend_validity(1); - } - }); - grow.as_box() - }) - .collect_vec(); - - let clean_field_types = itertools::izip!(fields, &clean_vals) - .map(|(f, v)| Field::new(&f.name, v.data_type().clone(), f.is_nullable)) - .collect_vec(); - - // The new type will be a struct - let clean_data = DataType::Struct(clean_field_types); - - StructArray::try_new(clean_data, padded_vals, typed_arr.validity().cloned()) - .unwrap() - .boxed() - } - DataType::Union(fields, ids, UnionMode::Sparse) => { - // Recursively clean the contents and convert `UnionArray` -> `StructArray` - let typed_arr = self.as_any().downcast_ref::().unwrap(); - - // Note: Union calls its stored value-arrays "fields" - let clean_vals = typed_arr - .fields() - .iter() - .map(|v| v.as_ref().clean_for_polars()) - .collect_vec(); - - let ids = ids - .clone() - .unwrap_or_else(|| (0i32..(clean_vals.len() as i32)).collect_vec()); - - // For Sparse Unions, the value-arrays is already the right - // correct length, but should have a validity derived from the types array. - let padded_vals = itertools::izip!(&clean_vals, &ids) - .map(|(sparse, id)| { - let validity = Bitmap::from( - typed_arr - .types() - .iter() - .map(|t| *t == *id as i8) - .collect_vec(), - ); - sparse.with_validity(Some(validity)) - }) - .collect_vec(); - - let clean_field_types = itertools::izip!(fields, &clean_vals) - .map(|(f, v)| Field::new(&f.name, v.data_type().clone(), f.is_nullable)) - .collect_vec(); - - // The new type will be a struct - let clean_data = DataType::Struct(clean_field_types); - - StructArray::try_new(clean_data, padded_vals, typed_arr.validity().cloned()) - .unwrap() - .boxed() - } - _ => self.to_boxed(), - } - } -} - -#[test] -fn test_clean_for_polars_nomodify() { - use re_log_types::DataCell; - use re_types::datagen::build_some_colors; - - // Colors don't need polars cleaning - let cell: DataCell = build_some_colors(5).into(); - let cleaned = cell.as_arrow_ref().clean_for_polars(); - assert_eq!(cell.as_arrow_ref(), &*cleaned); - - #[cfg(feature = "polars")] - crate::polars_util::dataframe_from_cells(&[Some(cell)]).unwrap(); -} - -#[test] -fn test_clean_for_polars_modify() { - use std::f32::consts::TAU; - - use re_log_types::DataCell; - use re_types::components::Transform3D; - use re_types::datatypes::{Angle, RotationAxisAngle, Scale3D}; - - let cell = DataCell::try_from_native([ - Transform3D::from_translation([1.0, 0.0, 0.0]), // - Transform3D::from_rotation_scale( - RotationAxisAngle::new([0.0, 0.0, 1.0], Angle::Radians(TAU / 8.)), - Scale3D::from(2.0), - ), - ]) - .unwrap(); - - let cleaned = cell.as_arrow_ref().clean_for_polars(); - assert_ne!(cell.as_arrow_ref(), &*cleaned); - - #[cfg(feature = "polars")] - crate::polars_util::dataframe_from_cells(&[Some(cell)]).unwrap(); } diff --git a/crates/re_data_store/src/lib.rs b/crates/re_data_store/src/lib.rs index 2e9d99a66211..0f3688a4c411 100644 --- a/crates/re_data_store/src/lib.rs +++ b/crates/re_data_store/src/lib.rs @@ -28,12 +28,6 @@ mod store_stats; mod store_subscriber; mod store_write; -#[cfg(feature = "polars")] -mod store_polars; - -#[cfg(feature = "polars")] -pub mod polars_util; - 
#[doc(hidden)] pub mod test_util; diff --git a/crates/re_data_store/src/polars_util.rs b/crates/re_data_store/src/polars_util.rs deleted file mode 100644 index 59b33ffdb264..000000000000 --- a/crates/re_data_store/src/polars_util.rs +++ /dev/null @@ -1,291 +0,0 @@ -use itertools::Itertools; -use polars_core::{prelude::*, series::Series}; -use polars_ops::prelude::*; -use re_log_types::{DataCell, EntityPath, RowId, TimeInt}; -use re_types_core::ComponentName; - -use crate::{ArrayExt, DataStore, LatestAtQuery, RangeQuery}; - -// --- - -pub type SharedPolarsError = Arc; - -pub type SharedResult = ::std::result::Result; - -// --- LatestAt --- - -/// Queries a single component from its own point-of-view as well as its cluster key, and -/// returns a `DataFrame`. -/// -/// As the cluster key is guaranteed to always be present, the returned dataframe can be joined -/// with any number of other dataframes returned by this function [`latest_component`] and -/// [`latest_components`]. -/// -/// See `example/latest_component.rs` for an example of use. -/// -/// # Temporal semantics -/// -/// Temporal indices take precedence, then timeless indices are queried to fill the holes left -/// by missing temporal data. -// -// TODO(cmc): can this really fail though? -pub fn latest_component( - store: &DataStore, - query: &LatestAtQuery, - ent_path: &EntityPath, - primary: ComponentName, -) -> SharedResult { - let cluster_key = store.cluster_key(); - - let components = &[cluster_key, primary]; - let (_, cells) = store - .latest_at(query, ent_path, primary, components) - .map_or_else( - || (RowId::ZERO, [(); 2].map(|_| None)), - |(_, row_id, cells)| (row_id, cells), - ); - - dataframe_from_cells(&cells) -} - -/// Queries any number of components and their cluster keys from their respective point-of-views, -/// then joins all of them in one final `DataFrame` using the specified `join_type`. -/// -/// As the cluster key is guaranteed to always be present, the returned dataframe can be joined -/// with any number of other dataframes returned by this function [`latest_component`] and -/// [`latest_components`]. -/// -/// See `example/latest_components.rs` for an example of use. -/// -/// # Temporal semantics -/// -/// Temporal indices take precedence, then timeless indices are queried to fill the holes left -/// by missing temporal data. -// -// TODO(cmc): can this really fail though? -pub fn latest_components( - store: &DataStore, - query: &LatestAtQuery, - ent_path: &EntityPath, - primaries: &[ComponentName], - join_type: &JoinType, -) -> SharedResult { - let cluster_key = store.cluster_key(); - - let dfs = primaries - .iter() - .filter(|primary| **primary != cluster_key) - .map(|primary| latest_component(store, query, ent_path, *primary)); - - join_dataframes(cluster_key, join_type, dfs) -} - -// --- Range --- - -/// Iterates over the rows of any number of components and their respective cluster keys, all from -/// the single point-of-view of the `primary` component, returning an iterator of `DataFrame`s. -/// -/// The iterator only ever yields dataframes iff the `primary` component has changed. -/// A change affecting only secondary components will not yield a dataframe. -/// -/// This is a streaming-join: every yielded dataframe will be the result of joining the latest -/// known state of all components, from their respective point-of-views. -/// -/// ⚠ The semantics are subtle! See `example/range_components.rs` for an example of use. 
-/// -/// # Temporal semantics -/// -/// Yields the contents of the temporal indices. -/// Iff the query's time range starts at `TimeInt::MIN`, this will yield the contents of the -/// timeless indices before anything else. -/// -/// When yielding timeless entries, the associated time will be `None`. -pub fn range_components<'a, const N: usize>( - store: &'a DataStore, - query: &'a RangeQuery, - ent_path: &'a EntityPath, - primary: ComponentName, - components: [ComponentName; N], - join_type: &'a JoinType, -) -> impl Iterator, DataFrame)>> + 'a { - let cluster_key = store.cluster_key(); - - // TODO(cmc): Ideally, we'd want to simply add the cluster and primary key to the `components` - // array if they are missing, yielding either `[ComponentName; N+1]` or `[ComponentName; N+2]`. - // Unfortunately this is not supported on stable at the moment, and requires - // feature(generic_const_exprs) on nightly. - // - // The alternative to these assertions (and thus putting the burden on the caller), for now, - // would be to drop the constant sizes all the way down, which would be way more painful to - // deal with. - assert!(components.contains(&cluster_key)); - assert!(components.contains(&primary)); - - let mut state = None; - - // NOTE: This will return none for `TimeInt::Min`, i.e. range queries that start infinitely far - // into the past don't have a latest-at state! - let latest_time = query.range.min.as_i64().checked_sub(1).map(Into::into); - - let mut df_latest = None; - if let Some(latest_time) = latest_time { - let df = latest_components( - store, - &LatestAtQuery::new(query.timeline, latest_time), - ent_path, - &components, - join_type, - ); - - df_latest = Some(df); - } - - let primary_col = components - .iter() - .find_position(|component| **component == primary) - .map(|(col, _)| col) - .unwrap(); // asserted on entry - - // send the latest-at state before anything else - df_latest - .into_iter() - // NOTE: `false` here means we will _not_ yield the latest-at state as an actual - // ArchetypeView! - // That is a very important detail: for overlapping range queries to be correct in a - // multi-tenant cache context, we need to make sure to inherit the latest-at state - // from T-1, while also making sure to _not_ yield the view that comes with that state. - // - // Consider e.g. what happens when one system queries for `range(10, 20)` while another - // queries for `range(9, 20)`: the data at timestamp `10` would differ because of the - // statefulness of range queries! - .map(move |df| (latest_time, false, df)) - // followed by the range - .chain( - store - .range(query, ent_path, components) - .map(move |(time, _, cells)| { - ( - time, - cells[primary_col].is_some(), // is_primary - dataframe_from_cells(&cells), - ) - }), - ) - .filter_map(move |(time, is_primary, df)| { - state = Some(join_dataframes( - cluster_key, - join_type, - // The order matters here: the newly yielded dataframe goes to the right so that it - // overwrites the data in the state if their column overlaps! - // See [`join_dataframes`]. - [state.clone() /* shallow */, Some(df)] - .into_iter() - .flatten(), - )); - - // We only yield if the primary component has been updated! - is_primary.then_some(state.clone().unwrap().map(|df| { - // Make sure to return everything in the order it was asked! 
- let columns = df.get_column_names(); - let df = df - .select( - components - .clone() - .iter() - .filter(|col| columns.contains(&col.as_ref())), - ) - .unwrap(); - (time, df) - })) - }) -} - -// --- Joins --- - -// TODO(#1759): none of this mess should be here - -pub fn dataframe_from_cells( - cells: &[Option; N], -) -> SharedResult { - let series: Result, _> = cells - .iter() - .flatten() - .map(|cell| { - Series::try_from(( - cell.component_name().as_ref(), - cell.as_arrow_ref().clean_for_polars(), - )) - }) - .collect(); - - DataFrame::new(series?).map_err(Into::into) -} - -/// Reduces an iterator of dataframes into a single dataframe by sequentially joining them using -/// the specified `join_type` and `cluster_key`. -/// -/// Note that if both the accumulator and the next dataframe in the stream share a column name -/// (other than the cluster key), the column data from the next dataframe takes precedence and -/// completely overwrites the current column data in the accumulator! -pub fn join_dataframes( - cluster_key: ComponentName, - join_type: &JoinType, - dfs: impl Iterator>, -) -> SharedResult { - let df = dfs - .into_iter() - .filter(|df| df.as_ref().map_or(true, |df| !df.is_empty())) - .reduce(|left, right| { - let mut left = left?; - let right = right?; - - // If both `left` and `right` have data for the same column, `right` always takes - // precedence. - for col in right - .get_column_names() - .iter() - .filter(|col| *col != &cluster_key) - { - _ = left.drop_in_place(col); - } - - left.join( - &right, - [cluster_key], - [cluster_key], - join_type.clone(), - None, - ) - .map(|df| drop_all_nulls(&df, &cluster_key).unwrap()) - .map_err(Into::into) - }) - .unwrap_or_else(|| Ok(DataFrame::default()))?; - - Ok(df.sort([cluster_key.as_str()], false).unwrap_or(df)) -} - -/// Returns a new `DataFrame` where all rows that only contain null values (ignoring the cluster -/// column) are dropped. -pub fn drop_all_nulls(df: &DataFrame, cluster_key: &ComponentName) -> PolarsResult { - let cols = df - .get_column_names() - .into_iter() - .filter(|col| *col != cluster_key.as_str()); - - let mut iter = df.select_series(cols)?.into_iter(); - - // fast path for no nulls in df - if iter.clone().all(|s| !s.has_validity()) { - return Ok(df.clone()); - } - - let mask = iter - .next() - .ok_or_else(|| PolarsError::NoData("No data to drop nulls from".into()))?; - let mut mask = mask.is_not_null(); - - for s in iter { - mask = mask | s.is_not_null(); - } - df.filter(&mask) -} diff --git a/crates/re_data_store/src/store.rs b/crates/re_data_store/src/store.rs index 943d2c7160c7..62885548f32c 100644 --- a/crates/re_data_store/src/store.rs +++ b/crates/re_data_store/src/store.rs @@ -168,9 +168,6 @@ pub struct StoreGeneration { /// know what's going on internally. /// For even more information, you can set `RERUN_DATA_STORE_DISPLAY_SCHEMAS=1` in your /// environment, which will result in additional schema information being printed out. -/// -/// Additionally, if the `polars` feature is enabled, you can dump the entire datastore as a -/// flat denormalized dataframe using [`Self::to_dataframe`]. 
pub struct DataStore { pub(crate) id: StoreId, diff --git a/crates/re_data_store/src/store_polars.rs b/crates/re_data_store/src/store_polars.rs deleted file mode 100644 index bb7f7dc9e005..000000000000 --- a/crates/re_data_store/src/store_polars.rs +++ /dev/null @@ -1,383 +0,0 @@ -#![allow(clippy::all, unused_variables, dead_code)] - -use std::collections::{BTreeSet, VecDeque}; - -use arrow2::{ - array::{new_empty_array, Array, BooleanArray, ListArray, Utf8Array}, - bitmap::Bitmap, - compute::concatenate::concatenate, - offset::Offsets, -}; -use polars_core::{functions::diag_concat_df, prelude::*}; -use re_log_types::{DataCell, DataTable}; -use re_types_core::ComponentName; - -use crate::{ - store::InsertIdVec, ArrayExt, DataStore, DataStoreConfig, IndexedBucket, IndexedBucketInner, - PersistentIndexedTable, PersistentIndexedTableInner, -}; - -// TODO(#1692): all of this stuff should be defined by Data{Cell,Row,Table}, not the store. -// TODO(#1759): remove this and reimplement it on top of store serialization - -// --- - -impl DataStore { - /// Dumps the entire datastore as a flat, denormalized dataframe. - /// - /// This cannot fail: it always tries to yield as much valuable information as it can, even in - /// the face of errors. - pub fn to_dataframe(&self) -> DataFrame { - re_tracing::profile_function!(); - - const TIMELESS_COL: &str = "_is_timeless"; - - let timeless_dfs = self.timeless_tables.values().map(|index| { - let ent_path = index.ent_path.clone(); - - let mut df = index.to_dataframe(self, &self.config); - let num_rows = df.get_columns()[0].len(); - - // Add a column where every row is a boolean true (timeless) - let timeless = { - let timeless = BooleanArray::from(vec![Some(true); num_rows]).boxed(); - new_infallible_series(TIMELESS_COL, timeless.as_ref(), num_rows) - }; - let df = df.with_column(timeless).unwrap(); // cannot fail - - (ent_path, df.clone()) - }); - - let temporal_dfs = self.tables.values().map(|index| { - let dfs: Vec<_> = index - .buckets - .values() - .map(|bucket| (index.ent_path.clone(), bucket)) - .map(|(ent_path, bucket)| { - let mut df = bucket.to_dataframe(self, &self.config); - let num_rows = df.get_columns()[0].len(); - - // Add a column where every row is the entity path. - let entities = { - let ent_path = ent_path.to_string(); - let ent_path = Some(ent_path.as_str()); - let entities = Utf8Array::::from(vec![ent_path; num_rows]).boxed(); - new_infallible_series("entity", entities.as_ref(), num_rows) - }; - let df = df.with_column(entities).unwrap(); // cannot fail - - df.clone() - }) - .collect(); - - // Concatenate all buckets of the index together. - // - // This has to be done diagonally since each bucket can and will have different - // numbers of columns (== components) and rows. - let df = diag_concat_df(dfs.as_slice()) - // TODO(cmc): is there any way this can fail in this case? - .unwrap(); - - (index.ent_path.clone(), df) - }); - - let dfs: Vec<_> = timeless_dfs - .chain(temporal_dfs) - .map(|(ent_path, mut df)| { - let num_rows = df.get_columns()[0].len(); - // Add a column where every row is the entity path. - let entities = { - let ent_path = ent_path.to_string(); - let ent_path = Some(ent_path.as_str()); - let entities = Utf8Array::::from(vec![ent_path; num_rows]).boxed(); - new_infallible_series("entity", entities.as_ref(), num_rows) - }; - df.with_column(entities).unwrap().clone() // cannot fail - }) - .collect(); - - // Some internal functions of `polars` will panic if everything's empty: early exit. 
- if dfs.iter().all(|df| df.is_empty()) { - return DataFrame::empty(); - } - - // Concatenate all indices together. - // - // This has to be done diagonally since these indices refer to different entities with - // potentially wildly different sets of components and lengths. - // - // NOTE: The only way this can fail in this case is if all these frames are empty, because - // the store itself is empty, which we check just above. - let df = diag_concat_df(dfs.as_slice()).unwrap(); - - // Arrange the columns in the order that makes the most sense as a user. - let timelines: BTreeSet<&str> = self - .tables - .keys() - .map(|(_, timeline)| timeline.name().as_str()) - .collect(); - let df = sort_df_columns(&df, self.config.store_insert_ids, &timelines); - - let has_timeless = df.column(TIMELESS_COL).is_ok(); - let insert_id_col = DataStore::insert_id_component_name(); - - const ASCENDING: bool = false; - const DESCENDING: bool = true; - - // Now we want to sort based on _the contents_ of the columns, and we need to make sure - // we do so in as stable a way as possible given our constraints: we cannot actually sort - // the component columns themselves as they are internally lists of their own. - let (sort_cols, sort_orders): (Vec<_>, Vec<_>) = [ - df.column(TIMELESS_COL) - .is_ok() - .then_some((TIMELESS_COL, DESCENDING)), - df.column(insert_id_col.as_ref()) - .is_ok() - .then_some((insert_id_col.as_ref(), ASCENDING)), - ] - .into_iter() - .flatten() - // NOTE: Already properly arranged above, and already contains insert_id if needed. - .chain( - df.get_column_names() - .into_iter() - .filter(|col| *col != TIMELESS_COL) // we handle this one separately - .filter(|col| *col != insert_id_col) // we handle this one separately - .filter(|col| df.column(col).unwrap().list().is_err()) // lists cannot be sorted - .map(|col| (col, ASCENDING)), - ) - .unzip(); - - let df = if !sort_cols.is_empty() { - df.sort(sort_cols, sort_orders).unwrap() - } else { - df - }; - - if has_timeless { - df.drop(TIMELESS_COL).unwrap() - } else { - df - } - } -} - -impl PersistentIndexedTable { - /// Dumps the entire table as a flat, denormalized dataframe. - /// - /// This cannot fail: it always tries to yield as much valuable information as it can, even in - /// the face of errors. - pub fn to_dataframe(&self, store: &DataStore, config: &DataStoreConfig) -> DataFrame { - re_tracing::profile_function!(); - - let Self { - ent_path: _, - cluster_key: _, - inner, - } = self; - - let inner = &*inner.read(); - let PersistentIndexedTableInner { - col_insert_id, - col_row_id, - col_num_instances, - columns, - is_sorted, - } = inner; - - let num_rows = inner.num_rows() as usize; - - let insert_ids = config - .store_insert_ids - .then(|| insert_ids_as_series(&col_insert_id)); - - let comp_series = - // One column for insert IDs, if they are available. - std::iter::once(insert_ids) - .flatten() // filter options - .chain(columns.iter().filter_map(|(component, cells)| { - let datatype = store.lookup_datatype(component)?.clone(); - column_as_series(store, num_rows, datatype, *component, cells).into() - })); - - DataFrame::new(comp_series.collect::>()) - // This cannot fail at this point, all series are guaranteed to have data and be of - // same length. - .unwrap() - } -} - -impl IndexedBucket { - /// Dumps the entire bucket as a flat, denormalized dataframe. - /// - /// This cannot fail: it always tries to yield as much valuable information as it can, even in - /// the face of errors. 
- pub fn to_dataframe(&self, store: &DataStore, config: &DataStoreConfig) -> DataFrame { - re_tracing::profile_function!(); - - let IndexedBucketInner { - is_sorted: _, - time_range: _, - col_time, - col_insert_id, - col_row_id, - max_row_id: _, - col_num_instances, - columns, - size_bytes: _, - } = &*self.inner.read(); - - let (_, times) = DataTable::serialize_primitive_column( - self.timeline.name(), - col_time, - self.timeline.datatype().into(), - ); - let num_rows = times.len(); - - let insert_ids = config - .store_insert_ids - .then(|| insert_ids_as_series(&col_insert_id)); - - // Need to create one `Series` for the time index and one for each component index. - let comp_series = [ - // One column for insert IDs, if they are available. - insert_ids, - // One column for the time index. - Some(new_infallible_series( - self.timeline.name().as_str(), - &*times, - num_rows, - )), - ] - .into_iter() - .flatten() // filter options - // One column for each component index. - .chain(columns.iter().filter_map(|(component, cells)| { - let datatype = store.lookup_datatype(component)?.clone(); - column_as_series(store, num_rows, datatype, *component, cells).into() - })); - - DataFrame::new(comp_series.collect::>()) - // This cannot fail at this point, all series are guaranteed to have data and be of - // same length. - .unwrap() - } -} - -// --- - -fn insert_ids_as_series(col_insert_id: &InsertIdVec) -> Series { - re_tracing::profile_function!(); - - let insert_ids = DataTable::serialize_primitive_deque(col_insert_id); - new_infallible_series( - DataStore::insert_id_component_name().as_ref(), - &insert_ids, - insert_ids.len(), - ) -} - -fn column_as_series( - store: &DataStore, - num_rows: usize, - datatype: arrow2::datatypes::DataType, - component: ComponentName, - cells: &VecDeque>, -) -> Series { - re_tracing::profile_function!(); - - // Computing the validity bitmap is just a matter of checking whether the data was - // available in the component tables. - let comp_validity: Vec<_> = cells.iter().map(|cell| cell.is_some()).collect(); - - // Each cell is actually a list, so we need to compute offsets one cell at a time. - let comp_lengths = cells.iter().map(|cell| { - cell.as_ref() - .map_or(0, |cell| cell.num_instances() as usize) - }); - - let comp_values: Vec<_> = cells - .iter() - .flatten() - .map(|cell| cell.as_arrow_ref()) - .collect(); - - // Bring everything together into one big list. - let comp_values = ListArray::::new( - ListArray::::default_datatype(datatype.clone()), - Offsets::try_from_lengths(comp_lengths).unwrap().into(), - // It's possible that all rows being referenced were already garbage collected (or simply - // never existed to begin with), at which point `comp_rows` will be empty… and you can't - // call `concatenate` on an empty list without panicking. 
- if comp_values.is_empty() { - new_empty_array(datatype) - } else { - concatenate(comp_values.as_slice()).unwrap().to_boxed() - }, - Some(Bitmap::from(comp_validity)), - ); - - new_infallible_series(component.as_ref(), &comp_values, num_rows) -} - -// --- - -fn new_infallible_series(name: &str, data: &dyn Array, len: usize) -> Series { - re_tracing::profile_function!(); - - Series::try_from((name, data.as_ref().clean_for_polars())).unwrap_or_else(|_| { - let errs = Utf8Array::<i32>::from(vec![Some("<error>"); len]); - Series::try_from((name, errs.boxed())).unwrap() // cannot fail - }) -} - -/// Sorts the columns of the given dataframe according to the following rules: -// - insert ID comes first if it's available, -// - followed by lexically sorted timelines, -// - followed by the entity path, -// - followed by native components (i.e. "rerun.XXX") in lexical order, -// - and finally extension components (i.e. "ext.XXX") in lexical order. -fn sort_df_columns( - df: &DataFrame, - store_insert_ids: bool, - timelines: &BTreeSet<&str>, -) -> DataFrame { - re_tracing::profile_function!(); - - let columns: Vec<_> = { - let mut all = df.get_column_names(); - all.sort(); - - all.remove(all.binary_search(&"entity").expect("has to exist")); - - let timelines = timelines.iter().copied().map(Some).collect::<Vec<_>>(); - - let native_components = all - .iter() - .copied() - .filter(|name| name.starts_with("rerun.")) - .map(Some) - .collect::<Vec<_>>(); - - let extension_components = all - .iter() - .copied() - .filter(|name| name.starts_with("ext.")) - .map(Some) - .collect::<Vec<_>>(); - - [ - // vec![store_insert_ids.then(|| DataStore::insert_id_key().as_str())], - timelines, - vec![Some("entity")], - native_components, - extension_components, - ] - .into_iter() - .flatten() // flatten vectors - .flatten() // filter options - .collect() - }; - - df.select(columns).unwrap() -} diff --git a/crates/re_data_store/src/store_read.rs b/crates/re_data_store/src/store_read.rs index 3a598f41c937..5c36a08c28f7 100644 --- a/crates/re_data_store/src/store_read.rs +++ b/crates/re_data_store/src/store_read.rs @@ -215,55 +215,6 @@ impl DataStore { /// /// Temporal indices take precedence, then timeless tables are queried to fill the holes left /// by missing temporal data. - /// - /// ## Example - /// - /// The following example demonstrates how to fetch the latest cells for a given component - /// and its associated cluster key, and wrap the result into a nice-to-work-with polars - /// dataframe.
- /// - /// ```rust - /// # use polars_core::{prelude::*, series::Series}; - /// # use re_log_types::{EntityPath, RowId, TimeInt}; - /// # use re_types_core::{ComponentName}; - /// # use re_data_store::{DataStore, LatestAtQuery, RangeQuery}; - /// # - /// pub fn latest_component( - /// store: &DataStore, - /// query: &LatestAtQuery, - /// ent_path: &EntityPath, - /// primary: ComponentName, - /// ) -> anyhow::Result<DataFrame> { - /// let cluster_key = store.cluster_key(); - /// - /// let components = &[cluster_key, primary]; - /// let (_, cells) = store - /// .latest_at(&query, ent_path, primary, components) - /// .map_or_else( - /// || (RowId::ZERO, [(); 2].map(|_| None)), - /// |(_, row_id, cells)| (row_id, cells), - /// ); - /// - /// let series: Result<Vec<_>, _> = cells - /// .iter() - /// .flatten() - /// .map(|cell| { - /// Series::try_from(( - /// cell.component_name().as_str(), - /// cell.to_arrow(), - /// )) - /// }) - /// .collect(); - /// - /// DataFrame::new(series?).map_err(Into::into) - /// } - /// ``` - /// - /// Thanks to the cluster key, one is free to repeat this process as many times as they wish, - /// then reduce the resulting dataframes down to one by joining them as they see fit. - /// This is what our `latest_components` polars helper does. - /// - /// For more information about working with dataframes, see the `polars` feature. pub fn latest_at<const N: usize>( &self, query: &LatestAtQuery, @@ -390,78 +341,6 @@ impl DataStore { /// timeless tables before anything else. /// /// When yielding timeless entries, the associated time will be `None`. - /// - /// ## Example - /// - /// The following example demonstrates how to range over the cells of a given - /// component and its associated cluster key, and turn the results into a nice-to-work-with - /// iterator of polars dataframes. - /// Additionally, it yields the latest-at state of the component at the start of the time range, - /// if available. - /// - /// ```rust - /// # use arrow2::array::Array; - /// # use polars_core::{prelude::*, series::Series}; - /// # use re_log_types::{DataCell, EntityPath, RowId, TimeInt}; - /// # use re_data_store::{DataStore, LatestAtQuery, RangeQuery}; - /// # use re_types_core::ComponentName; - /// # - /// # pub fn dataframe_from_cells<const N: usize>( - /// # cells: [Option<DataCell>; N], - /// # ) -> anyhow::Result<DataFrame> { - /// # let series: Result<Vec<_>, _> = cells - /// # .iter() - /// # .flatten() - /// # .map(|cell| { - /// # Series::try_from(( - /// # cell.component_name().as_ref(), - /// # cell.to_arrow(), - /// # )) - /// # }) - /// # .collect(); - /// # - /// # DataFrame::new(series?).map_err(Into::into) - /// # } - /// # - /// pub fn range_component<'a>( - /// store: &'a DataStore, - /// query: &'a RangeQuery, - /// ent_path: &'a EntityPath, - /// primary: ComponentName, - /// ) -> impl Iterator<Item = anyhow::Result<(Option<TimeInt>, DataFrame)>> + 'a { - /// let cluster_key = store.cluster_key(); - /// - /// let components = [cluster_key, primary]; - /// - /// // Fetch the latest-at data just before the start of the time range. - /// let latest_time = query.range.min.as_i64().saturating_sub(1).into(); - /// let df_latest = { - /// let query = LatestAtQuery::new(query.timeline, latest_time); - /// let (_, cells) = store - /// .latest_at(&query, ent_path, primary, &components) - /// .map_or_else( - /// || (RowId::ZERO, [(); 2].map(|_| None)), - /// |(_, row_id, cells)| (row_id, cells), - /// ); - /// dataframe_from_cells(cells) - /// }; - /// - /// // Send the latest-at state before anything else..
- /// std::iter::once(df_latest.map(|df| (Some(latest_time), df))) - /// // ..but only if it's not an empty dataframe. - /// .filter(|df| df.as_ref().map_or(true, |(_, df)| !df.is_empty())) - /// .chain(store.range(query, ent_path, components).map( - /// move |(time, _, cells)| dataframe_from_cells(cells).map(|df| (time, df)) - /// )) - /// } - /// ``` - /// - /// Thanks to the cluster key, one is free to repeat this process as many times as they wish, - /// then join the resulting streams to yield a full-fledged dataframe for every update of the - /// primary component. - /// This is what our `range_components` polars helper does. - /// - /// For more information about working with dataframes, see the `polars` feature. pub fn range<'a, const N: usize>( &'a self, query: &RangeQuery, diff --git a/crates/re_data_store/src/test_util.rs b/crates/re_data_store/src/test_util.rs index bfdad6b8130b..c717a6e35115 100644 --- a/crates/re_data_store/src/test_util.rs +++ b/crates/re_data_store/src/test_util.rs @@ -1,4 +1,6 @@ -use crate::{DataStore, DataStoreConfig}; +use re_log_types::DataTable; + +use crate::{DataStore, DataStoreConfig, WriteError}; // --- @@ -61,3 +63,33 @@ pub fn sanity_unwrap(store: &DataStore) { err.unwrap(); } } + +// We very often re-use RowIds when generating test data. +pub fn insert_table_with_retries(store: &mut DataStore, table: &DataTable) { + for row in table.to_rows() { + let mut row = row.unwrap(); + loop { + match store.insert_row(&row) { + Ok(_) => break, + Err(WriteError::ReusedRowId(_)) => { + row.row_id = row.row_id.next(); + } + err @ Err(_) => err.map(|_| ()).unwrap(), + } + } + } +} + +#[cfg(not(target_arch = "wasm32"))] +pub fn init_logs() { + use std::sync::atomic::{AtomicBool, Ordering}; + + static INIT: AtomicBool = AtomicBool::new(false); + + if INIT + .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() + { + re_log::setup_native_logging(); + } +} diff --git a/crates/re_data_store/tests/correctness.rs b/crates/re_data_store/tests/correctness.rs index 70392f61eee2..450271ac5454 100644 --- a/crates/re_data_store/tests/correctness.rs +++ b/crates/re_data_store/tests/correctness.rs @@ -408,88 +408,6 @@ fn latest_at_emptiness_edge_cases_impl(store: &mut DataStore) { // --- -// This one demonstrates a nasty edge case when stream-joining multiple iterators that happen to -// share the same exact row of data at some point (because, for that specific entry, it turns out -// that those components were inserted together). -// -// When that happens, one must be very careful to not only compare time and index row numbers, but -// also make sure that, if all else is equal, the primary iterator comes last so that it gathers as -// much state as possible!
- -#[cfg(feature = "polars")] -#[test] -fn range_join_across_single_row() { - init_logs(); - - for config in re_data_store::test_util::all_configs() { - let mut store = DataStore::new( - re_log_types::StoreId::random(re_log_types::StoreKind::Recording), - InstanceKey::name(), - config.clone(), - ); - range_join_across_single_row_impl(&mut store); - } -} - -#[cfg(feature = "polars")] -fn range_join_across_single_row_impl(store: &mut DataStore) { - use polars_core::{ - prelude::{DataFrame, JoinType}, - series::Series, - }; - use re_data_store::ArrayExt as _; - use re_types::components::{Color, Position2D}; - - let ent_path = EntityPath::from("this/that"); - - let positions = build_some_positions2d(3); - let colors = build_some_colors(3); - let row = - test_row!(ent_path @ [build_frame_nr(42.into())] => 3; [positions.clone(), colors.clone()]); - store.insert_row(&row).unwrap(); - - let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let query = re_data_store::RangeQuery::new( - timeline_frame_nr, - re_data_store::TimeRange::new(TimeInt::MIN, TimeInt::MAX), - ); - let components = [InstanceKey::name(), Position2D::name(), Color::name()]; - let dfs = re_data_store::polars_util::range_components( - store, - &query, - &ent_path, - Position2D::name(), - components, - &JoinType::Outer, - ) - .collect::<Vec<_>>(); - - let df_expected = { - let instances = - InstanceKey::to_arrow(vec![InstanceKey(0), InstanceKey(1), InstanceKey(2)]).unwrap(); - let positions = Position2D::to_arrow(positions).unwrap(); - let colors = Color::to_arrow(colors).unwrap(); - - DataFrame::new(vec![ - Series::try_from((InstanceKey::name().as_ref(), instances)).unwrap(), - Series::try_from(( - Position2D::name().as_ref(), - positions.as_ref().clean_for_polars(), - )) - .unwrap(), - Series::try_from((Color::name().as_ref(), colors)).unwrap(), - ]) - .unwrap() - }; - - assert_eq!(1, dfs.len()); - let (_, df) = dfs[0].clone().unwrap(); - - assert_eq!(df_expected, df); -} - -// --- - #[test] fn gc_correct() { init_logs(); diff --git a/crates/re_data_store/tests/data_store.rs b/crates/re_data_store/tests/data_store.rs index 136c0da84bb8..66fd981e5304 100644 --- a/crates/re_data_store/tests/data_store.rs +++ b/crates/re_data_store/tests/data_store.rs @@ -1,24 +1,16 @@ -#![cfg(feature = "polars")] - //! Straightforward high-level API tests. //! //! Testing & demonstrating expected usage of the datastore APIs, no funny stuff.
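+//! +//! A minimal sketch of the access pattern these tests exercise now that the polars helpers are gone (`store`, `row`, `query` and `ent_path` are hypothetical; this block is illustrative only and not compiled as part of the suite): +//! +//! ```ignore +//! store.insert_row(&row).unwrap(); +//! let (_, _, cells) = store +//!     .latest_at::<1>(&query, &ent_path, Color::name(), &[Color::name()]) +//!     .unwrap(); +//! ```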
-use std::sync::atomic::{AtomicBool, Ordering}; - -use nohash_hasher::IntMap; -use polars_core::{prelude::*, series::Series}; -use polars_ops::prelude::DataFrameJoinOps; +use itertools::Itertools; use rand::Rng; -use re_data_store::WriteError; use re_data_store::{ - polars_util, test_row, test_util::sanity_unwrap, ArrayExt as _, DataStore, DataStoreConfig, - DataStoreStats, GarbageCollectionOptions, GarbageCollectionTarget, LatestAtQuery, RangeQuery, - TimeInt, TimeRange, -}; -use re_log_types::{ - build_frame_nr, DataCell, DataRow, DataTable, EntityPath, TableId, TimeType, Timeline, + test_row, + test_util::{init_logs, insert_table_with_retries, sanity_unwrap}, + DataStore, DataStoreConfig, DataStoreStats, GarbageCollectionOptions, GarbageCollectionTarget, + LatestAtQuery, RangeQuery, TimeInt, TimeRange, }; +use re_log_types::{build_frame_nr, DataRow, DataTable, EntityPath, TableId, TimeType, Timeline}; use re_types::datagen::{ build_some_colors, build_some_instances, build_some_instances_from, build_some_positions2d, }; @@ -28,24 +20,6 @@ use re_types::{ }; use re_types_core::{ComponentName, Loggable as _}; -// --- - -// We very often re-use RowIds when generating test data. -fn insert_table_with_retries(store: &mut DataStore, table: &DataTable) { - for row in table.to_rows() { - let mut row = row.unwrap(); - loop { - match store.insert_row(&row) { - Ok(_) => break, - Err(WriteError::ReusedRowId(_)) => { - row.row_id = row.row_id.next(); - } - err @ Err(_) => err.map(|_| ()).unwrap(), - } - } - } -} - // --- LatestComponentsAt --- #[test] @@ -354,21 +328,25 @@ fn latest_at_impl(store: &mut DataStore) { let assert_latest_components = |frame_nr: TimeInt, rows: &[(ComponentName, &DataRow)]| { let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let components_all = &[Color::name(), Position2D::name()]; - - let df = polars_util::latest_components( - &store, - &LatestAtQuery::new(timeline_frame_nr, frame_nr), - &ent_path, - components_all, - &JoinType::Outer, - ) - .unwrap(); - let df_expected = joint_df(store.cluster_key(), rows); - - store.sort_indices_if_needed(); - assert_eq!(df_expected, df, "{store}"); + for (component_name, expected) in rows { + let (_, _, cells) = store + .latest_at::<1>( + &LatestAtQuery::new(timeline_frame_nr, frame_nr), + &ent_path, + *component_name, + &[*component_name], + ) + .unwrap(); + + let expected = expected + .cells + .iter() + .filter(|cell| cell.component_name() == *component_name) + .collect_vec(); + let actual = cells.iter().flatten().collect_vec(); + assert_eq!(expected, actual); + } }; // TODO(cmc): bring back some log_time scenarios @@ -498,332 +476,121 @@ fn range_impl(store: &mut DataStore) { } let store = store2; - let mut expected_timeless = Vec::<DataFrame>::new(); - let mut expected_at_times: IntMap<TimeInt, Vec<DataFrame>> = Default::default(); - - for (time, rows) in rows_at_times { - if let Some(time) = time { - let dfs = expected_at_times.entry(*time).or_default(); - dfs.push(joint_df(store.cluster_key(), rows)); - } else { - expected_timeless.push(joint_df(store.cluster_key(), rows)); - } - } - let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); store.sort_indices_if_needed(); // for assertions below - let components = [InstanceKey::name(), components[0], components[1]]; + let components = [components[0], components[1]]; let query = RangeQuery::new(timeline_frame_nr, time_range); - let dfs = polars_util::range_components( - &store, - &query, - &ent_path, - components[1], - components, - &JoinType::Outer, - ); - - let mut 
dfs_processed = 0usize; - let mut timeless_count = 0usize; - let mut time_counters: IntMap<i64, usize> = Default::default(); - for (time, df) in dfs.map(Result::unwrap) { - let df_expected = if let Some(time) = time { - let time_count = time_counters.entry(time.as_i64()).or_default(); - let df_expected = &expected_at_times[&time][*time_count]; - *time_count += 1; - df_expected - } else { - let df_expected = &expected_timeless[timeless_count]; - timeless_count += 1; - df_expected - }; - - assert_eq!(*df_expected, df, "{store}"); - - dfs_processed += 1; + let results = store.range(&query, &ent_path, components); + + let mut results_processed = 0usize; + for (i, (time, _, cells)) in results.enumerate() { + let (expected_time, expected_rows) = rows_at_times[i]; + assert_eq!(expected_time, time); + + for (component_name, expected) in expected_rows { + let expected = expected + .cells + .iter() + .filter(|cell| cell.component_name() == *component_name) + .collect_vec(); + let actual = cells.iter().flatten().collect_vec(); + assert_eq!(expected, actual); + + results_processed += 1; + } } - let dfs_processed_expected = rows_at_times.len(); - assert_eq!(dfs_processed_expected, dfs_processed); + let results_processed_expected = rows_at_times.len(); + assert_eq!(results_processed_expected, results_processed); }; // TODO(cmc): bring back some log_time scenarios - // Unit ranges (Color's PoV) + // Unit ranges (multi-PoV) assert_range_components( TimeRange::new(frame1, frame1), [Color::name(), Position2D::name()], &[ - ( - Some(frame1), - &[ - (Color::name(), &row1), - (Position2D::name(), &row4_4), // timeless - ], - ), // + (Some(frame1), &[(Color::name(), &row1)]), // ], ); assert_range_components( TimeRange::new(frame2, frame2), [Color::name(), Position2D::name()], - &[], - ); - assert_range_components( - TimeRange::new(frame3, frame3), - [Color::name(), Position2D::name()], - &[], - ); - assert_range_components( - TimeRange::new(frame4, frame4), - [Color::name(), Position2D::name()], - &[ - ( - Some(frame4), - &[(Color::name(), &row4_1), (Position2D::name(), &row3)], - ), - ( - Some(frame4), - &[(Color::name(), &row4_2), (Position2D::name(), &row3)], - ), - ( - Some(frame4), - &[(Color::name(), &row4_3), (Position2D::name(), &row4_25)], // !!!
- ), - ], - ); - assert_range_components( - TimeRange::new(frame5, frame5), - [Color::name(), Position2D::name()], - &[], - ); - - // Unit ranges (Position2D's PoV) - - assert_range_components( - TimeRange::new(frame1, frame1), - [Position2D::name(), Color::name()], - &[], - ); - assert_range_components( - TimeRange::new(frame2, frame2), - [Position2D::name(), Color::name()], &[ - ( - Some(frame2), - &[(Position2D::name(), &row2), (Color::name(), &row1)], - ), // + (Some(frame2), &[(Position2D::name(), &row2)]), // ], ); assert_range_components( TimeRange::new(frame3, frame3), - [Position2D::name(), Color::name()], + [Color::name(), Position2D::name()], &[ - ( - Some(frame3), - &[(Position2D::name(), &row3), (Color::name(), &row1)], - ), // + (Some(frame3), &[(Position2D::name(), &row3)]), // ], ); assert_range_components( TimeRange::new(frame4, frame4), - [Position2D::name(), Color::name()], + [Color::name(), Position2D::name()], &[ - ( - Some(frame4), - &[(Position2D::name(), &row4_25), (Color::name(), &row4_2)], - ), - ( - Some(frame4), - &[(Position2D::name(), &row4_4), (Color::name(), &row4_3)], - ), + (Some(frame4), &[(Color::name(), &row4_1)]), + (Some(frame4), &[(Color::name(), &row4_2)]), + (Some(frame4), &[(Position2D::name(), &row4_25)]), + (Some(frame4), &[(Color::name(), &row4_3)]), + (Some(frame4), &[(Position2D::name(), &row4_4)]), ], ); assert_range_components( TimeRange::new(frame5, frame5), - [Position2D::name(), Color::name()], + [Color::name(), Position2D::name()], &[], ); - // Full range (Color's PoV) + // Full range (multi-PoV) assert_range_components( TimeRange::new(frame1, frame5), [Color::name(), Position2D::name()], &[ - ( - Some(frame1), - &[ - (Color::name(), &row1), - (Position2D::name(), &row4_4), // timeless - ], - ), - ( - Some(frame4), - &[(Color::name(), &row4_1), (Position2D::name(), &row3)], - ), - ( - Some(frame4), - &[(Color::name(), &row4_2), (Position2D::name(), &row3)], - ), - ( - Some(frame4), - &[(Color::name(), &row4_3), (Position2D::name(), &row4_25)], // !!! - ), + (Some(frame1), &[(Color::name(), &row1)]), // + (Some(frame2), &[(Position2D::name(), &row2)]), // + (Some(frame3), &[(Position2D::name(), &row3)]), // + (Some(frame4), &[(Color::name(), &row4_1)]), + (Some(frame4), &[(Color::name(), &row4_2)]), + (Some(frame4), &[(Position2D::name(), &row4_25)]), + (Some(frame4), &[(Color::name(), &row4_3)]), + (Some(frame4), &[(Position2D::name(), &row4_4)]), ], ); - // Full range (Position2D's PoV) - - assert_range_components( - TimeRange::new(frame1, frame5), - [Position2D::name(), Color::name()], - &[ - ( - Some(frame2), - &[(Position2D::name(), &row2), (Color::name(), &row1)], - ), - ( - Some(frame3), - &[(Position2D::name(), &row3), (Color::name(), &row1)], - ), - ( - Some(frame4), - &[(Position2D::name(), &row4_25), (Color::name(), &row4_2)], - ), - ( - Some(frame4), - &[(Position2D::name(), &row4_4), (Color::name(), &row4_3)], - ), - ], - ); - - // Infinite range (Color's PoV) + // Infinite range (multi-PoV) assert_range_components( TimeRange::new(TimeInt::MIN, TimeInt::MAX), [Color::name(), Position2D::name()], &[ - (None, &[(Color::name(), &row1)]), - ( - None, - &[(Color::name(), &row4_1), (Position2D::name(), &row3)], - ), - ( - None, - &[(Color::name(), &row4_2), (Position2D::name(), &row3)], - ), - ( - None, - &[(Color::name(), &row4_3), (Position2D::name(), &row4_25)], // !!! 
- ), - ( - Some(frame1), - &[ - (Color::name(), &row1), - (Position2D::name(), &row4_4), // timeless - ], - ), - ( - Some(frame4), - &[(Color::name(), &row4_1), (Position2D::name(), &row3)], - ), - ( - Some(frame4), - &[(Color::name(), &row4_2), (Position2D::name(), &row3)], - ), - ( - Some(frame4), - &[(Color::name(), &row4_3), (Position2D::name(), &row4_25)], // !!! - ), - ], - ); - - // Infinite range (Position2D's PoV) - - assert_range_components( - TimeRange::new(TimeInt::MIN, TimeInt::MAX), - [Position2D::name(), Color::name()], - &[ - (None, &[(Position2D::name(), &row2), (Color::name(), &row1)]), - (None, &[(Position2D::name(), &row3), (Color::name(), &row1)]), - ( - None, - &[(Position2D::name(), &row4_25), (Color::name(), &row4_2)], - ), - ( - None, - &[(Position2D::name(), &row4_4), (Color::name(), &row4_3)], - ), - ( - Some(frame2), - &[(Position2D::name(), &row2), (Color::name(), &row1)], - ), - ( - Some(frame3), - &[(Position2D::name(), &row3), (Color::name(), &row1)], - ), - ( - Some(frame4), - &[(Position2D::name(), &row4_25), (Color::name(), &row4_2)], - ), - ( - Some(frame4), - &[(Position2D::name(), &row4_4), (Color::name(), &row4_3)], - ), + (None, &[(Color::name(), &row1)]), // + (None, &[(Position2D::name(), &row2)]), // + (None, &[(Position2D::name(), &row3)]), // + (None, &[(Color::name(), &row4_1)]), + (None, &[(Color::name(), &row4_2)]), + (None, &[(Position2D::name(), &row4_25)]), + (None, &[(Color::name(), &row4_3)]), + (None, &[(Position2D::name(), &row4_4)]), + (Some(frame1), &[(Color::name(), &row1)]), // + (Some(frame2), &[(Position2D::name(), &row2)]), // + (Some(frame3), &[(Position2D::name(), &row3)]), // + (Some(frame4), &[(Color::name(), &row4_1)]), + (Some(frame4), &[(Color::name(), &row4_2)]), + (Some(frame4), &[(Position2D::name(), &row4_25)]), + (Some(frame4), &[(Color::name(), &row4_3)]), + (Some(frame4), &[(Position2D::name(), &row4_4)]), ], ); } -// --- Common helpers --- - -/// Given a list of rows, crafts a `latest_components`-looking dataframe. 
-fn joint_df(cluster_key: ComponentName, rows: &[(ComponentName, &DataRow)]) -> DataFrame { - let df = rows - .iter() - .map(|(component, row)| { - let cluster_comp = if let Some(idx) = row.find_cell(&cluster_key) { - Series::try_from((cluster_key.as_ref(), row.cells[idx].to_arrow_monolist())) - .unwrap() - } else { - let num_instances = row.num_instances(); - Series::try_from(( - cluster_key.as_ref(), - DataCell::from_component::<InstanceKey>(0..num_instances.get() as u64) - .to_arrow_monolist(), - )) - .unwrap() - }; - - let comp_idx = row.find_cell(component).unwrap(); - let df = DataFrame::new(vec![ - cluster_comp, - Series::try_from(( - component.as_ref(), - row.cells[comp_idx] - .to_arrow_monolist() - .as_ref() - .clean_for_polars(), - )) - .unwrap(), - ]) - .unwrap(); - - df.explode(df.get_column_names()).unwrap() - }) - .reduce(|left, right| { - left.outer_join(&right, [cluster_key.as_ref()], [cluster_key.as_ref()]) - .unwrap() - }) - .unwrap_or_default(); - - let df = polars_util::drop_all_nulls(&df, &cluster_key).unwrap(); - - df.sort([cluster_key.as_ref()], false).unwrap_or(df) -} - // --- GC --- #[test] @@ -862,7 +629,7 @@ fn gc_impl(store: &mut DataStore) { } sanity_unwrap(store); - _ = store.to_dataframe(); // simple way of checking that everything is still readable + _ = store.to_data_table(); // simple way of checking that everything is still readable let stats = DataStoreStats::from_store(store); @@ -957,21 +724,25 @@ fn protected_gc_impl(store: &mut DataStore) { let assert_latest_components = |frame_nr: TimeInt, rows: &[(ComponentName, &DataRow)]| { let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let components_all = &[Color::name(), Position2D::name()]; - - let df = polars_util::latest_components( - store, - &LatestAtQuery::new(timeline_frame_nr, frame_nr), - &ent_path, - components_all, - &JoinType::Outer, - ) - .unwrap(); - let df_expected = joint_df(store.cluster_key(), rows); - - store.sort_indices_if_needed(); - assert_eq!(df_expected, df, "{store}"); + for (component_name, expected) in rows { + let (_, _, cells) = store + .latest_at::<1>( + &LatestAtQuery::new(timeline_frame_nr, frame_nr), + &ent_path, + *component_name, + &[*component_name], + ) + .unwrap(); + + let expected = expected + .cells + .iter() + .filter(|cell| cell.component_name() == *component_name) + .collect_vec(); + let actual = cells.iter().flatten().collect_vec(); + assert_eq!(expected, actual); + } }; // The timeless data was preserved @@ -1053,21 +824,25 @@ fn protected_gc_clear_impl(store: &mut DataStore) { let assert_latest_components = |frame_nr: TimeInt, rows: &[(ComponentName, &DataRow)]| { let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let components_all = &[Color::name(), Position2D::name()]; - let df = polars_util::latest_components( - store, - &LatestAtQuery::new(timeline_frame_nr, frame_nr), - &ent_path, - components_all, - &JoinType::Outer, - ) - .unwrap(); - - let df_expected = joint_df(store.cluster_key(), rows); - - store.sort_indices_if_needed(); - assert_eq!(df_expected, df, "{store}"); + for (component_name, expected) in rows { + let (_, _, cells) = store + .latest_at::<1>( + &LatestAtQuery::new(timeline_frame_nr, frame_nr), + &ent_path, + *component_name, + &[*component_name], + ) + .unwrap(); + + let expected = expected + .cells + .iter() + .filter(|cell| cell.component_name() == *component_name) + .collect_vec(); + let actual = cells.iter().flatten().collect_vec(); + assert_eq!(expected, actual); + } }; // Only points are preserved, 
since colors were cleared and then GC'd @@ -1099,16 +874,3 @@ fn protected_gc_clear_impl(store: &mut DataStore) { let stats = DataStoreStats::from_store(store); assert_eq!(stats.timeless.num_rows, 0); } - -// --- - -pub fn init_logs() { - static INIT: AtomicBool = AtomicBool::new(false); - - if INIT - .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) - .is_ok() - { - re_log::setup_native_logging(); - } -} diff --git a/crates/re_data_store/tests/dump.rs b/crates/re_data_store/tests/dump.rs index 3bc6f972e03f..06dbbf451176 100644 --- a/crates/re_data_store/tests/dump.rs +++ b/crates/re_data_store/tests/dump.rs @@ -1,12 +1,10 @@ //! Dumping a datastore to log messages and back. -use std::sync::atomic::{AtomicBool, Ordering}; - use itertools::Itertools; -use re_data_store::WriteError; use re_data_store::{ - test_row, test_util::sanity_unwrap, DataStore, DataStoreStats, GarbageCollectionOptions, - TimeInt, TimeRange, Timeline, + test_row, + test_util::{init_logs, insert_table_with_retries, sanity_unwrap}, + DataStore, DataStoreStats, GarbageCollectionOptions, TimeInt, TimeRange, Timeline, }; use re_log_types::{ build_frame_nr, build_log_time, DataRow, DataTable, EntityPath, RowId, TableId, @@ -17,22 +15,6 @@ use re_types_core::Loggable as _; // --- -// We very often re-use RowIds when generating test data. -fn insert_table_with_retries(store: &mut DataStore, table: &DataTable) { - for row in table.to_rows() { - let mut row = row.unwrap(); - loop { - match store.insert_row(&row) { - Ok(_) => break, - Err(WriteError::ReusedRowId(_)) => { - row.row_id = row.row_id.next(); - } - err @ Err(_) => err.map(|_| ()).unwrap(), - } - } - } -} - // Panic on RowId clash. fn insert_table(store: &mut DataStore, table: &DataTable) { for row in table.to_rows() { @@ -291,17 +273,6 @@ fn data_store_dump_filtered_impl(store1: &mut DataStore, store2: &mut DataStore) // --- -pub fn init_logs() { - static INIT: AtomicBool = AtomicBool::new(false); - - if INIT - .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) - .is_ok() - { - re_log::setup_native_logging(); - } -} - fn create_insert_table(ent_path: impl Into<EntityPath>) -> DataTable { let ent_path = ent_path.into(); diff --git a/crates/re_data_store/tests/internals.rs b/crates/re_data_store/tests/internals.rs index 516e3014b712..df06befe9978 100644 --- a/crates/re_data_store/tests/internals.rs +++ b/crates/re_data_store/tests/internals.rs @@ -2,9 +2,7 @@ //! //! 
They're awful, but sometimes you just have to… -use std::sync::atomic::{AtomicBool, Ordering::SeqCst}; - -use re_data_store::{DataStore, DataStoreConfig}; +use re_data_store::{test_util::init_logs, DataStore, DataStoreConfig}; use re_log_types::{build_frame_nr, DataRow, EntityPath, RowId, TimePoint}; use re_types::{components::InstanceKey, datagen::build_some_instances}; use re_types_core::Loggable as _; @@ -144,11 +142,3 @@ fn pathological_bucket_topology() { ); } } - -fn init_logs() { - static INIT: AtomicBool = AtomicBool::new(false); - - if INIT.compare_exchange(false, true, SeqCst, SeqCst).is_ok() { - re_log::setup_native_logging(); - } -} diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml index 204e6cdd1308..b1a5a00c4791 100644 --- a/crates/re_query/Cargo.toml +++ b/crates/re_query/Cargo.toml @@ -50,10 +50,13 @@ criterion.workspace = true itertools = { workspace = true } mimalloc.workspace = true rand = { workspace = true, features = ["std", "std_rng"] } +similar-asserts.workspace = true + [lib] bench = false + [[bench]] name = "query_benchmark" harness = false diff --git a/crates/re_query/tests/store.rs b/crates/re_query/tests/store.rs new file mode 100644 index 000000000000..1aec304a1b36 --- /dev/null +++ b/crates/re_query/tests/store.rs @@ -0,0 +1,76 @@ +use itertools::Itertools; + +use re_data_store::{test_row, DataStore}; +use re_log_types::EntityPath; +use re_log_types::{build_frame_nr, TimeInt, TimeType, Timeline}; +use re_types::{ + archetypes::Points2D, + components::{Color, InstanceKey, Position2D}, + datagen::{build_some_colors, build_some_positions2d}, +}; +use re_types_core::Loggable as _; + +// --- + +// This one demonstrates a nasty edge case when stream-joining multiple iterators that happen to +// share the same exact row of data at some point (because, for that specific entry, it turns out +// that those components were inserted together). +// +// When that happens, one must be very careful to not only compare time and index row numbers, but +// also make sure that, if all else is equal, the primary iterator comes last so that it gathers as +// much state as possible!
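+// +// A hypothetical trace of the hazard (illustrative values only, not produced by this test): both component streams yield the very same entry for the single row inserted at frame #42: +// +// positions: (frame #42, row_id R, [p0, p1, p2]) +// colors: (frame #42, row_id R, [c0, c1, c2]) +// +// Ordering on (time, row_id) alone cannot break that tie; if the primary stream were polled first, the joined result would miss the colors state for that row.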
+ +#[test] +fn range_join_across_single_row() { + for config in re_data_store::test_util::all_configs() { + let mut store = DataStore::new( + re_log_types::StoreId::random(re_log_types::StoreKind::Recording), + InstanceKey::name(), + config.clone(), + ); + range_join_across_single_row_impl(&mut store); + } +} + +fn range_join_across_single_row_impl(store: &mut DataStore) { + let ent_path = EntityPath::from("this/that"); + + let positions = build_some_positions2d(3); + let colors = build_some_colors(3); + let row = + test_row!(ent_path @ [build_frame_nr(42.into())] => 3; [positions.clone(), colors.clone()]); + store.insert_row(&row).unwrap(); + + let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); + let query = re_data_store::RangeQuery::new( + timeline_frame_nr, + re_data_store::TimeRange::new(TimeInt::MIN, TimeInt::MAX), + ); + + let mut arch_views = re_query::range_archetype::<Points2D, { Points2D::NUM_COMPONENTS }>( + store, &query, &ent_path, + ); + + let arch_view = arch_views.next().unwrap(); + assert!(arch_views.next().is_none()); + + // dbg!(arch_view); + + let actual_instance_keys = arch_view.iter_instance_keys().collect_vec(); + let actual_positions = arch_view + .iter_required_component::<Position2D>() + .unwrap() + .collect_vec(); + let actual_colors = arch_view + .iter_optional_component::<Color>() + .unwrap() + .collect_vec(); + + let expected_instance_keys = vec![InstanceKey(0), InstanceKey(1), InstanceKey(2)]; + let expected_positions = positions; + let expected_colors = colors.into_iter().map(Some).collect_vec(); + + similar_asserts::assert_eq!(expected_instance_keys, actual_instance_keys); + similar_asserts::assert_eq!(expected_positions, actual_positions); + similar_asserts::assert_eq!(expected_colors, actual_colors); +} diff --git a/deny.toml b/deny.toml index 1870db027366..f977cc685799 100644 --- a/deny.toml +++ b/deny.toml @@ -27,7 +27,6 @@ vulnerability = "deny" unmaintained = "warn" yanked = "deny" ignore = [ - "RUSTSEC-2020-0071", # https://rustsec.org/advisories/RUSTSEC-2020-0071 - Potential segfault in the time crate. Remove once a new polars is released with https://github.com/pola-rs/polars/pull/6979 "RUSTSEC-2023-0052", # https://rustsec.org/advisories/RUSTSEC-2023-0052 - webpki: CPU denial of service in certificate path building - can be fixed by `cargo update -p ureq`, but then we run into duplicate crates: https://github.com/algesten/ureq/issues/653 "RUSTSEC-2023-0065", # https://rustsec.org/advisories/RUSTSEC-2023-0065 - Tungstenite WebSocket server can be DOS-attacked by malicious clients ] @@ -44,7 +43,6 @@ deny = [ { name = "openssl" }, # We prefer rustls { name = "reqwest" }, # We prefer ureq - less dependencies ] - skip = [ { name = "ahash" }, # Popular crate + fast release schedule = lots of crates still using old versions { name = "async-fs" }, # Old version via accesskit { name = "memoffset" }, # Small crate { name = "prettyplease" }, # Old version being used by prost { name = "raw-window-handle" }, # Pretty small crate; some crates still on old version + { name = "redox_syscall" }, # Plenty of versions in the wild { name = "spin" }, # Old version used by rusttls { name = "windows" }, # Old version used by accesskit_windows, newer version used by wgpu ] skip-tree = [ { name = "async-io" }, # Old version via rfd { name = "cargo-run-wasm" }, # Dev-tool - { name = "comfy-table" }, # arrow vs. 
polars use different major versions (polars' one is dev-dep only) { name = "core-graphics" }, # old version via arboard { name = "criterion" }, # dev-dependency { name = "objc2" }, # old version via accesskit