diff --git a/Cargo.lock b/Cargo.lock index c55895ff204..8b90798e301 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -190,12 +190,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" -dependencies = [ - "rustversion", -] +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "arrayref" @@ -489,16 +486,16 @@ dependencies = [ "futures-lite 2.6.1", "parking", "polling", - "rustix 1.1.3", + "rustix 1.1.2", "slab", "windows-sys 0.61.2", ] [[package]] name = "async-lock" -version = "3.4.2" +version = "3.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" +checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" dependencies = [ "event-listener 5.4.1", "event-listener-strategy", @@ -520,7 +517,7 @@ dependencies = [ "cfg-if", "event-listener 5.4.1", "futures-lite 2.6.1", - "rustix 1.1.3", + "rustix 1.1.2", ] [[package]] @@ -546,7 +543,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix 1.1.3", + "rustix 1.1.2", "signal-hook-registry", "slab", "windows-sys 0.61.2", @@ -643,9 +640,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.2" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" +checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" dependencies = [ "aws-lc-sys", "zeroize", @@ -653,9 +650,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.35.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" +checksum = 
"179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" dependencies = [ "cc", "cmake", @@ -712,9 +709,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.119.0" +version = "1.117.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d65fddc3844f902dfe1864acb8494db5f9342015ee3ab7890270d36fbd2e01c" +checksum = "c134e2d1ad1ad23a8cf88ceccf39d515914f385e670ffc12226013bd16dfe825" dependencies = [ "aws-credential-types", "aws-runtime", @@ -935,9 +932,9 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.9" +version = "0.61.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" +checksum = "a6864c190cbb8e30cf4b77b2c8f3b6dfffa697a09b7218d2f7cd3d4c4065a9f7" dependencies = [ "aws-smithy-types", ] @@ -1236,7 +1233,7 @@ dependencies = [ "miniz_oxide", "object 0.37.3", "rustc-demangle", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -1461,9 +1458,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytemuck" @@ -1538,9 +1535,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.50" +version = "1.2.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ "find-msvc-tools", "jobserver", @@ -1586,7 +1583,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -1678,9 +1675,9 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = 
"cmake" -version = "0.1.57" +version = "0.1.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "d49d74c227b6cc9f3c51a2c7c667a05b6453f7f0f952a5f8e4493bb9e731d68e" dependencies = [ "cc", ] @@ -3345,9 +3342,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.12.19" +version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" dependencies = [ "bitflags 2.10.0", "rustc_version", @@ -3436,7 +3433,7 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "lance-datagen", @@ -3586,17 +3583,16 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.8" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" dependencies = [ "cc", "cfg-if", "libc", "log", "rustversion", - "windows-link", - "windows-result", + "windows", ] [[package]] @@ -4270,7 +4266,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.62.2", ] [[package]] @@ -4579,9 +4575,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.16" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ee5b5339afb4c41626dde77b7a611bd4f2c202b897852b4bcf5d03eddc61010" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jieba-macros" @@ -4713,7 +4709,7 @@ dependencies = [ [[package]] name = "lance" 
-version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "all_asserts", "approx", @@ -4804,7 +4800,7 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4823,7 +4819,7 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrayref", "paste", @@ -4832,7 +4828,7 @@ dependencies = [ [[package]] name = "lance-core" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4872,7 +4868,7 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -4903,7 +4899,7 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -4923,7 +4919,7 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-arith", "arrow-array", @@ -4970,7 +4966,7 @@ dependencies = [ [[package]] name = "lance-examples" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "all_asserts", "arrow", @@ -4996,7 +4992,7 @@ dependencies = [ [[package]] name = "lance-file" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-arith", "arrow-array", @@ -5038,7 +5034,7 @@ dependencies = [ [[package]] name = "lance-geo" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "datafusion", "geo-types", @@ -5049,7 +5045,7 @@ dependencies = [ [[package]] name = "lance-index" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "approx", "arrow", @@ -5123,7 +5119,7 @@ dependencies = [ [[package]] name = "lance-io" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-arith", @@ -5169,7 +5165,7 @@ dependencies = [ [[package]] name = 
"lance-linalg" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "approx", "arrow-array", @@ -5190,7 +5186,7 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "async-trait", @@ -5204,7 +5200,7 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-ipc", @@ -5246,9 +5242,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.3.2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00a21b43fe2a373896727b97927adedd2683d2907683f294f62cf8815fbf6a01" +checksum = "4dfe76b82f4167fa1c19d5d8825f8fb7d3831e83fa6e0485b3dd59ef0f7b1685" dependencies = [ "reqwest", "serde", @@ -5259,7 +5255,7 @@ dependencies = [ [[package]] name = "lance-table" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -5305,7 +5301,7 @@ dependencies = [ [[package]] name = "lance-test-macros" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "proc-macro2", "quote", @@ -5314,7 +5310,7 @@ dependencies = [ [[package]] name = "lance-testing" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-schema", @@ -5325,7 +5321,7 @@ dependencies = [ [[package]] name = "lance-tools" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "clap", "lance-core", @@ -5473,20 +5469,20 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" -version = "0.1.11" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df15f6eac291ed1cf25865b1ee60399f57e7c227e7f51bdbd4c5270396a9ed50" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ "bitflags 2.10.0", "libc", - "redox_syscall 0.6.0", + "redox_syscall", ] 
[[package]] name = "libz-rs-sys" -version = "0.5.5" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" +checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c" dependencies = [ "zlib-rs", ] @@ -5887,9 +5883,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.12" +version = "0.12.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" +checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077" dependencies = [ "async-lock", "crossbeam-channel", @@ -5900,6 +5896,7 @@ dependencies = [ "futures-util", "parking_lot", "portable-atomic", + "rustc_version", "smallvec", "tagptr", "uuid", @@ -6458,9 +6455,9 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.18", + "redox_syscall", "smallvec", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -6810,15 +6807,15 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix 1.1.3", + "rustix 1.1.2", "windows-sys 0.61.2", ] [[package]] name = "portable-atomic" -version = "1.12.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f59e70c4aef1e55797c2e8fd94a4f2a973fc972cfde0e0b05f683667b0cd39dd" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "portable-atomic-util" @@ -7328,9 +7325,9 @@ dependencies = [ [[package]] name = "rangemap" -version = "1.7.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "973443cf09a9c8656b574a866ab68dfa19f0867d0340648c7d2f6a71b8a8ea68" +checksum = "acbbbbea733ec66275512d0b9694f34102e7d5406fdbe2ad8d21b28dce92887c" [[package]] name = "rawpointer" @@ -7398,15 +7395,6 @@ dependencies = [ 
"bitflags 2.10.0", ] -[[package]] -name = "redox_syscall" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96166dafa0886eb81fe1c0a388bece180fbef2135f97c1e2cf8302e74b43b5" -dependencies = [ - "bitflags 2.10.0", -] - [[package]] name = "redox_users" version = "0.4.6" @@ -7514,9 +7502,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.28" +version = "0.12.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +checksum = "b6eff9328d40131d43bd911d42d79eb6a47312002a4daefc9e37f17e74a7701a" dependencies = [ "base64 0.22.1", "bytes", @@ -7735,9 +7723,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags 2.10.0", "errno", @@ -7797,9 +7785,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" dependencies = [ "web-time", "zeroize", @@ -7847,9 +7835,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.21" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "salsa20" @@ -8044,15 +8032,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.147" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "itoa", "memchr", + "ryu", "serde", "serde_core", - "zmij", ] [[package]] @@ -8770,14 +8758,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.24.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand 2.3.0", "getrandom 0.3.4", "once_cell", - "rustix 1.1.3", + "rustix 1.1.2", "windows-sys 0.61.2", ] @@ -9073,18 +9061,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.5+spec-1.1.0" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.10+spec-1.0.0" +version = "0.23.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832" dependencies = [ "indexmap", "toml_datetime", @@ -9094,9 +9082,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" dependencies = [ "winnow", ] @@ -9171,9 +9159,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.44" +version = "0.1.43" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "log", "pin-project-lite", @@ -9205,9 +9193,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.36" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -9700,6 +9688,41 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -9708,9 +9731,20 @@ checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link", - 
"windows-result", - "windows-strings", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", + "windows-threading", ] [[package]] @@ -9735,21 +9769,46 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", +] + [[package]] name = "windows-registry" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" dependencies = [ - "windows-link", - "windows-result", - "windows-strings", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", ] [[package]] @@ -9758,7 +9817,16 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link", + 
"windows-link 0.2.1", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link 0.1.3", ] [[package]] @@ -9767,7 +9835,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -9812,7 +9880,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -9852,7 +9920,7 @@ version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link", + "windows-link 0.2.1", "windows_aarch64_gnullvm 0.53.1", "windows_aarch64_msvc 0.53.1", "windows_i686_gnu 0.53.1", @@ -9863,6 +9931,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link 0.1.3", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -10086,7 +10163,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix 1.1.3", + "rustix 1.1.2", ] [[package]] @@ -10227,15 +10304,9 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" 
- -[[package]] -name = "zmij" -version = "0.1.7" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e404bcd8afdaf006e529269d3e85a743f9480c3cef60034d77860d02964f3ba" +checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235" [[package]] name = "zstd" diff --git a/Cargo.toml b/Cargo.toml index 66e5c0a99f0..38a6b1264f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ resolver = "2" [workspace.package] -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" edition = "2021" authors = ["Lance Devs "] license = "Apache-2.0" @@ -50,23 +50,23 @@ rust-version = "1.82.0" [workspace.dependencies] libc = "0.2.176" -lance = { version = "=2.0.0-beta.5", path = "./rust/lance", default-features = false } -lance-arrow = { version = "=2.0.0-beta.5", path = "./rust/lance-arrow" } -lance-core = { version = "=2.0.0-beta.5", path = "./rust/lance-core" } -lance-datafusion = { version = "=2.0.0-beta.5", path = "./rust/lance-datafusion" } -lance-datagen = { version = "=2.0.0-beta.5", path = "./rust/lance-datagen" } -lance-encoding = { version = "=2.0.0-beta.5", path = "./rust/lance-encoding" } -lance-file = { version = "=2.0.0-beta.5", path = "./rust/lance-file" } -lance-geo = { version = "=2.0.0-beta.5", path = "./rust/lance-geo" } -lance-index = { version = "=2.0.0-beta.5", path = "./rust/lance-index" } -lance-io = { version = "=2.0.0-beta.5", path = "./rust/lance-io", default-features = false } -lance-linalg = { version = "=2.0.0-beta.5", path = "./rust/lance-linalg" } -lance-namespace = { version = "=2.0.0-beta.5", path = "./rust/lance-namespace" } -lance-namespace-impls = { version = "=2.0.0-beta.5", path = "./rust/lance-namespace-impls" } -lance-namespace-reqwest-client = "0.3.1" -lance-table = { version = "=2.0.0-beta.5", path = "./rust/lance-table" } -lance-test-macros = { version = "=2.0.0-beta.5", path = "./rust/lance-test-macros" } -lance-testing = { version = "=2.0.0-beta.5", path = "./rust/lance-testing" 
} +lance = { version = "=2.0.0-beta.4", path = "./rust/lance", default-features = false } +lance-arrow = { version = "=2.0.0-beta.4", path = "./rust/lance-arrow" } +lance-core = { version = "=2.0.0-beta.4", path = "./rust/lance-core" } +lance-datafusion = { version = "=2.0.0-beta.4", path = "./rust/lance-datafusion" } +lance-datagen = { version = "=2.0.0-beta.4", path = "./rust/lance-datagen" } +lance-encoding = { version = "=2.0.0-beta.4", path = "./rust/lance-encoding" } +lance-file = { version = "=2.0.0-beta.4", path = "./rust/lance-file" } +lance-geo = { version = "=2.0.0-beta.4", path = "./rust/lance-geo" } +lance-index = { version = "=2.0.0-beta.4", path = "./rust/lance-index" } +lance-io = { version = "=2.0.0-beta.4", path = "./rust/lance-io", default-features = false } +lance-linalg = { version = "=2.0.0-beta.4", path = "./rust/lance-linalg" } +lance-namespace = { version = "=2.0.0-beta.4", path = "./rust/lance-namespace" } +lance-namespace-impls = { version = "=2.0.0-beta.4", path = "./rust/lance-namespace-impls" } +lance-namespace-reqwest-client = "0.4.0" +lance-table = { version = "=2.0.0-beta.4", path = "./rust/lance-table" } +lance-test-macros = { version = "=2.0.0-beta.4", path = "./rust/lance-test-macros" } +lance-testing = { version = "=2.0.0-beta.4", path = "./rust/lance-testing" } approx = "0.5.1" # Note that this one does not include pyarrow arrow = { version = "56.1", optional = false, features = ["prettyprint"] } @@ -87,12 +87,11 @@ aws-config = "1.2.0" aws-credential-types = "1.2.0" aws-sdk-dynamodb = "1.38.0" aws-sdk-s3 = "1.38.0" -aws-sdk-sts = "1.38.0" half = { "version" = "2.1", default-features = false, features = [ "num-traits", "std", ] } -lance-bitpacking = { version = "=2.0.0-beta.5", path = "./rust/compression/bitpacking" } +lance-bitpacking = { version = "=2.0.0-beta.4", path = "./rust/compression/bitpacking" } bitvec = "1" bytes = "1.4" byteorder = "1.5" @@ -131,7 +130,7 @@ deepsize = "0.2.0" dirs = "6.0.0" either = "1.0" fst = { 
version = "0.4.7", features = ["levenshtein"] } -fsst = { version = "=2.0.0-beta.5", path = "./rust/compression/fsst" } +fsst = { version = "=2.0.0-beta.4", path = "./rust/compression/fsst" } futures = "0.3" geoarrow-array = "0.6" geoarrow-schema = "0.6" diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index 1ee870160f5..b10bdc4a257 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -154,9 +154,12 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +dependencies = [ + "rustversion", +] [[package]] name = "arrayref" @@ -819,9 +822,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.6" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -2723,7 +2726,7 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "rand 0.9.2", @@ -3842,7 +3845,7 @@ dependencies = [ [[package]] name = "lance" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-arith", @@ -3906,7 +3909,7 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -3925,7 +3928,7 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrayref", "paste", @@ -3934,7 +3937,7 @@ dependencies = [ [[package]] name = 
"lance-core" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -3971,7 +3974,7 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -4002,7 +4005,7 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -4020,7 +4023,7 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-arith", "arrow-array", @@ -4057,7 +4060,7 @@ dependencies = [ [[package]] name = "lance-file" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-arith", "arrow-array", @@ -4089,7 +4092,7 @@ dependencies = [ [[package]] name = "lance-geo" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "datafusion", "geo-types", @@ -4100,7 +4103,7 @@ dependencies = [ [[package]] name = "lance-index" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-arith", @@ -4162,7 +4165,7 @@ dependencies = [ [[package]] name = "lance-io" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-arith", @@ -4235,7 +4238,7 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4251,7 +4254,7 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "async-trait", @@ -4263,7 +4266,7 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-ipc", @@ -4302,9 +4305,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.3.2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "00a21b43fe2a373896727b97927adedd2683d2907683f294f62cf8815fbf6a01" +checksum = "4dfe76b82f4167fa1c19d5d8825f8fb7d3831e83fa6e0485b3dd59ef0f7b1685" dependencies = [ "reqwest", "serde", @@ -4315,7 +4318,7 @@ dependencies = [ [[package]] name = "lance-table" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -6395,15 +6398,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.146" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "217ca874ae0207aac254aa02c957ded05585a90892cc8d87f9e5fa49669dadd8" +checksum = "6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -8240,6 +8243,12 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +[[package]] +name = "zmij" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e404bcd8afdaf006e529269d3e85a743f9480c3cef60034d77860d02964f3ba" + [[package]] name = "zstd" version = "0.13.3" diff --git a/java/lance-jni/src/error.rs b/java/lance-jni/src/error.rs index 4e8f988120d..ef05b8cdb5c 100644 --- a/java/lance-jni/src/error.rs +++ b/java/lance-jni/src/error.rs @@ -6,6 +6,7 @@ use std::str::Utf8Error; use arrow_schema::ArrowError; use jni::{errors::Error as JniError, JNIEnv}; use lance::Error as LanceError; +use lance_namespace::error::NamespaceError; use serde_json::Error as JsonError; #[derive(Debug, PartialEq, Eq)] @@ -15,6 +16,7 @@ pub enum JavaExceptionClass { RuntimeException, UnsupportedOperationException, AlreadyInException, + LanceNamespaceException, } impl JavaExceptionClass { @@ -26,6 +28,7 @@ impl JavaExceptionClass { Self::UnsupportedOperationException => "java/lang/UnsupportedOperationException", // Included for display purposes. 
This is not a real exception. Self::AlreadyInException => "AlreadyInException", + Self::LanceNamespaceException => "org/lance/namespace/errors/LanceNamespaceException", } } } @@ -34,6 +37,7 @@ impl JavaExceptionClass { pub struct Error { message: String, java_class: JavaExceptionClass, + namespace_error_code: Option, } impl Error { @@ -41,6 +45,7 @@ impl Error { Self { message, java_class, + namespace_error_code: None, } } @@ -48,6 +53,7 @@ impl Error { Self { message, java_class: JavaExceptionClass::RuntimeException, + namespace_error_code: None, } } @@ -63,10 +69,19 @@ impl Error { Self::new(message, JavaExceptionClass::UnsupportedOperationException) } + pub fn namespace_error(code: u32, message: String) -> Self { + Self { + message, + java_class: JavaExceptionClass::LanceNamespaceException, + namespace_error_code: Some(code), + } + } + pub fn in_exception() -> Self { Self { message: String::default(), java_class: JavaExceptionClass::AlreadyInException, + namespace_error_code: None, } } @@ -75,11 +90,105 @@ impl Error { // An exception is already in progress, so we don't need to throw another one. return; } + + // For namespace errors, throw the specific LanceNamespaceException + if self.java_class == JavaExceptionClass::LanceNamespaceException { + if let Some(code) = self.namespace_error_code { + // Call LanceNamespaceException.fromCode static method + if self.throw_namespace_exception(env, code).is_err() { + // lance-namespace is bundled as a dependency, so the exception classes + // should always be available. Panic if they're not. + panic!( + "Failed to throw LanceNamespaceException (code={}). 
\ + org.lance.namespace.errors.LanceNamespaceException and ErrorCode classes \ + must be available in the classpath.", + code + ); + } + return; + } + } + if let Err(e) = env.throw_new(self.java_class.as_str(), &self.message) { eprintln!("Error when throwing Java exception: {:?}", e.to_string()); panic!("Error when throwing Java exception: {:?}", e); } } + + fn throw_namespace_exception( + &self, + env: &mut JNIEnv, + code: u32, + ) -> std::result::Result<(), ()> { + // Try to find and call the LanceNamespaceException constructor + // that takes ErrorCode and message + let class_name = "org/lance/namespace/errors/LanceNamespaceException"; + let error_code_class = "org/lance/namespace/errors/ErrorCode"; + + // Find the ErrorCode.fromCode method + let error_code_cls = env.find_class(error_code_class).map_err(|_| ())?; + let from_code_method = env + .get_static_method_id( + &error_code_cls, + "fromCode", + "(I)Lorg/lance/namespace/errors/ErrorCode;", + ) + .map_err(|_| ())?; + let error_code_obj = unsafe { + env.call_static_method_unchecked( + &error_code_cls, + from_code_method, + jni::signature::ReturnType::Object, + &[jni::sys::jvalue { + i: code as jni::sys::jint, + }], + ) + } + .map_err(|_| ())?; + + let error_code = match error_code_obj { + jni::objects::JValueGen::Object(obj) => obj, + _ => return Err(()), + }; + + // Find the LanceNamespaceException class + let exception_cls = env.find_class(class_name).map_err(|_| ())?; + + // Create message JString + let message_str = env.new_string(&self.message).map_err(|_| ())?; + + // Find constructor (ErrorCode, String) + let constructor = env + .get_method_id( + &exception_cls, + "", + "(Lorg/lance/namespace/errors/ErrorCode;Ljava/lang/String;)V", + ) + .map_err(|_| ())?; + + // Create the exception object + let exception_obj = unsafe { + env.new_object_unchecked( + &exception_cls, + constructor, + &[ + jni::sys::jvalue { + l: error_code.as_raw(), + }, + jni::sys::jvalue { + l: message_str.as_raw(), + }, + ], + ) + } 
+ .map_err(|_| ())?; + + // Throw the exception + env.throw(jni::objects::JThrowable::from(exception_obj)) + .map_err(|_| ())?; + + Ok(()) + } } pub type Result = std::result::Result; @@ -92,7 +201,7 @@ impl std::fmt::Display for Error { impl From for Error { fn from(err: LanceError) -> Self { - match err { + match &err { LanceError::DatasetNotFound { .. } | LanceError::DatasetAlreadyExists { .. } | LanceError::CommitConflict { .. } @@ -100,6 +209,19 @@ impl From for Error { LanceError::IO { .. } => Self::io_error(err.to_string()), LanceError::NotSupported { .. } => Self::unsupported_error(err.to_string()), LanceError::NotFound { .. } => Self::io_error(err.to_string()), + LanceError::Namespace { source, .. } => { + // Try to downcast to NamespaceError and get the error code + if let Some(ns_err) = source.downcast_ref::() { + Self::namespace_error(ns_err.code().as_u32(), ns_err.to_string()) + } else { + log::warn!( + "Failed to downcast NamespaceError source, falling back to runtime error. \ + This may indicate a version mismatch. 
Source type: {:?}", + source + ); + Self::runtime_error(err.to_string()) + } + } _ => Self::runtime_error(err.to_string()), } } diff --git a/java/lance-jni/src/namespace.rs b/java/lance-jni/src/namespace.rs index d197c2b594b..4b1d5a82d21 100644 --- a/java/lance-jni/src/namespace.rs +++ b/java/lance-jni/src/namespace.rs @@ -313,6 +313,7 @@ pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_createTableNa } #[no_mangle] +#[allow(deprecated)] pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_createEmptyTableNative( mut env: JNIEnv, _obj: JObject, @@ -329,6 +330,23 @@ pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_createEmptyTa .into_raw() } +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_declareTableNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.declare_table(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + #[no_mangle] pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_insertIntoTableNative( mut env: JNIEnv, @@ -790,6 +808,7 @@ pub extern "system" fn Java_org_lance_namespace_RestNamespace_createTableNative( } #[no_mangle] +#[allow(deprecated)] pub extern "system" fn Java_org_lance_namespace_RestNamespace_createEmptyTableNative( mut env: JNIEnv, _obj: JObject, @@ -806,6 +825,23 @@ pub extern "system" fn Java_org_lance_namespace_RestNamespace_createEmptyTableNa .into_raw() } +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_RestNamespace_declareTableNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_rest_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.declare_table(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + #[no_mangle] pub 
extern "system" fn Java_org_lance_namespace_RestNamespace_insertIntoTableNative( mut env: JNIEnv, diff --git a/java/pom.xml b/java/pom.xml index 5fca8c93b84..cf212b22f20 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -108,12 +108,12 @@ org.lance lance-namespace-core - 0.3.1 + 0.4.0 org.lance lance-namespace-apache-client - 0.3.1 + 0.4.0 com.fasterxml.jackson.core diff --git a/java/src/main/java/org/lance/WriteDatasetBuilder.java b/java/src/main/java/org/lance/WriteDatasetBuilder.java index 6f49ce9338a..ec7e856b4f3 100644 --- a/java/src/main/java/org/lance/WriteDatasetBuilder.java +++ b/java/src/main/java/org/lance/WriteDatasetBuilder.java @@ -18,6 +18,8 @@ import org.lance.namespace.LanceNamespaceStorageOptionsProvider; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; +import org.lance.namespace.model.DeclareTableRequest; +import org.lance.namespace.model.DeclareTableResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; @@ -365,18 +367,33 @@ private Dataset executeWithNamespace() { // Mode-specific namespace operations if (mode == WriteParams.WriteMode.CREATE) { - // Call namespace.createEmptyTable() to create new table - CreateEmptyTableRequest request = new CreateEmptyTableRequest(); - request.setId(tableId); - - CreateEmptyTableResponse response = namespace.createEmptyTable(request); + // Try declareTable first, fall back to deprecated createEmptyTable + // for backward compatibility with older namespace implementations. + // createEmptyTable support will be removed in 3.0.0. 
+ String location; + Map responseStorageOptions; + + try { + DeclareTableRequest declareRequest = new DeclareTableRequest(); + declareRequest.setId(tableId); + DeclareTableResponse declareResponse = namespace.declareTable(declareRequest); + location = declareResponse.getLocation(); + responseStorageOptions = declareResponse.getStorageOptions(); + } catch (UnsupportedOperationException e) { + // Fall back to deprecated createEmptyTable + CreateEmptyTableRequest fallbackRequest = new CreateEmptyTableRequest(); + fallbackRequest.setId(tableId); + CreateEmptyTableResponse fallbackResponse = namespace.createEmptyTable(fallbackRequest); + location = fallbackResponse.getLocation(); + responseStorageOptions = fallbackResponse.getStorageOptions(); + } - tableUri = response.getLocation(); + tableUri = location; if (tableUri == null || tableUri.isEmpty()) { throw new IllegalArgumentException("Namespace did not return a table location"); } - namespaceStorageOptions = ignoreNamespaceStorageOptions ? null : response.getStorageOptions(); + namespaceStorageOptions = ignoreNamespaceStorageOptions ? 
null : responseStorageOptions; } else { // For APPEND/OVERWRITE modes, call namespace.describeTable() DescribeTableRequest request = new DescribeTableRequest(); diff --git a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java index 2d13db69694..a0796739a3c 100644 --- a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java +++ b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java @@ -272,6 +272,14 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request return fromJson(responseJson, CreateEmptyTableResponse.class); } + @Override + public DeclareTableResponse declareTable(DeclareTableRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = declareTableNative(nativeDirectoryNamespaceHandle, requestJson); + return fromJson(responseJson, DeclareTableResponse.class); + } + @Override public InsertIntoTableResponse insertIntoTable( InsertIntoTableRequest request, byte[] requestData) { @@ -423,6 +431,8 @@ private static T fromJson(String json, Class clazz) { private native String createEmptyTableNative(long handle, String requestJson); + private native String declareTableNative(long handle, String requestJson); + private native String insertIntoTableNative(long handle, String requestJson, byte[] requestData); private native String mergeInsertIntoTableNative( diff --git a/java/src/main/java/org/lance/namespace/RestNamespace.java b/java/src/main/java/org/lance/namespace/RestNamespace.java index 995c53c4b92..b55eeb2f200 100644 --- a/java/src/main/java/org/lance/namespace/RestNamespace.java +++ b/java/src/main/java/org/lance/namespace/RestNamespace.java @@ -196,6 +196,14 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request return fromJson(responseJson, CreateEmptyTableResponse.class); } + @Override + public DeclareTableResponse declareTable(DeclareTableRequest request) { + 
ensureInitialized(); + String requestJson = toJson(request); + String responseJson = declareTableNative(nativeRestNamespaceHandle, requestJson); + return fromJson(responseJson, DeclareTableResponse.class); + } + @Override public InsertIntoTableResponse insertIntoTable( InsertIntoTableRequest request, byte[] requestData) { @@ -345,6 +353,8 @@ private static T fromJson(String json, Class clazz) { private native String createEmptyTableNative(long handle, String requestJson); + private native String declareTableNative(long handle, String requestJson); + private native String insertIntoTableNative(long handle, String requestJson, byte[] requestData); private native String mergeInsertIntoTableNative( diff --git a/java/src/test/java/org/lance/NamespaceIntegrationTest.java b/java/src/test/java/org/lance/NamespaceIntegrationTest.java index d2ea43f5e53..ad0b55dccdc 100644 --- a/java/src/test/java/org/lance/NamespaceIntegrationTest.java +++ b/java/src/test/java/org/lance/NamespaceIntegrationTest.java @@ -18,6 +18,8 @@ import org.lance.namespace.LanceNamespaceStorageOptionsProvider; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; +import org.lance.namespace.model.DeclareTableRequest; +import org.lance.namespace.model.DeclareTableResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.operation.Append; @@ -215,6 +217,16 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request return response; } + @Override + public DeclareTableResponse declareTable(DeclareTableRequest request) { + int count = createCallCount.incrementAndGet(); + + DeclareTableResponse response = inner.declareTable(request); + response.setStorageOptions(modifyStorageOptions(response.getStorageOptions(), count)); + + return response; + } + @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { int count = 
describeCallCount.incrementAndGet(); diff --git a/python/Cargo.lock b/python/Cargo.lock index d4e19a95a6d..74ae32cfcb0 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -208,9 +208,12 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +dependencies = [ + "rustversion", +] [[package]] name = "arrayref" @@ -929,9 +932,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.6" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fda37911905ea4d3141a01364bc5509a0f32ae3f3b22d6e330c0abfb62d247" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -3025,7 +3028,7 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "rand 0.9.2", @@ -4209,7 +4212,7 @@ dependencies = [ [[package]] name = "lance" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-arith", @@ -4274,7 +4277,7 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4293,7 +4296,7 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrayref", "paste", @@ -4302,7 +4305,7 @@ dependencies = [ [[package]] name = "lance-core" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4339,7 +4342,7 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies 
= [ "arrow", "arrow-array", @@ -4370,7 +4373,7 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -4388,7 +4391,7 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-arith", "arrow-array", @@ -4425,7 +4428,7 @@ dependencies = [ [[package]] name = "lance-file" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-arith", "arrow-array", @@ -4457,7 +4460,7 @@ dependencies = [ [[package]] name = "lance-geo" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "datafusion", "geo-types", @@ -4468,7 +4471,7 @@ dependencies = [ [[package]] name = "lance-index" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-arith", @@ -4533,7 +4536,7 @@ dependencies = [ [[package]] name = "lance-io" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-arith", @@ -4573,7 +4576,7 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4589,7 +4592,7 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "async-trait", @@ -4601,7 +4604,7 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-ipc", @@ -4640,9 +4643,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.3.2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00a21b43fe2a373896727b97927adedd2683d2907683f294f62cf8815fbf6a01" +checksum = "4dfe76b82f4167fa1c19d5d8825f8fb7d3831e83fa6e0485b3dd59ef0f7b1685" dependencies = [ "reqwest", "serde", @@ -4653,7 +4656,7 @@ dependencies = [ [[package]] name = 
"lance-table" -version = "2.0.0-beta.5" +version = "2.0.0-beta.4" dependencies = [ "arrow", "arrow-array", @@ -7131,15 +7134,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.146" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "217ca874ae0207aac254aa02c957ded05585a90892cc8d87f9e5fa49669dadd8" +checksum = "6af14725505314343e673e9ecb7cd7e8a36aa9791eb936235a3567cc31447ae4" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -9035,6 +9038,12 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +[[package]] +name = "zmij" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e404bcd8afdaf006e529269d3e85a743f9480c3cef60034d77860d02964f3ba" + [[package]] name = "zstd" version = "0.13.3" diff --git a/python/pyproject.toml b/python/pyproject.toml index 060e0bba8e9..0a6dd542222 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "pylance" dynamic = ["version"] -dependencies = ["pyarrow>=14", "numpy>=1.22", "lance-namespace>=0.3.1"] +dependencies = ["pyarrow>=14", "numpy>=1.22", "lance-namespace>=0.4.0"] description = "python wrapper for Lance columnar format" authors = [{ name = "Lance Devs", email = "dev@lance.org" }] license = { file = "LICENSE" } diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 7666eca293d..136cf082a8a 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -5524,16 +5524,48 @@ def write_dataset( from .namespace import ( CreateEmptyTableRequest, + DeclareTableRequest, DescribeTableRequest, LanceNamespaceStorageOptionsProvider, ) # Determine which namespace method to call based on mode if mode == "create": - request = CreateEmptyTableRequest( - id=table_id, location=None, properties=None 
- ) - response = namespace.create_empty_table(request) + # Try declare_table first, fall back to deprecated create_empty_table + # for backward compatibility with older namespace implementations. + # create_empty_table support will be removed in 3.0.0. + if hasattr(namespace, "declare_table"): + try: + from lance_namespace.errors import UnsupportedOperationError + + declare_request = DeclareTableRequest(id=table_id, location=None) + response = namespace.declare_table(declare_request) + except (UnsupportedOperationError, NotImplementedError): + # Fall back to deprecated create_empty_table + import warnings + + warnings.warn( + "create_empty_table is deprecated, use declare_table instead. " + "Support will be removed in 3.0.0.", + DeprecationWarning, + stacklevel=2, + ) + fallback_request = CreateEmptyTableRequest( + id=table_id, location=None + ) + response = namespace.create_empty_table(fallback_request) + else: + # Namespace doesn't have declare_table, fall back to create_empty_table + import warnings + + warnings.warn( + "create_empty_table is deprecated, use declare_table instead. 
" + "Support will be removed in 3.0.0.", + DeprecationWarning, + stacklevel=2, + ) + fallback_request = CreateEmptyTableRequest(id=table_id, location=None) + response = namespace.create_empty_table(fallback_request) elif mode in ("append", "overwrite"): request = DescribeTableRequest(id=table_id, version=None) response = namespace.describe_table(request) diff --git a/python/python/lance/namespace.py b/python/python/lance/namespace.py index 59db935f8ce..9b18e3ee215 100644 --- a/python/python/lance/namespace.py +++ b/python/python/lance/namespace.py @@ -20,6 +20,8 @@ CreateNamespaceResponse, CreateTableRequest, CreateTableResponse, + DeclareTableRequest, + DeclareTableResponse, DeregisterTableRequest, DeregisterTableResponse, DescribeNamespaceRequest, @@ -218,6 +220,10 @@ def create_empty_table( response_dict = self._inner.create_empty_table(request.model_dump()) return CreateEmptyTableResponse.from_dict(response_dict) + def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: + response_dict = self._inner.declare_table(request.model_dump()) + return DeclareTableResponse.from_dict(response_dict) + class RestNamespace(LanceNamespace): """REST-based Lance Namespace implementation backed by Rust. @@ -334,6 +340,10 @@ def create_empty_table( response_dict = self._inner.create_empty_table(request.model_dump()) return CreateEmptyTableResponse.from_dict(response_dict) + def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: + response_dict = self._inner.declare_table(request.model_dump()) + return DeclareTableResponse.from_dict(response_dict) + class RestAdapter: """REST adapter server that creates a namespace backend and exposes it via REST. 
diff --git a/python/python/tests/test_namespace_integration.py b/python/python/tests/test_namespace_integration.py index 592bbd2c3ef..3c93dbcb504 100644 --- a/python/python/tests/test_namespace_integration.py +++ b/python/python/tests/test_namespace_integration.py @@ -22,6 +22,8 @@ from lance.namespace import ( CreateEmptyTableRequest, CreateEmptyTableResponse, + DeclareTableRequest, + DeclareTableResponse, DescribeTableRequest, DescribeTableResponse, LanceNamespace, @@ -143,6 +145,18 @@ def create_empty_table( return response + def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: + with self.lock: + self.create_call_count += 1 + count = self.create_call_count + + response = self.inner.declare_table(request) + response.storage_options = self._modify_storage_options( + response.storage_options, count + ) + + return response + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: with self.lock: self.describe_call_count += 1 @@ -434,8 +448,8 @@ def test_namespace_distributed_write(s3_bucket: str): table_name = uuid.uuid4().hex table_id = ["test_ns", table_name] - request = CreateEmptyTableRequest(id=table_id, location=None, properties=None) - response = namespace.create_empty_table(request) + request = DeclareTableRequest(id=table_id, location=None) + response = namespace.declare_table(request) assert namespace.get_create_call_count() == 1 assert namespace.get_describe_call_count() == 0 diff --git a/python/src/error.rs b/python/src/error.rs index ab12bead1e2..45569331289 100644 --- a/python/src/error.rs +++ b/python/src/error.rs @@ -12,13 +12,49 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use lance_namespace::error::NamespaceError; use pyo3::{ exceptions::{PyIOError, PyNotImplementedError, PyRuntimeError, PyValueError}, - PyResult, + types::{PyAnyMethods, PyModule}, + BoundObject, PyErr, PyResult, Python, }; use lance::Error as LanceError; +/// Try to convert a NamespaceError to the corresponding Python exception. +/// Returns the appropriate Python exception from lance_namespace.errors module. +fn namespace_error_to_pyerr(py: Python<'_>, ns_err: &NamespaceError) -> PyErr { + let code = ns_err.code().as_u32(); + let message = ns_err.to_string(); + + // Try to import the lance_namespace.errors module and use from_error_code + match PyModule::import(py, "lance_namespace.errors") { + Ok(module) => { + match module.getattr("from_error_code") { + Ok(from_error_code) => { + match from_error_code.call1((code, message.clone())) { + Ok(exc) => { + // Create a PyErr from the exception object + PyErr::from_value(exc.into_bound()) + } + Err(_) => PyRuntimeError::new_err(format!( + "[NamespaceError code={}] {}", + code, message + )), + } + } + Err(_) => { + PyRuntimeError::new_err(format!("[NamespaceError code={}] {}", code, message)) + } + } + } + Err(_) => { + // lance_namespace module not available, use RuntimeError with code prefix + PyRuntimeError::new_err(format!("[NamespaceError code={}] {}", code, message)) + } + } +} + pub trait PythonErrorExt { /// Convert to a python error based on the Lance error type fn infer_error(self) -> PyResult; @@ -43,7 +79,19 @@ impl PythonErrorExt for std::result::Result { LanceError::NotFound { .. } => self.value_error(), LanceError::RefNotFound { .. } => self.value_error(), LanceError::VersionNotFound { .. } => self.value_error(), - + LanceError::Namespace { source, .. 
} => { + // Try to downcast to NamespaceError and convert to proper Python exception + if let Some(ns_err) = source.downcast_ref::() { + Python::with_gil(|py| Err(namespace_error_to_pyerr(py, ns_err))) + } else { + log::warn!( + "Failed to downcast NamespaceError source, falling back to runtime error. \ + This may indicate a version mismatch. Source type: {:?}", + source + ); + self.runtime_error() + } + } _ => self.runtime_error(), }, } diff --git a/python/src/namespace.rs b/python/src/namespace.rs index 4ddf0fc76a4..cc579248943 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -183,6 +183,7 @@ impl PyDirectoryNamespace { Ok(pythonize(py, &response)?.into()) } + #[allow(deprecated)] fn create_empty_table(&self, py: Python, request: &Bound<'_, PyAny>) -> PyResult { let request = depythonize(request)?; let response = crate::rt() @@ -190,6 +191,14 @@ impl PyDirectoryNamespace { .infer_error()?; Ok(pythonize(py, &response)?.into()) } + + fn declare_table(&self, py: Python, request: &Bound<'_, PyAny>) -> PyResult { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.declare_table(request))? + .infer_error()?; + Ok(pythonize(py, &response)?.into()) + } } #[cfg(feature = "rest")] @@ -341,6 +350,7 @@ impl PyRestNamespace { Ok(pythonize(py, &response)?.into()) } + #[allow(deprecated)] fn create_empty_table(&self, py: Python, request: &Bound<'_, PyAny>) -> PyResult { let request = depythonize(request)?; let response = crate::rt() @@ -348,6 +358,14 @@ impl PyRestNamespace { .infer_error()?; Ok(pythonize(py, &response)?.into()) } + + fn declare_table(&self, py: Python, request: &Bound<'_, PyAny>) -> PyResult { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.declare_table(request))? 
+ .infer_error()?; + Ok(pythonize(py, &response)?.into()) + } } #[cfg(feature = "rest-adapter")] diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 91714d73d90..b0170023e3c 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -16,17 +16,18 @@ use lance::dataset::{Dataset, WriteParams}; use lance::session::Session; use lance_io::object_store::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry}; use object_store::path::Path; +use object_store::{Error as ObjectStoreError, ObjectStore as OSObjectStore, PutMode, PutOptions}; use std::collections::HashMap; use std::io::Cursor; use std::sync::Arc; use lance_namespace::models::{ CreateEmptyTableRequest, CreateEmptyTableResponse, CreateNamespaceRequest, - CreateNamespaceResponse, CreateTableRequest, CreateTableResponse, DescribeNamespaceRequest, - DescribeNamespaceResponse, DescribeTableRequest, DescribeTableResponse, DropNamespaceRequest, - DropNamespaceResponse, DropTableRequest, DropTableResponse, ListNamespacesRequest, - ListNamespacesResponse, ListTablesRequest, ListTablesResponse, NamespaceExistsRequest, - TableExistsRequest, + CreateNamespaceResponse, CreateTableRequest, CreateTableResponse, DeclareTableRequest, + DeclareTableResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, + DescribeTableRequest, DescribeTableResponse, DropNamespaceRequest, DropNamespaceResponse, + DropTableRequest, DropTableResponse, ListNamespacesRequest, ListNamespacesResponse, + ListTablesRequest, ListTablesResponse, NamespaceExistsRequest, TableExistsRequest, }; use lance_core::{box_error, Error, Result}; @@ -37,6 +38,19 @@ use crate::credentials::{ create_credential_vendor_for_location, has_credential_vendor_config, CredentialVendor, }; +/// Result of checking table status atomically. 
+/// +/// This struct captures the state of a table directory in a single snapshot, +/// avoiding race conditions between checking existence and other status flags. +pub(crate) struct TableStatus { + /// Whether the table directory exists (has any files) + pub(crate) exists: bool, + /// Whether the table has a `.lance-deregistered` marker file + pub(crate) is_deregistered: bool, + /// Whether the table has a `.lance-reserved` marker file (declared but not written) + pub(crate) has_reserved_file: bool, +} + /// Builder for creating a DirectoryNamespace. /// /// This builder provides a fluent API for configuring and establishing @@ -547,6 +561,13 @@ impl DirectoryNamespace { } let table_name = &path[..path.len() - 6]; + + // Use atomic check to skip deregistered tables and declared-but-not-written tables + let status = self.check_table_status(table_name).await; + if status.is_deregistered || status.has_reserved_file { + continue; + } + tables.push(table_name.to_string()); } @@ -608,6 +629,71 @@ impl DirectoryNamespace { .child(".lance-reserved") } + /// Get the deregistered marker file path for a table + fn table_deregistered_file_path(&self, table_name: &str) -> Path { + self.base_path + .child(format!("{}.lance", table_name).as_str()) + .child(".lance-deregistered") + } + + /// Atomically check table existence and deregistration status. + /// + /// This performs a single directory listing to get a consistent snapshot of the + /// table's state, avoiding race conditions between checking existence and + /// checking deregistration status. 
+ pub(crate) async fn check_table_status(&self, table_name: &str) -> TableStatus { + let table_path = self.table_path(table_name); + match self.object_store.read_dir(table_path).await { + Ok(entries) => { + let exists = !entries.is_empty(); + let is_deregistered = entries.iter().any(|e| e.ends_with(".lance-deregistered")); + let has_reserved_file = entries.iter().any(|e| e.ends_with(".lance-reserved")); + TableStatus { + exists, + is_deregistered, + has_reserved_file, + } + } + Err(_) => TableStatus { + exists: false, + is_deregistered: false, + has_reserved_file: false, + }, + } + } + + /// Atomically create a marker file using put_if_not_exists semantics. + /// + /// This uses `PutMode::Create` which will fail if the file already exists, + /// providing atomic creation semantics to avoid race conditions. + /// + /// Returns Ok(()) if the file was created successfully. + /// Returns Err with appropriate message if the file already exists or other error. + async fn put_marker_file_atomic( + &self, + path: &Path, + file_description: &str, + ) -> std::result::Result<(), String> { + let put_opts = PutOptions { + mode: PutMode::Create, + ..Default::default() + }; + + match self + .object_store + .inner + .put_opts(path, bytes::Bytes::new().into(), put_opts) + .await + { + Ok(_) => Ok(()), + Err(ObjectStoreError::AlreadyExists { .. }) + | Err(ObjectStoreError::Precondition { .. }) => { + Err(format!("{} already exists", file_description)) + } + Err(e) => Err(format!("Failed to create {}: {}", file_description, e)), + } + } + /// Get storage options for a table, using credential vending if configured. 
/// /// If credential vendor properties are configured and the table location matches @@ -890,21 +976,23 @@ impl LanceNamespace for DirectoryNamespace { let table_name = Self::table_name_from_id(&request.id)?; let table_uri = self.table_full_uri(&table_name); - let table_path = self.table_path(&table_name); - let dir_exists = self - .object_store - .read_dir(table_path) - .await - .map(|entries| !entries.is_empty()) - .unwrap_or(false); + // Atomically check table existence and deregistration status + let status = self.check_table_status(&table_name).await; - if !dir_exists { + if !status.exists { return Err(Error::Namespace { source: format!("Table does not exist: {}", table_name).into(), location: snafu::location!(), }); } + if status.is_deregistered { + return Err(Error::Namespace { + source: format!("Table is deregistered: {}", table_name).into(), + location: snafu::location!(), + }); + } + // Try to load the dataset to get real information match Dataset::open(&table_uri).await { Ok(mut dataset) => { @@ -937,13 +1025,8 @@ impl LanceNamespace for DirectoryNamespace { }) } Err(err) => { - let reserved_file_path = self.table_reserved_file_path(&table_name); - if self - .object_store - .exists(&reserved_file_path) - .await - .unwrap_or(false) - { + // Use the reserved file status from the atomic check + if status.has_reserved_file { let storage_options = self.get_storage_options_for_table(&table_uri).await?; Ok(DescribeTableResponse { table: Some(table_name), @@ -987,21 +1070,24 @@ impl LanceNamespace for DirectoryNamespace { } let table_name = Self::table_name_from_id(&request.id)?; - let table_path = self.table_path(&table_name); - let table_exists = self - .object_store - .read_dir(table_path) - .await - .map(|entries| !entries.is_empty()) - .unwrap_or(false); - if !table_exists { + // Atomically check table existence and deregistration status + let status = self.check_table_status(&table_name).await; + + if !status.exists { return Err(Error::Namespace { source: 
format!("Table does not exist: {}", table_name).into(), location: snafu::location!(), }); } + if status.is_deregistered { + return Err(Error::Namespace { + source: format!("Table is deregistered: {}", table_name).into(), + location: snafu::location!(), + }); + } + Ok(()) } @@ -1107,6 +1193,7 @@ impl LanceNamespace for DirectoryNamespace { request: CreateEmptyTableRequest, ) -> Result { if let Some(ref manifest_ns) = self.manifest_ns { + #[allow(deprecated)] return manifest_ns.create_empty_table(request).await; } @@ -1128,35 +1215,74 @@ impl LanceNamespace for DirectoryNamespace { } } - // Create the .lance-reserved file to mark the table as existing + // Atomically create the .lance-reserved file to mark the table as existing. + // This uses put_if_not_exists semantics to avoid race conditions. let reserved_file_path = self.table_reserved_file_path(&table_name); - self.object_store - .create(&reserved_file_path) + self.put_marker_file_atomic(&reserved_file_path, &format!("table {}", table_name)) .await .map_err(|e| Error::Namespace { - source: format!( - "Failed to create .lance-reserved file for table {}: {}", - table_name, e - ) - .into(), + source: e.into(), + location: snafu::location!(), + })?; + + Ok(CreateEmptyTableResponse { + transaction_id: None, + location: Some(table_uri), + storage_options: self.storage_options.clone(), + }) + } + + async fn declare_table(&self, request: DeclareTableRequest) -> Result { + if let Some(ref manifest_ns) = self.manifest_ns { + return manifest_ns.declare_table(request).await; + } + + let table_name = Self::table_name_from_id(&request.id)?; + let table_uri = self.table_full_uri(&table_name); + + // Validate location if provided + if let Some(location) = &request.location { + let location = location.trim_end_matches('/'); + if location != table_uri { + return Err(Error::Namespace { + source: format!( + "Cannot declare table {} at location {}, must be at location {}", + table_name, location, table_uri + ) + .into(), + 
location: snafu::location!(), + }); + } + } + + // Check if table already has data (created via create_table). + // The atomic put only prevents races between concurrent declare_table calls, + // not between declare_table and existing data. + let status = self.check_table_status(&table_name).await; + if status.exists && !status.has_reserved_file { + // Table has data but no reserved file - it was created with data + return Err(Error::Namespace { + source: format!("Table already exists: {}", table_name).into(), location: snafu::location!(), - })? - .shutdown() + }); + } + + // Atomically create the .lance-reserved file to mark the table as declared. + // This uses put_if_not_exists semantics to avoid race conditions between + // concurrent declare_table calls. + let reserved_file_path = self.table_reserved_file_path(&table_name); + + self.put_marker_file_atomic(&reserved_file_path, &format!("table {}", table_name)) .await .map_err(|e| Error::Namespace { - source: format!( - "Failed to finalize .lance-reserved file for table {}: {}", - table_name, e - ) - .into(), + source: e.into(), location: snafu::location!(), })?; - Ok(CreateEmptyTableResponse { + Ok(DeclareTableResponse { transaction_id: None, location: Some(table_uri), - properties: None, storage_options: self.storage_options.clone(), }) } @@ -1186,10 +1312,57 @@ impl LanceNamespace for DirectoryNamespace { return LanceNamespace::deregister_table(manifest_ns.as_ref(), request).await; } - // Without manifest, deregister_table is not supported - Err(Error::NotSupported { - source: "deregister_table is only supported when manifest mode is enabled".into(), - location: snafu::location!(), + // V1 mode: create a .lance-deregistered marker file in the table directory + let table_name = Self::table_name_from_id(&request.id)?; + let table_uri = self.table_full_uri(&table_name); + + // Check table existence and deregistration status. + // This provides better error messages for common cases. 
+ let status = self.check_table_status(&table_name).await; + + if !status.exists { + return Err(Error::Namespace { + source: format!("Table does not exist: {}", table_name).into(), + location: snafu::location!(), + }); + } + + if status.is_deregistered { + return Err(Error::Namespace { + source: format!("Table is already deregistered: {}", table_name).into(), + location: snafu::location!(), + }); + } + + // Atomically create the .lance-deregistered marker file. + // This uses put_if_not_exists semantics to prevent race conditions + // when multiple processes try to deregister the same table concurrently. + // If a race occurs and another process already created the file, + // we'll get an AlreadyExists error which we convert to a proper message. + let deregistered_path = self.table_deregistered_file_path(&table_name); + self.put_marker_file_atomic( + &deregistered_path, + &format!("deregistration marker for table {}", table_name), + ) + .await + .map_err(|e| { + // Convert "already exists" to "already deregistered" for better UX + let message = if e.contains("already exists") { + format!("Table is already deregistered: {}", table_name) + } else { + e + }; + Error::Namespace { + source: message.into(), + location: snafu::location!(), + } + })?; + + Ok(lance_namespace::models::DeregisterTableResponse { + id: request.id, + location: Some(table_uri), + properties: None, + transaction_id: None, }) } @@ -1877,6 +2050,7 @@ mod tests { } #[tokio::test] + #[allow(deprecated)] async fn test_create_empty_table() { let (namespace, temp_dir) = create_test_namespace().await; @@ -1921,6 +2095,7 @@ mod tests { } #[tokio::test] + #[allow(deprecated)] async fn test_create_empty_table_with_wrong_location() { let (namespace, _temp_dir) = create_test_namespace().await; @@ -1937,6 +2112,7 @@ mod tests { } #[tokio::test] + #[allow(deprecated)] async fn test_create_empty_table_then_drop() { let (namespace, temp_dir) = create_test_namespace().await; @@ -2156,6 +2332,7 @@ mod tests { } 
#[tokio::test] + #[allow(deprecated)] async fn test_empty_table_in_child_namespace() { let (namespace, _temp_dir) = create_test_namespace().await; @@ -2669,8 +2846,8 @@ mod tests { } #[tokio::test] - async fn test_register_deregister_without_manifest_fails() { - use lance_namespace::models::{DeregisterTableRequest, RegisterTableRequest}; + async fn test_register_without_manifest_fails() { + use lance_namespace::models::RegisterTableRequest; let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); @@ -2682,7 +2859,7 @@ mod tests { .await .unwrap(); - // Try to register - should fail + // Try to register - should fail (register requires manifest) let mut register_req = RegisterTableRequest::new("test_table.lance".to_string()); register_req.id = Some(vec!["test_table".to_string()]); let result = namespace.register_table(register_req).await; @@ -2692,15 +2869,8 @@ mod tests { .to_string() .contains("manifest mode is enabled")); - // Try to deregister - should fail - let mut deregister_req = DeregisterTableRequest::new(); - deregister_req.id = Some(vec!["test_table".to_string()]); - let result = namespace.deregister_table(deregister_req).await; - assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("manifest mode is enabled")); + // Note: deregister_table now works in V1 mode via .lance-deregistered marker files + // See test_deregister_table_v1_mode for that test case } #[tokio::test] @@ -2876,4 +3046,372 @@ mod tests { .unwrap(); assert_eq!(a_col.values(), &[100, 200]); } + + // ============================================================ + // Tests for declare_table + // ============================================================ + + #[tokio::test] + async fn test_declare_table_v1_mode() { + use lance_namespace::models::{ + DeclareTableRequest, DescribeTableRequest, TableExistsRequest, + }; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace in V1 
mode (no manifest) + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .build() + .await + .unwrap(); + + // Declare a table + let mut declare_req = DeclareTableRequest::new(); + declare_req.id = Some(vec!["test_table".to_string()]); + let response = namespace.declare_table(declare_req).await.unwrap(); + + // Should return location + assert!(response.location.is_some()); + let location = response.location.as_ref().unwrap(); + assert!(location.ends_with("test_table.lance")); + + // Table should exist (via reserved file) + let mut exists_req = TableExistsRequest::new(); + exists_req.id = Some(vec!["test_table".to_string()]); + assert!(namespace.table_exists(exists_req).await.is_ok()); + + // Describe should work but return no version/schema (not written yet) + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = Some(vec!["test_table".to_string()]); + let describe_response = namespace.describe_table(describe_req).await.unwrap(); + assert!(describe_response.location.is_some()); + assert!(describe_response.version.is_none()); // Not written yet + assert!(describe_response.schema.is_none()); // Not written yet + } + + #[tokio::test] + async fn test_declare_table_with_manifest() { + use lance_namespace::models::{DeclareTableRequest, TableExistsRequest}; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace with manifest + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(true) + .dir_listing_enabled(false) + .build() + .await + .unwrap(); + + // Declare a table + let mut declare_req = DeclareTableRequest::new(); + declare_req.id = Some(vec!["test_table".to_string()]); + let response = namespace.declare_table(declare_req).await.unwrap(); + + // Should return location + assert!(response.location.is_some()); + + // Table should exist in manifest + let mut exists_req = TableExistsRequest::new(); + exists_req.id = 
Some(vec!["test_table".to_string()]); + assert!(namespace.table_exists(exists_req).await.is_ok()); + } + + #[tokio::test] + async fn test_declare_table_when_table_exists() { + use lance_namespace::models::DeclareTableRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .build() + .await + .unwrap(); + + // First create a table with actual data + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Try to declare the same table - should fail because it already has data + let mut declare_req = DeclareTableRequest::new(); + declare_req.id = Some(vec!["test_table".to_string()]); + let result = namespace.declare_table(declare_req).await; + assert!(result.is_err()); + } + + // ============================================================ + // Tests for deregister_table in V1 mode + // ============================================================ + + #[tokio::test] + async fn test_deregister_table_v1_mode() { + use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest}; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace in V1 mode (no manifest, with dir listing) + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .dir_listing_enabled(true) + .build() + .await + .unwrap(); + + // Create a table with data + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Verify table 
exists + let mut exists_req = TableExistsRequest::new(); + exists_req.id = Some(vec!["test_table".to_string()]); + assert!(namespace.table_exists(exists_req.clone()).await.is_ok()); + + // Deregister the table + let mut deregister_req = DeregisterTableRequest::new(); + deregister_req.id = Some(vec!["test_table".to_string()]); + let response = namespace.deregister_table(deregister_req).await.unwrap(); + + // Should return location + assert!(response.location.is_some()); + let location = response.location.as_ref().unwrap(); + assert!(location.contains("test_table")); + + // Table should no longer exist (deregistered) + let result = namespace.table_exists(exists_req).await; + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("deregistered")); + + // Physical data should still exist + let dataset = Dataset::open(location).await; + assert!(dataset.is_ok(), "Physical table data should still exist"); + } + + #[tokio::test] + async fn test_deregister_table_v1_already_deregistered() { + use lance_namespace::models::DeregisterTableRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .dir_listing_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Deregister once + let mut deregister_req = DeregisterTableRequest::new(); + deregister_req.id = Some(vec!["test_table".to_string()]); + namespace + .deregister_table(deregister_req.clone()) + .await + .unwrap(); + + // Try to deregister again - should fail + let result = namespace.deregister_table(deregister_req).await; + assert!(result.is_err()); + assert!(result + 
.unwrap_err() + .to_string() + .contains("already deregistered")); + } + + // ============================================================ + // Tests for list_tables skipping deregistered tables + // ============================================================ + + #[tokio::test] + async fn test_list_tables_skips_deregistered_v1() { + use lance_namespace::models::DeregisterTableRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .dir_listing_enabled(true) + .build() + .await + .unwrap(); + + // Create two tables + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + + let mut create_req1 = CreateTableRequest::new(); + create_req1.id = Some(vec!["table1".to_string()]); + namespace + .create_table(create_req1, bytes::Bytes::from(ipc_data.clone())) + .await + .unwrap(); + + let mut create_req2 = CreateTableRequest::new(); + create_req2.id = Some(vec!["table2".to_string()]); + namespace + .create_table(create_req2, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // List tables - should see both (root namespace = empty vec) + let mut list_req = ListTablesRequest::new(); + list_req.id = Some(vec![]); + let list_response = namespace.list_tables(list_req.clone()).await.unwrap(); + assert_eq!(list_response.tables.len(), 2); + + // Deregister table1 + let mut deregister_req = DeregisterTableRequest::new(); + deregister_req.id = Some(vec!["table1".to_string()]); + namespace.deregister_table(deregister_req).await.unwrap(); + + // List tables - should only see table2 + let list_response = namespace.list_tables(list_req).await.unwrap(); + assert_eq!(list_response.tables.len(), 1); + assert!(list_response.tables.contains(&"table2".to_string())); + assert!(!list_response.tables.contains(&"table1".to_string())); + } + + // ============================================================ + // Tests for 
describe_table and table_exists with deregistered tables + // ============================================================ + + #[tokio::test] + async fn test_describe_table_fails_for_deregistered_v1() { + use lance_namespace::models::{DeregisterTableRequest, DescribeTableRequest}; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .dir_listing_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Describe should work before deregistration + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = Some(vec!["test_table".to_string()]); + assert!(namespace.describe_table(describe_req.clone()).await.is_ok()); + + // Deregister + let mut deregister_req = DeregisterTableRequest::new(); + deregister_req.id = Some(vec!["test_table".to_string()]); + namespace.deregister_table(deregister_req).await.unwrap(); + + // Describe should fail after deregistration + let result = namespace.describe_table(describe_req).await; + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("deregistered")); + } + + #[tokio::test] + async fn test_table_exists_fails_for_deregistered_v1() { + use lance_namespace::models::{DeregisterTableRequest, TableExistsRequest}; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .dir_listing_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = 
CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Table exists should work before deregistration + let mut exists_req = TableExistsRequest::new(); + exists_req.id = Some(vec!["test_table".to_string()]); + assert!(namespace.table_exists(exists_req.clone()).await.is_ok()); + + // Deregister + let mut deregister_req = DeregisterTableRequest::new(); + deregister_req.id = Some(vec!["test_table".to_string()]); + namespace.deregister_table(deregister_req).await.unwrap(); + + // Table exists should fail after deregistration + let result = namespace.table_exists(exists_req).await; + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("deregistered")); + } + + #[tokio::test] + async fn test_atomic_table_status_check() { + // This test verifies that the TableStatus check is atomic + // by ensuring a single directory listing is used + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .manifest_enabled(false) + .dir_listing_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Table status should show exists=true, is_deregistered=false + let status = namespace.check_table_status("test_table").await; + assert!(status.exists); + assert!(!status.is_deregistered); + assert!(!status.has_reserved_file); + } } diff --git a/rust/lance-namespace-impls/src/dir/manifest.rs b/rust/lance-namespace-impls/src/dir/manifest.rs index 4791bbb9df5..8dca60b83d8 100644 --- a/rust/lance-namespace-impls/src/dir/manifest.rs +++ 
b/rust/lance-namespace-impls/src/dir/manifest.rs @@ -24,12 +24,13 @@ use lance_index::IndexType; use lance_io::object_store::{ObjectStore, ObjectStoreParams}; use lance_namespace::models::{ CreateEmptyTableRequest, CreateEmptyTableResponse, CreateNamespaceRequest, - CreateNamespaceResponse, CreateTableRequest, CreateTableResponse, DeregisterTableRequest, - DeregisterTableResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, - DescribeTableRequest, DescribeTableResponse, DropNamespaceRequest, DropNamespaceResponse, - DropTableRequest, DropTableResponse, ListNamespacesRequest, ListNamespacesResponse, - ListTablesRequest, ListTablesResponse, NamespaceExistsRequest, RegisterTableRequest, - RegisterTableResponse, TableExistsRequest, + CreateNamespaceResponse, CreateTableRequest, CreateTableResponse, DeclareTableRequest, + DeclareTableResponse, DeregisterTableRequest, DeregisterTableResponse, + DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest, + DescribeTableResponse, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, + DropTableResponse, ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, + ListTablesResponse, NamespaceExistsRequest, RegisterTableRequest, RegisterTableResponse, + TableExistsRequest, }; use lance_namespace::schema::arrow_schema_to_json; use lance_namespace::LanceNamespace; @@ -1624,7 +1625,101 @@ impl LanceNamespace for ManifestNamespace { Ok(CreateEmptyTableResponse { transaction_id: None, location: Some(table_uri), - properties: None, + storage_options: self.storage_options.clone(), + }) + } + + async fn declare_table(&self, request: DeclareTableRequest) -> Result { + let table_id = request.id.as_ref().ok_or_else(|| Error::InvalidInput { + source: "Table ID is required".into(), + location: location!(), + })?; + + if table_id.is_empty() { + return Err(Error::InvalidInput { + source: "Table ID cannot be empty".into(), + location: location!(), + }); + } + + let (namespace, table_name) = 
Self::split_object_id(table_id); + let object_id = Self::build_object_id(&namespace, &table_name); + + // Check if table already exists in manifest + let existing = self.query_manifest_for_table(&object_id).await?; + if existing.is_some() { + return Err(Error::Namespace { + source: format!("Table '{}' already exists", table_name).into(), + location: location!(), + }); + } + + // Create table location path with hash-based naming + // When dir_listing_enabled is true and it's a root table, use directory-style naming: {table_name}.lance + // Otherwise, use hash-based naming: {hash}_{object_id} + let dir_name = if namespace.is_empty() && self.dir_listing_enabled { + // Root table with directory listing enabled: use {table_name}.lance + format!("{}.lance", table_name) + } else { + // Child namespace table or dir listing disabled: use hash-based naming + Self::generate_dir_name(&object_id) + }; + let table_path = self.base_path.child(dir_name.as_str()); + let table_uri = Self::construct_full_uri(&self.root, &dir_name)?; + + // Validate location if provided + if let Some(req_location) = &request.location { + let req_location = req_location.trim_end_matches('/'); + if req_location != table_uri { + return Err(Error::Namespace { + source: format!( + "Cannot declare table {} at location {}, must be at location {}", + table_name, req_location, table_uri + ) + .into(), + location: location!(), + }); + } + } + + // Create the .lance-reserved file to mark the table as existing + let reserved_file_path = table_path.child(".lance-reserved"); + + self.object_store + .create(&reserved_file_path) + .await + .map_err(|e| Error::Namespace { + source: format!( + "Failed to create .lance-reserved file for table {}: {}", + table_name, e + ) + .into(), + location: location!(), + })? 
+ .shutdown() + .await + .map_err(|e| Error::Namespace { + source: format!( + "Failed to finalize .lance-reserved file for table {}: {}", + table_name, e + ) + .into(), + location: location!(), + })?; + + // Add entry to manifest marking this as a declared table (store dir_name, not full path) + self.insert_into_manifest(object_id, ObjectType::Table, Some(dir_name)) + .await?; + + log::info!( + "Declared table '{}' in manifest at {}", + table_name, + table_uri + ); + + Ok(DeclareTableResponse { + transaction_id: None, + location: Some(table_uri), storage_options: self.storage_options.clone(), }) } diff --git a/rust/lance-namespace-impls/src/rest.rs b/rust/lance-namespace-impls/src/rest.rs index 3b5d0650659..f92d44cd305 100644 --- a/rust/lance-namespace-impls/src/rest.rs +++ b/rust/lance-namespace-impls/src/rest.rs @@ -19,13 +19,13 @@ use lance_namespace::models::{ CreateNamespaceRequest, CreateNamespaceResponse, CreateTableIndexRequest, CreateTableIndexResponse, CreateTableRequest, CreateTableResponse, CreateTableScalarIndexResponse, CreateTableTagRequest, CreateTableTagResponse, - DeleteFromTableRequest, DeleteFromTableResponse, DeleteTableTagRequest, DeleteTableTagResponse, - DeregisterTableRequest, DeregisterTableResponse, DescribeNamespaceRequest, - DescribeNamespaceResponse, DescribeTableIndexStatsRequest, DescribeTableIndexStatsResponse, - DescribeTableRequest, DescribeTableResponse, DescribeTransactionRequest, - DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse, - DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse, - ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse, + DeclareTableRequest, DeclareTableResponse, DeleteFromTableRequest, DeleteFromTableResponse, + DeleteTableTagRequest, DeleteTableTagResponse, DeregisterTableRequest, DeregisterTableResponse, + DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest, + DescribeTableIndexStatsResponse, 
DescribeTableRequest, DescribeTableResponse, + DescribeTransactionRequest, DescribeTransactionResponse, DropNamespaceRequest, + DropNamespaceResponse, DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, + DropTableResponse, ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse, GetTableTagVersionRequest, GetTableTagVersionResponse, InsertIntoTableRequest, InsertIntoTableResponse, ListNamespacesRequest, ListNamespacesResponse, ListTableIndicesRequest, ListTableIndicesResponse, ListTableTagsRequest, ListTableTagsResponse, @@ -549,6 +549,14 @@ impl LanceNamespace for RestNamespace { .map_err(convert_api_error) } + async fn declare_table(&self, request: DeclareTableRequest) -> Result { + let id = object_id_str(&request.id, &self.delimiter)?; + + table_api::declare_table(&self.reqwest_config, &id, request, Some(&self.delimiter)) + .await + .map_err(convert_api_error) + } + async fn insert_into_table( &self, request: InsertIntoTableRequest, diff --git a/rust/lance-namespace-impls/src/rest_adapter.rs b/rust/lance-namespace-impls/src/rest_adapter.rs index 284b0d42fa9..f0d1c3ac60d 100644 --- a/rust/lance-namespace-impls/src/rest_adapter.rs +++ b/rust/lance-namespace-impls/src/rest_adapter.rs @@ -80,6 +80,7 @@ impl RestAdapter { // Table data operations .route("/v1/table/:id/create", post(create_table)) .route("/v1/table/:id/create-empty", post(create_empty_table)) + .route("/v1/table/:id/declare", post(declare_table)) .route("/v1/table/:id/insert", post(insert_into_table)) .route("/v1/table/:id/merge_insert", post(merge_insert_into_table)) .route("/v1/table/:id/update", post(update_table)) @@ -501,6 +502,7 @@ async fn create_table( } } +#[allow(deprecated)] async fn create_empty_table( State(backend): State>, Path(id): Path, @@ -515,6 +517,20 @@ async fn create_empty_table( } } +async fn declare_table( + State(backend): State>, + Path(id): Path, + Query(params): Query, + Json(mut request): Json, +) -> Response { + request.id = 
Some(parse_id(&id, params.delimiter.as_deref())); + + match backend.declare_table(request).await { + Ok(response) => (StatusCode::CREATED, Json(response)).into_response(), + Err(e) => error_to_response(e), + } +} + #[derive(Debug, Deserialize)] struct InsertQuery { delimiter: Option, @@ -1440,6 +1456,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + #[allow(deprecated)] async fn test_empty_table_exists_in_child_namespace() { let fixture = RestServerFixture::new().await; @@ -1611,6 +1628,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + #[allow(deprecated)] async fn test_create_empty_table_in_child_namespace() { let fixture = RestServerFixture::new().await; @@ -1665,6 +1683,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + #[allow(deprecated)] async fn test_describe_empty_table_in_child_namespace() { let fixture = RestServerFixture::new().await; @@ -1720,6 +1739,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + #[allow(deprecated)] async fn test_drop_empty_table_in_child_namespace() { let fixture = RestServerFixture::new().await; @@ -1765,6 +1785,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + #[allow(deprecated)] async fn test_deeply_nested_namespace_with_empty_table() { let fixture = RestServerFixture::new().await; diff --git a/rust/lance-namespace/src/error.rs b/rust/lance-namespace/src/error.rs new file mode 100644 index 00000000000..71fb7c12c31 --- /dev/null +++ b/rust/lance-namespace/src/error.rs @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Lance Namespace error types. +//! +//! This module defines fine-grained error types for Lance Namespace operations. +//! Each error type has a unique numeric code that is consistent across all +//! Lance Namespace implementations (Python, Java, Rust, REST). +//! +//! # Error Handling +//! +//! 
Namespace operations return [`NamespaceError`] which can be converted to
+//! [`lance_core::Error`] for integration with the Lance ecosystem.
+//!
+//! ```rust,ignore
+//! use lance_namespace::{NamespaceError, ErrorCode};
+//!
+//! // Create and use namespace errors
+//! let err = NamespaceError::TableNotFound {
+//!     message: "Table 'users' not found".into(),
+//! };
+//! assert_eq!(err.code(), ErrorCode::TableNotFound);
+//!
+//! // Convert to lance_core::Error
+//! let lance_err: lance_core::Error = err.into();
+//! ```
+
+use lance_core::error::ToSnafuLocation;
+use snafu::Snafu;
+
+/// Lance Namespace error codes.
+///
+/// These codes are globally unique across all Lance Namespace implementations
+/// (Python, Java, Rust, REST). Use these codes for programmatic error handling.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[repr(u32)]
+pub enum ErrorCode {
+    /// Operation not supported by this backend
+    Unsupported = 0,
+    /// The specified namespace does not exist
+    NamespaceNotFound = 1,
+    /// A namespace with this name already exists
+    NamespaceAlreadyExists = 2,
+    /// Namespace contains tables or child namespaces
+    NamespaceNotEmpty = 3,
+    /// The specified table does not exist
+    TableNotFound = 4,
+    /// A table with this name already exists
+    TableAlreadyExists = 5,
+    /// The specified table index does not exist
+    TableIndexNotFound = 6,
+    /// A table index with this name already exists
+    TableIndexAlreadyExists = 7,
+    /// The specified table tag does not exist
+    TableTagNotFound = 8,
+    /// A table tag with this name already exists
+    TableTagAlreadyExists = 9,
+    /// The specified transaction does not exist
+    TransactionNotFound = 10,
+    /// The specified table version does not exist
+    TableVersionNotFound = 11,
+    /// The specified table column does not exist
+    TableColumnNotFound = 12,
+    /// Malformed request or invalid parameters
+    InvalidInput = 13,
+    /// Optimistic concurrency conflict
+    ConcurrentModification = 14,
+    /// User lacks permission for this operation
+    PermissionDenied = 15,
+    /// Authentication credentials are missing or invalid
+    Unauthenticated = 16,
+    /// Service is temporarily unavailable
+    ServiceUnavailable = 17,
+    /// Unexpected server/implementation error
+    Internal = 18,
+    /// Table is in an invalid state for the operation
+    InvalidTableState = 19,
+    /// Table schema validation failed
+    TableSchemaValidationError = 20,
+}
+
+impl ErrorCode {
+    /// Returns the numeric code value.
+    pub fn as_u32(self) -> u32 {
+        self as u32
+    }
+
+    /// Creates an ErrorCode from a numeric code.
+    ///
+    /// Returns `None` if the code is not recognized.
+    pub fn from_u32(code: u32) -> Option<Self> {
+        match code {
+            0 => Some(Self::Unsupported),
+            1 => Some(Self::NamespaceNotFound),
+            2 => Some(Self::NamespaceAlreadyExists),
+            3 => Some(Self::NamespaceNotEmpty),
+            4 => Some(Self::TableNotFound),
+            5 => Some(Self::TableAlreadyExists),
+            6 => Some(Self::TableIndexNotFound),
+            7 => Some(Self::TableIndexAlreadyExists),
+            8 => Some(Self::TableTagNotFound),
+            9 => Some(Self::TableTagAlreadyExists),
+            10 => Some(Self::TransactionNotFound),
+            11 => Some(Self::TableVersionNotFound),
+            12 => Some(Self::TableColumnNotFound),
+            13 => Some(Self::InvalidInput),
+            14 => Some(Self::ConcurrentModification),
+            15 => Some(Self::PermissionDenied),
+            16 => Some(Self::Unauthenticated),
+            17 => Some(Self::ServiceUnavailable),
+            18 => Some(Self::Internal),
+            19 => Some(Self::InvalidTableState),
+            20 => Some(Self::TableSchemaValidationError),
+            _ => None,
+        }
+    }
+}
+
+impl std::fmt::Display for ErrorCode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let name = match self {
+            Self::Unsupported => "Unsupported",
+            Self::NamespaceNotFound => "NamespaceNotFound",
+            Self::NamespaceAlreadyExists => "NamespaceAlreadyExists",
+            Self::NamespaceNotEmpty => "NamespaceNotEmpty",
+            Self::TableNotFound => "TableNotFound",
+            Self::TableAlreadyExists => "TableAlreadyExists",
+            Self::TableIndexNotFound => "TableIndexNotFound",
+            Self::TableIndexAlreadyExists => "TableIndexAlreadyExists",
+            Self::TableTagNotFound => "TableTagNotFound",
+            Self::TableTagAlreadyExists => "TableTagAlreadyExists",
+            Self::TransactionNotFound => "TransactionNotFound",
+            Self::TableVersionNotFound => "TableVersionNotFound",
+            Self::TableColumnNotFound => "TableColumnNotFound",
+            Self::InvalidInput => "InvalidInput",
+            Self::ConcurrentModification => "ConcurrentModification",
+            Self::PermissionDenied => "PermissionDenied",
+            Self::Unauthenticated => "Unauthenticated",
+            Self::ServiceUnavailable => "ServiceUnavailable",
+            Self::Internal => "Internal",
+            Self::InvalidTableState => "InvalidTableState",
+            Self::TableSchemaValidationError => "TableSchemaValidationError",
+        };
+        write!(f, "{}", name)
+    }
+}
+
+/// Lance Namespace error type.
+///
+/// This enum provides fine-grained error types for Lance Namespace operations.
+/// Each variant corresponds to a specific error condition and has an associated
+/// [`ErrorCode`] accessible via the [`code()`](NamespaceError::code) method.
+///
+/// # Converting to lance_core::Error
+///
+/// `NamespaceError` implements `Into<lance_core::Error>`, preserving the original
+/// error so it can be downcast later:
+///
+/// ```rust,ignore
+/// let ns_err = NamespaceError::TableNotFound { message: "...".into() };
+/// let lance_err: lance_core::Error = ns_err.into();
+///
+/// // Later, extract the original error:
+/// if let lance_core::Error::Namespace { source, .. } = &lance_err {
+///     if let Some(ns_err) = source.downcast_ref::<NamespaceError>() {
+///         println!("Error code: {:?}", ns_err.code());
+///     }
+/// }
+/// ```
+#[derive(Debug, Snafu)]
+#[snafu(visibility(pub))]
+pub enum NamespaceError {
+    /// Operation not supported by this backend.
+    #[snafu(display("Unsupported: {message}"))]
+    Unsupported { message: String },
+
+    /// The specified namespace does not exist.
+    #[snafu(display("Namespace not found: {message}"))]
+    NamespaceNotFound { message: String },
+
+    /// A namespace with this name already exists.
+    #[snafu(display("Namespace already exists: {message}"))]
+    NamespaceAlreadyExists { message: String },
+
+    /// Namespace contains tables or child namespaces.
+    #[snafu(display("Namespace not empty: {message}"))]
+    NamespaceNotEmpty { message: String },
+
+    /// The specified table does not exist.
+    #[snafu(display("Table not found: {message}"))]
+    TableNotFound { message: String },
+
+    /// A table with this name already exists.
+    #[snafu(display("Table already exists: {message}"))]
+    TableAlreadyExists { message: String },
+
+    /// The specified table index does not exist.
+    #[snafu(display("Table index not found: {message}"))]
+    TableIndexNotFound { message: String },
+
+    /// A table index with this name already exists.
+    #[snafu(display("Table index already exists: {message}"))]
+    TableIndexAlreadyExists { message: String },
+
+    /// The specified table tag does not exist.
+    #[snafu(display("Table tag not found: {message}"))]
+    TableTagNotFound { message: String },
+
+    /// A table tag with this name already exists.
+    #[snafu(display("Table tag already exists: {message}"))]
+    TableTagAlreadyExists { message: String },
+
+    /// The specified transaction does not exist.
+    #[snafu(display("Transaction not found: {message}"))]
+    TransactionNotFound { message: String },
+
+    /// The specified table version does not exist.
+    #[snafu(display("Table version not found: {message}"))]
+    TableVersionNotFound { message: String },
+
+    /// The specified table column does not exist.
+    #[snafu(display("Table column not found: {message}"))]
+    TableColumnNotFound { message: String },
+
+    /// Malformed request or invalid parameters.
+    #[snafu(display("Invalid input: {message}"))]
+    InvalidInput { message: String },
+
+    /// Optimistic concurrency conflict.
+    #[snafu(display("Concurrent modification: {message}"))]
+    ConcurrentModification { message: String },
+
+    /// User lacks permission for this operation.
+    #[snafu(display("Permission denied: {message}"))]
+    PermissionDenied { message: String },
+
+    /// Authentication credentials are missing or invalid.
+    #[snafu(display("Unauthenticated: {message}"))]
+    Unauthenticated { message: String },
+
+    /// Service is temporarily unavailable.
+    #[snafu(display("Service unavailable: {message}"))]
+    ServiceUnavailable { message: String },
+
+    /// Unexpected internal error.
+    #[snafu(display("Internal error: {message}"))]
+    Internal { message: String },
+
+    /// Table is in an invalid state for the operation.
+    #[snafu(display("Invalid table state: {message}"))]
+    InvalidTableState { message: String },
+
+    /// Table schema validation failed.
+    #[snafu(display("Table schema validation error: {message}"))]
+    TableSchemaValidationError { message: String },
+}
+
+impl NamespaceError {
+    /// Returns the error code for this error.
+    ///
+    /// Use this for programmatic error handling across language boundaries.
+    pub fn code(&self) -> ErrorCode {
+        match self {
+            Self::Unsupported { .. } => ErrorCode::Unsupported,
+            Self::NamespaceNotFound { .. } => ErrorCode::NamespaceNotFound,
+            Self::NamespaceAlreadyExists { .. } => ErrorCode::NamespaceAlreadyExists,
+            Self::NamespaceNotEmpty { .. } => ErrorCode::NamespaceNotEmpty,
+            Self::TableNotFound { .. } => ErrorCode::TableNotFound,
+            Self::TableAlreadyExists { .. } => ErrorCode::TableAlreadyExists,
+            Self::TableIndexNotFound { .. } => ErrorCode::TableIndexNotFound,
+            Self::TableIndexAlreadyExists { .. } => ErrorCode::TableIndexAlreadyExists,
+            Self::TableTagNotFound { .. } => ErrorCode::TableTagNotFound,
+            Self::TableTagAlreadyExists { .. } => ErrorCode::TableTagAlreadyExists,
+            Self::TransactionNotFound { .. } => ErrorCode::TransactionNotFound,
+            Self::TableVersionNotFound { .. } => ErrorCode::TableVersionNotFound,
+            Self::TableColumnNotFound { .. } => ErrorCode::TableColumnNotFound,
+            Self::InvalidInput { .. } => ErrorCode::InvalidInput,
+            Self::ConcurrentModification { .. } => ErrorCode::ConcurrentModification,
+            Self::PermissionDenied { .. } => ErrorCode::PermissionDenied,
+            Self::Unauthenticated { .. } => ErrorCode::Unauthenticated,
+            Self::ServiceUnavailable { .. } => ErrorCode::ServiceUnavailable,
+            Self::Internal { .. } => ErrorCode::Internal,
+            Self::InvalidTableState { .. } => ErrorCode::InvalidTableState,
+            Self::TableSchemaValidationError { .. } => ErrorCode::TableSchemaValidationError,
+        }
+    }
+
+    /// Creates a NamespaceError from an error code and message.
+    ///
+    /// This is useful when receiving errors from REST API or other language bindings.
+    pub fn from_code(code: u32, message: impl Into<String>) -> Self {
+        let message = message.into();
+        match ErrorCode::from_u32(code) {
+            Some(ErrorCode::Unsupported) => Self::Unsupported { message },
+            Some(ErrorCode::NamespaceNotFound) => Self::NamespaceNotFound { message },
+            Some(ErrorCode::NamespaceAlreadyExists) => Self::NamespaceAlreadyExists { message },
+            Some(ErrorCode::NamespaceNotEmpty) => Self::NamespaceNotEmpty { message },
+            Some(ErrorCode::TableNotFound) => Self::TableNotFound { message },
+            Some(ErrorCode::TableAlreadyExists) => Self::TableAlreadyExists { message },
+            Some(ErrorCode::TableIndexNotFound) => Self::TableIndexNotFound { message },
+            Some(ErrorCode::TableIndexAlreadyExists) => Self::TableIndexAlreadyExists { message },
+            Some(ErrorCode::TableTagNotFound) => Self::TableTagNotFound { message },
+            Some(ErrorCode::TableTagAlreadyExists) => Self::TableTagAlreadyExists { message },
+            Some(ErrorCode::TransactionNotFound) => Self::TransactionNotFound { message },
+            Some(ErrorCode::TableVersionNotFound) => Self::TableVersionNotFound { message },
+            Some(ErrorCode::TableColumnNotFound) => Self::TableColumnNotFound { message },
+            Some(ErrorCode::InvalidInput) => Self::InvalidInput { message },
+            Some(ErrorCode::ConcurrentModification) => Self::ConcurrentModification { message },
+            Some(ErrorCode::PermissionDenied) => Self::PermissionDenied { message },
+            Some(ErrorCode::Unauthenticated) => Self::Unauthenticated { message },
+            Some(ErrorCode::ServiceUnavailable) => Self::ServiceUnavailable { message },
+            Some(ErrorCode::Internal) => Self::Internal { message },
+            Some(ErrorCode::InvalidTableState) => Self::InvalidTableState { message },
+            Some(ErrorCode::TableSchemaValidationError) => {
+                Self::TableSchemaValidationError { message }
+            }
+            None => Self::Internal { message },
+        }
+    }
+}
+
+/// Converts a NamespaceError into a lance_core::Error.
+///
+/// The original `NamespaceError` is preserved in the `source` field and can be
+/// extracted via downcasting for programmatic error handling.
+impl From<NamespaceError> for lance_core::Error {
+    #[track_caller]
+    fn from(err: NamespaceError) -> Self {
+        Self::Namespace {
+            source: Box::new(err),
+            location: std::panic::Location::caller().to_snafu_location(),
+        }
+    }
+}
+
+/// Result type for namespace operations.
+pub type Result<T> = std::result::Result<T, NamespaceError>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_error_code_roundtrip() {
+        for code in 0..=20 {
+            let error_code = ErrorCode::from_u32(code).unwrap();
+            assert_eq!(error_code.as_u32(), code);
+        }
+    }
+
+    #[test]
+    fn test_unknown_error_code() {
+        assert!(ErrorCode::from_u32(999).is_none());
+    }
+
+    #[test]
+    fn test_namespace_error_code() {
+        let err = NamespaceError::TableNotFound {
+            message: "test table".to_string(),
+        };
+        assert_eq!(err.code(), ErrorCode::TableNotFound);
+        assert_eq!(err.code().as_u32(), 4);
+    }
+
+    #[test]
+    fn test_from_code() {
+        let err = NamespaceError::from_code(4, "table not found");
+        assert_eq!(err.code(), ErrorCode::TableNotFound);
+        assert!(err.to_string().contains("table not found"));
+    }
+
+    #[test]
+    fn test_from_unknown_code() {
+        let err = NamespaceError::from_code(999, "unknown error");
+        assert_eq!(err.code(), ErrorCode::Internal);
+    }
+
+    #[test]
+    fn test_convert_to_lance_error() {
+        let ns_err = NamespaceError::TableNotFound {
+            message: "users".to_string(),
+        };
+        let lance_err: lance_core::Error = ns_err.into();
+
+        // Verify it's a Namespace error
+        match &lance_err {
+            lance_core::Error::Namespace { source, .. } => {
+                // Downcast to get the original error
+                let downcast = source.downcast_ref::<NamespaceError>();
+                assert!(downcast.is_some());
+                assert_eq!(downcast.unwrap().code(), ErrorCode::TableNotFound);
+            }
+            _ => panic!("Expected Namespace error"),
+        }
+    }
+
+    #[test]
+    fn test_error_display() {
+        let err = NamespaceError::TableNotFound {
+            message: "users".to_string(),
+        };
+        assert_eq!(err.to_string(), "Table not found: users");
+    }
+}
diff --git a/rust/lance-namespace/src/lib.rs b/rust/lance-namespace/src/lib.rs
index 51bd18a2fb5..6fd9a9b7ab2 100644
--- a/rust/lance-namespace/src/lib.rs
+++ b/rust/lance-namespace/src/lib.rs
@@ -5,7 +5,17 @@
 //!
 //! A Rust client for the Lance Namespace API that provides a unified interface
 //! for managing namespaces and tables across different backend implementations.
+//!
+//! # Error Handling
+//!
+//! This crate provides fine-grained error types through the [`error`] module.
+//! Each error type has a unique numeric code that is consistent across all
+//! Lance Namespace implementations (Python, Java, Rust, REST).
+//!
+//! See [`error::ErrorCode`] for the list of error codes and
+//! [`error::NamespaceError`] for the error types.
 
+pub mod error;
 pub mod namespace;
 pub mod schema;
 
@@ -13,6 +23,9 @@ pub mod schema;
 pub use lance_core::{Error, Result};
 pub use namespace::LanceNamespace;
 
+// Re-export error types
+pub use error::{ErrorCode, NamespaceError, Result as NamespaceResult};
+
 // Re-export reqwest client for convenience
 pub use lance_namespace_reqwest_client as reqwest_client;
 
diff --git a/rust/lance-namespace/src/namespace.rs b/rust/lance-namespace/src/namespace.rs
index 60c206530f4..3e27df15ba7 100644
--- a/rust/lance-namespace/src/namespace.rs
+++ b/rust/lance-namespace/src/namespace.rs
@@ -16,13 +16,13 @@ use lance_namespace_reqwest_client::models::{
     CreateNamespaceRequest, CreateNamespaceResponse, CreateTableIndexRequest,
     CreateTableIndexResponse, CreateTableRequest, CreateTableResponse,
     CreateTableScalarIndexResponse, CreateTableTagRequest, CreateTableTagResponse,
-    DeleteFromTableRequest, DeleteFromTableResponse, DeleteTableTagRequest, DeleteTableTagResponse,
-    DeregisterTableRequest, DeregisterTableResponse, DescribeNamespaceRequest,
-    DescribeNamespaceResponse, DescribeTableIndexStatsRequest, DescribeTableIndexStatsResponse,
-    DescribeTableRequest, DescribeTableResponse, DescribeTransactionRequest,
-    DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse,
-    DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse,
-    ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse,
+    DeclareTableRequest, DeclareTableResponse, DeleteFromTableRequest, DeleteFromTableResponse,
+    DeleteTableTagRequest, DeleteTableTagResponse, DeregisterTableRequest, DeregisterTableResponse,
+    DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest,
+    DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse,
+    DescribeTransactionRequest, DescribeTransactionResponse, DropNamespaceRequest,
+    DropNamespaceResponse, DropTableIndexRequest, DropTableIndexResponse, DropTableRequest,
+    DropTableResponse, ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse,
     GetTableTagVersionRequest, GetTableTagVersionResponse, InsertIntoTableRequest,
     InsertIntoTableResponse, ListNamespacesRequest, ListNamespacesResponse,
     ListTableIndicesRequest, ListTableIndicesResponse, ListTableTagsRequest, ListTableTagsResponse,
@@ -39,9 +39,26 @@ use lance_namespace_reqwest_client::models::{
 /// This trait defines the interface that all Lance namespace implementations
 /// must provide. Each method corresponds to a specific operation on namespaces
 /// or tables.
+///
+/// # Error Handling
+///
+/// All operations may return the following common errors (via [`crate::NamespaceError`]):
+///
+/// - [`crate::ErrorCode::Unsupported`] - Operation not supported by this backend
+/// - [`crate::ErrorCode::InvalidInput`] - Invalid request parameters
+/// - [`crate::ErrorCode::PermissionDenied`] - Insufficient permissions
+/// - [`crate::ErrorCode::Unauthenticated`] - Invalid credentials
+/// - [`crate::ErrorCode::ServiceUnavailable`] - Service temporarily unavailable
+/// - [`crate::ErrorCode::Internal`] - Unexpected internal error
+///
+/// See individual method documentation for operation-specific errors.
 #[async_trait]
 pub trait LanceNamespace: Send + Sync + std::fmt::Debug {
     /// List namespaces.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`crate::ErrorCode::NamespaceNotFound`] if the parent namespace does not exist.
     async fn list_namespaces(
         &self,
         _request: ListNamespacesRequest,
@@ -53,6 +70,10 @@ pub trait LanceNamespace: Send + Sync + std::fmt::Debug {
     }
 
     /// Describe a namespace.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`crate::ErrorCode::NamespaceNotFound`] if the namespace does not exist.
     async fn describe_namespace(
         &self,
         _request: DescribeNamespaceRequest,
@@ -64,6 +85,10 @@ pub trait LanceNamespace: Send + Sync + std::fmt::Debug {
     }
 
     /// Create a new namespace.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`crate::ErrorCode::NamespaceAlreadyExists`] if a namespace with the same name already exists.
     async fn create_namespace(
         &self,
         _request: CreateNamespaceRequest,
@@ -75,6 +100,11 @@ pub trait LanceNamespace: Send + Sync + std::fmt::Debug {
     }
 
     /// Drop a namespace.
+    ///
+    /// # Errors
+    ///
+    /// - [`crate::ErrorCode::NamespaceNotFound`] if the namespace does not exist.
+    /// - [`crate::ErrorCode::NamespaceNotEmpty`] if the namespace contains tables or child namespaces.
     async fn drop_namespace(
         &self,
         _request: DropNamespaceRequest,
@@ -86,6 +116,10 @@ pub trait LanceNamespace: Send + Sync + std::fmt::Debug {
     }
 
     /// Check if a namespace exists.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`crate::ErrorCode::NamespaceNotFound`] if the namespace does not exist.
     async fn namespace_exists(&self, _request: NamespaceExistsRequest) -> Result<()> {
         Err(Error::NotSupported {
             source: "namespace_exists not implemented".into(),
@@ -170,7 +204,23 @@ pub trait LanceNamespace: Send + Sync + std::fmt::Debug {
         })
     }
 
+    /// Declare a table (metadata only operation).
+    async fn declare_table(&self, _request: DeclareTableRequest) -> Result<DeclareTableResponse> {
+        Err(Error::NotSupported {
+            source: "declare_table not implemented".into(),
+            location: Location::new(file!(), line!(), column!()),
+        })
+    }
+
     /// Create an empty table (metadata only operation).
+    ///
+    /// # Deprecated
+    ///
+    /// Use [`declare_table`](Self::declare_table) instead. Support will be removed in 3.0.0.
+    #[deprecated(
+        since = "2.0.0",
+        note = "Use declare_table instead. Support will be removed in 3.0.0."
+    )]
     async fn create_empty_table(
         &self,
         _request: CreateEmptyTableRequest,
diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs
index 1030faafe14..0ff3cb6873a 100644
--- a/rust/lance/src/dataset.rs
+++ b/rust/lance/src/dataset.rs
@@ -110,7 +110,9 @@ pub use blob::BlobFile;
 use hash_joiner::HashJoiner;
 use lance_core::box_error;
 pub use lance_core::ROW_ID;
-use lance_namespace::models::{CreateEmptyTableRequest, DescribeTableRequest};
+use lance_namespace::models::{
+    CreateEmptyTableRequest, DeclareTableRequest, DeclareTableResponse, DescribeTableRequest,
+};
 use lance_table::feature_flags::{apply_feature_flags, can_read_dataset};
 use lance_table::io::deletion::{relative_deletion_file_path, DELETIONS_DIR};
 pub use schema_evolution::{
@@ -820,23 +822,45 @@ impl Dataset {
 
             match write_params.mode {
                 WriteMode::Create => {
-                    let request = CreateEmptyTableRequest {
+                    let declare_request = DeclareTableRequest {
                         id: Some(table_id.clone()),
                         location: None,
-                        properties: None,
                     };
-                    let response =
-                        namespace
-                            .create_empty_table(request)
-                            .await
-                            .map_err(|e| Error::Namespace {
+                    // Try declare_table first, fall back to deprecated create_empty_table
+                    // for backward compatibility with older namespace implementations.
+                    // create_empty_table support will be removed in 3.0.0.
+                    #[allow(deprecated)]
+                    let response = match namespace.declare_table(declare_request).await {
+                        Ok(resp) => resp,
+                        Err(Error::NotSupported { .. }) => {
+                            let fallback_request = CreateEmptyTableRequest {
+                                id: Some(table_id.clone()),
+                                location: None,
+                            };
+                            let fallback_resp = namespace
+                                .create_empty_table(fallback_request)
+                                .await
+                                .map_err(|e| Error::Namespace {
+                                    source: Box::new(e),
+                                    location: location!(),
+                                })?;
+                            DeclareTableResponse {
+                                transaction_id: fallback_resp.transaction_id,
+                                location: fallback_resp.location,
+                                storage_options: fallback_resp.storage_options,
+                            }
+                        }
+                        Err(e) => {
+                            return Err(Error::Namespace {
                                 source: Box::new(e),
                                 location: location!(),
-                            })?;
+                            });
+                        }
+                    };
 
                     let uri = response.location.ok_or_else(|| Error::Namespace {
                         source: Box::new(std::io::Error::other(
-                            "Table location not found in create_empty_table response",
+                            "Table location not found in declare_table response",
                         )),
                         location: location!(),
                     })?;