diff --git a/Cargo.lock b/Cargo.lock index ed98581..54a0e3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,7 +10,7 @@ dependencies = [ "csv", "distance", "divvunspell", - "indicatif 0.15.0", + "indicatif", "pretty_env_logger", "rayon", "serde", @@ -18,31 +18,11 @@ dependencies = [ "structopt", ] -[[package]] -name = "addr2line" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" - -[[package]] -name = "aes" -version = "0.8.4" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" -dependencies = [ - "cfg-if", - "cipher", - "cpufeatures", -] +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "ahash" @@ -79,12 +59,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -105,9 +79,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -120,44 +94,44 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.7" +version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" dependencies = [ "anstyle", - "once_cell", - "windows-sys 0.59.0", + "once_cell_polyfill", + "windows-sys 0.60.2", ] [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "atty" @@ -172,45 +146,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78" -dependencies = [ - "autocfg 1.4.0", -] - -[[package]] -name = "autocfg" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" - -[[package]] -name = "backtrace" -version = "0.3.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "base64ct" -version = "1.7.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e25b6adfb930f02d1981565a6e5d9c547ac15a96606256d3b59040e5cd4ca3" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" @@ -220,18 +158,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.0" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "box-format" @@ -243,7 +172,7 @@ dependencies = [ "comde", "fastvlq", "log", - "memmap2 0.9.5", + "memmap2", "pathdiff", "relative-path", "serde_json", @@ -286,9 +215,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "byteorder" @@ -296,61 +225,13 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" -[[package]] -name = "bytes" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - -[[package]] -name = "cached-path" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f1c56d30236522ab3393a08746b138d4e16372001f42d29c88d513aeb8ab7ef" -dependencies = [ - "flate2", - "fs2", - "glob", - "indicatif 0.16.2", - "log", - "rand 0.8.5", - "reqwest", - "serde", - "serde_json", - "sha2", - "tar", - "tempfile", - "thiserror", - "zip 0.5.13", - "zip-extensions", -] - [[package]] name = "cc" -version = "1.2.21" +version = "1.2.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" +checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", @@ -358,9 +239,8 @@ dependencies = [ [[package]] name = "cffi" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5e4ef6239aac8a2d645d60f08cc345889659f64c815ce204de14e6ffc2b52ed" +version = "0.2.0-dev" +source = "git+https://github.com/cffi-rs/cffi#ee4a9f5a5bcf72164831650b23d9dc0d5618a04e" dependencies = [ "cffi-impl", "libc", @@ -369,35 +249,33 @@ dependencies = [ [[package]] name = "cffi-impl" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5991ed1ca79f668096f267671e6035156f23871a8a2dbd88a38dd43a8c73c68" +version = "0.2.0-dev" +source = "git+https://github.com/cffi-rs/cffi#ee4a9f5a5bcf72164831650b23d9dc0d5618a04e" dependencies = [ "ctor", "darling", - "heck 0.3.3", + "heck 0.4.1", "log", "phf", "phf_codegen", "pretty_env_logger", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.106", ] [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", @@ -405,16 +283,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "cipher" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" -dependencies = [ - "crypto-common", - "inout", -] - [[package]] name = "clap" version = "2.34.0" @@ -432,9 +300,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.37" +version = "4.5.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" +checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" dependencies = [ "clap_builder", "clap_derive", @@ -442,9 +310,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.37" +version = "4.5.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" +checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" dependencies = [ "anstream", "anstyle", @@ -454,36 +322,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.32" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "clap_lex" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" - -[[package]] -name = "cloudabi" -version = "0.0.3" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -dependencies = [ - "bitflags 1.3.2", -] +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comde" @@ -496,36 +355,20 @@ dependencies = [ "flate2", "liblzma", "snap", - "zstd 0.13.3", + "zstd", ] [[package]] name = "console" -version = "0.15.11" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4" dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width 0.2.0", - "windows-sys 0.59.0", -] - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", + "unicode-width 0.2.2", + "windows-sys 0.61.2", ] [[package]] @@ -543,20 +386,11 @@ dependencies = [ "memchr", ] -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -586,16 +420,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" version = "1.3.1" @@ -619,49 +443,19 @@ dependencies = [ [[package]] name = "ctor" -version = "0.1.26" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" dependencies = [ "quote", - "syn 1.0.109", -] - -[[package]] -name = "curl" -version = "0.4.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9fb4d13a1be2b58f14d60adba57c9834b78c62fd86c3e76a148f732686e9265" -dependencies = [ - "curl-sys", - "libc", - "openssl-probe", - "openssl-sys", - "schannel", - "socket2", - "windows-sys 0.52.0", -] - -[[package]] -name = "curl-sys" -version = "0.4.80+curl-8.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55f7df2eac63200c3ab25bde3b2268ef2ee56af3d238e76d61f01c3c49bff734" -dependencies = [ - "cc", - "libc", - "libz-sys", - "openssl-sys", - "pkg-config", - "vcpkg", - "windows-sys 0.52.0", + "syn 2.0.106", ] [[package]] name = "darling" -version = "0.10.2" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ "darling_core", "darling_macro", @@ -669,27 +463,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.10.2" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", - "strsim 0.9.3", - "syn 1.0.109", + "strsim 0.11.1", + "syn 2.0.106", ] [[package]] name = "darling_macro" -version = "0.10.2" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", - "syn 1.0.109", + "syn 2.0.106", ] [[package]] @@ -700,58 +494,7 @@ checksum = "4e018fccbeeb50ff26562ece792ed06659b9c2dae79ece77c4456bb10d9bf79b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", -] - -[[package]] -name = "deranged" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "dirs" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] @@ -768,27 +511,24 @@ dependencies = [ "byteorder", "cffi", "eieio", - "env_logger 0.9.3", + "env_logger 0.11.8", "flatbuffers", "fs_extra", "globwalk", - "hashbrown 0.11.2", + "hashbrown", "itertools", "language-tags", "libc", "lifeguard", "log", - "memmap2 0.5.10", + "memmap2", "parking_lot", "pathos", - "rust-bert", - "rust_tokenizers", "serde", "serde-xml-rs", "serde_json", "smol_str", - "strsim 0.10.0", - "tch", + "strsim 0.11.1", "tempfile", "thiserror", "unic-char-property", @@ -797,11 +537,11 @@ dependencies = [ "unic-segment", "unic-ucd-category", "unic-ucd-common", - "zip 0.5.13", + "zip", ] [[package]] -name = "divvunspell-bin" +name = "divvunspell-cli" version = "1.0.0" dependencies = [ "anyhow", @@ -833,22 +573,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] -name = "encoding_rs" -version = "0.8.35" +name = "env_filter" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ - "cfg-if", + "log", + "regex", ] [[package]] name = "env_logger" -version = "0.7.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" dependencies = [ - "atty", - "humantime 1.3.0", + "humantime", + "is-terminal", "log", "regex", "termcolor", @@ -856,31 +597,25 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.9.3" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ - "atty", - "humantime 2.2.0", + "anstream", + "anstyle", + "env_filter", + "jiff", "log", - "regex", - "termcolor", ] -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - [[package]] name = "errno" -version = "0.3.11" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -899,16 +634,10 @@ dependencies = [ ] [[package]] -name = "filetime" -version = "0.2.25" +name = "find-msvc-tools" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" -dependencies = [ - "cfg-if", - "libc", - "libredox", - "windows-sys 0.59.0", -] +checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" [[package]] name = "flatbuffers" @@ -921,9 +650,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.1" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" +checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" dependencies = [ "crc32fast", "miniz_oxide", @@ -936,255 +665,111 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "foreign-types" -version = "0.3.2" +name = "fruity__bbqsrc" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +checksum = "1283d8f04c860649949007b3d0b8e185798209cfbd8cc21ac8f03cfa29e66380" dependencies = [ - "foreign-types-shared", + "malloced", ] [[package]] -name = "foreign-types-shared" -version = "0.1.1" +name = "fs_extra" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] -name = "form_urlencoded" -version = "1.2.1" +name = "getrandom" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ - "percent-encoding", + "cfg-if", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", ] [[package]] -name = "fruity__bbqsrc" -version = "0.2.0" +name = "getrandom" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1283d8f04c860649949007b3d0b8e185798209cfbd8cc21ac8f03cfa29e66380" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ - "malloced", + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.7+wasi-0.2.4", ] [[package]] -name = "fs2" -version = "0.4.3" +name = "globset" +version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5" dependencies = [ - "libc", - "winapi", + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", ] [[package]] -name = "fs_extra" -version = "1.3.0" +name = "globwalk" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags 2.9.4", + "ignore", + "walkdir", +] [[package]] -name = "fuchsia-cprng" -version = "0.1.1" +name = "gumdrop" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" +checksum = "5bc700f989d2f6f0248546222d9b4258f5b02a171a431f8285a81c08142629e3" +dependencies = [ + "gumdrop_derive", +] [[package]] -name = "futures-channel" -version = "0.3.31" +name = "gumdrop_derive" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "729f9bd3449d77e7831a18abfb7ba2f99ee813dfd15b8c2167c9a54ba20aa99d" dependencies = [ - "futures-core", + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] -name = "futures-core" -version = "0.3.31" +name = "hashbrown" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", + "serde", +] [[package]] -name = "futures-io" -version = "0.3.31" +name = "heck" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" - -[[package]] -name = "futures-sink" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" - -[[package]] -name = "futures-task" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" - -[[package]] -name = "futures-util" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" -dependencies = [ - "futures-core", - "futures-io", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - -[[package]] -name = "glob" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" - -[[package]] -name = "globset" -version = "0.4.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5" -dependencies = [ - "aho-corasick", - "bstr", - "log", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "globwalk" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc" -dependencies = [ - "bitflags 1.3.2", - "ignore", - "walkdir", -] - -[[package]] -name = "gumdrop" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc700f989d2f6f0248546222d9b4258f5b02a171a431f8285a81c08142629e3" -dependencies = [ - "gumdrop_derive", -] - -[[package]] -name = "gumdrop_derive" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "729f9bd3449d77e7831a18abfb7ba2f99ee813dfd15b8c2167c9a54ba20aa99d" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "h2" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "half" -version = "1.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" -dependencies = [ - "ahash", - "serde", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ - "ahash", + "unicode-segmentation", ] -[[package]] -name = "hashbrown" -version = "0.15.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" - [[package]] name = "heck" -version = "0.3.3" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heck" @@ -1203,110 +788,21 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" - -[[package]] -name = "httpdate" -version = "1.0.3" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "humantime" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -dependencies = [ - "quick-error", -] +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "humantime" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" - -[[package]] -name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper-tls" -version = "0.5.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" -dependencies = [ - "bytes", - "hyper", - "native-tls", - "tokio", - "tokio-native-tls", -] +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1326,151 +822,12 @@ dependencies = [ "cc", ] -[[package]] -name = "icu_collections" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locid" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d" - -[[package]] -name = "icu_normalizer" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7" - -[[package]] -name = "icu_properties" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid_transform", - "icu_properties_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2" - -[[package]] -name = "icu_provider" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "idna" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - [[package]] name = "ignore" version = "0.4.23" @@ -1487,16 +844,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "indexmap" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" -dependencies = [ - "equivalent", - "hashbrown 0.15.3", -] - [[package]] name = "indicatif" version = "0.15.0" @@ -1505,93 +852,101 @@ checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4" dependencies = [ "console", "lazy_static", - "number_prefix 0.3.0", + "number_prefix", "rayon", "regex", ] [[package]] -name = "indicatif" -version = "0.16.2" +name = "instant" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ - "console", - "lazy_static", - "number_prefix 0.4.0", - "regex", + "cfg-if", ] [[package]] -name = "inout" -version = "0.1.4" +name = "iref" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +checksum = "de6c200d5291daeb5747f0bb0e479e440b769ecd13f90a6c57a3f732517c568f" dependencies = [ - "generic-array", + "pct-str", + "smallvec", ] [[package]] -name = "instant" -version = "0.1.13" +name = "is-terminal" +version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ - "cfg-if", + "hermit-abi 0.5.2", + "libc", + "windows-sys 0.59.0", ] [[package]] -name = "ipnet" -version = "2.11.0" +name = "is_terminal_polyfill" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] -name = "iref" -version = "1.4.3" +name = "itertools" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6c200d5291daeb5747f0bb0e479e440b769ecd13f90a6c57a3f732517c568f" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ - "pct-str", - "smallvec", + "either", ] [[package]] -name = "is_terminal_polyfill" -version = "1.70.1" +name = "itoa" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] -name = "itertools" -version = "0.10.5" +name = "jiff" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" dependencies = [ - "either", + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", ] [[package]] -name = "itoa" -version = "1.0.15" +name = "jiff-static" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] [[package]] name = "jobserver" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.2", + "getrandom 0.3.3", "libc", ] [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" dependencies = [ "once_cell", "wasm-bindgen", @@ -1611,15 +966,15 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.172" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "liblzma" -version = "0.4.1" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66352d7a8ac12d4877b6e6ea5a9b7650ee094257dc40889955bea5bc5b08c1d0" +checksum = "73c36d08cad03a3fbe2c4e7bb3a9e84c57e4ee4135ed0b065cade3d98480c648" dependencies = [ "liblzma-sys", "num_cpus", @@ -1627,36 +982,13 @@ dependencies = [ [[package]] name = "liblzma-sys" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5839bad90c3cc2e0b8c4ed8296b80e86040240f81d46b9c0e9bc8dd51ddd3af1" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "libredox" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" -dependencies = [ - "bitflags 2.9.0", - "libc", - "redox_syscall 0.5.12", -] - -[[package]] -name = "libz-sys" -version = "1.1.22" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" dependencies = [ "cc", "libc", "pkg-config", - "vcpkg", ] [[package]] @@ -1667,31 +999,24 @@ checksum = "89be94dbd775db37b46ca4f4bf5cf89adfb13ba197bfbcb69b2122848ee73c26" [[package]] name = "linux-raw-sys" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" - -[[package]] -name = "litemap" -version = "0.7.5" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg 1.4.0", "scopeguard", ] [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "malloced" @@ -1705,118 +1030,29 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" -[[package]] -name = "matrixmultiply" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" -dependencies = [ - "autocfg 1.4.0", - "rawpointer", -] - [[package]] name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "memmap2" -version = "0.5.10" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memmap2" -version = "0.9.5" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" dependencies = [ "libc", ] -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - [[package]] name = "miniz_oxide" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", -] - -[[package]] -name = "mio" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" -dependencies = [ - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", -] - -[[package]] -name = "native-tls" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - -[[package]] -name = "ndarray" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", + "simd-adler32", ] [[package]] @@ -1825,16 +1061,16 @@ version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "autocfg 1.4.0", + "autocfg", ] [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi 0.3.9", + "hermit-abi 0.5.2", "libc", ] @@ -1844,21 +1080,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - -[[package]] -name = "object" -version = "0.36.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "memchr", -] - [[package]] name = "once_cell" version = "1.21.3" @@ -1866,57 +1087,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] -name = "openssl" -version = "0.10.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" -dependencies = [ - "bitflags 2.9.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - -[[package]] -name = "openssl-sys" -version = "0.9.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "ordered-float" -version = "2.10.1" +name = "once_cell_polyfill" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" [[package]] name = "os_str_bytes" @@ -1944,22 +1118,11 @@ dependencies = [ "cfg-if", "instant", "libc", - "redox_syscall 0.2.16", + "redox_syscall", "smallvec", "winapi", ] -[[package]] -name = "password-hash" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" -dependencies = [ - "base64ct", - "rand_core 0.6.4", - "subtle", -] - [[package]] name = "pathdiff" version = "0.2.3" @@ -1984,18 +1147,6 @@ dependencies = [ "windirs", ] -[[package]] -name = "pbkdf2" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" -dependencies = [ - "digest", - "hmac", - "password-hash", - "sha2", -] - [[package]] name = "pct-str" version = "1.2.0" @@ -2007,24 +1158,24 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "phf" -version = "0.7.24" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.7.24" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ "phf_generator", "phf_shared", @@ -2032,19 +1183,19 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.7.24" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand 0.6.5", + "rand", ] [[package]] name = "phf_shared" -version = "0.7.24" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ "siphasher", ] @@ -2055,12 +1206,6 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "pkg-config" version = "0.3.32" @@ -2068,27 +1213,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] -name = "powerfmt" -version = "0.2.0" +name = "portable-atomic" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] -name = "ppv-lite86" -version = "0.2.21" +name = "portable-atomic-util" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" dependencies = [ - "zerocopy", + "portable-atomic", ] [[package]] name = "pretty_env_logger" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" +checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" dependencies = [ - "env_logger 0.7.1", + "env_logger 0.10.2", "log", ] @@ -2118,58 +1263,27 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] -[[package]] -name = "protobuf" -version = "2.27.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96" - -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" -version = "1.0.40" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" -version = "5.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" - -[[package]] -name = "rand" -version = "0.6.5" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" -dependencies = [ - "autocfg 0.1.8", - "libc", - "rand_chacha 0.1.1", - "rand_core 0.4.2", - "rand_hc", - "rand_isaac", - "rand_jitter", - "rand_os", - "rand_pcg", - "rand_xorshift", - "winapi", -] +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" @@ -2177,186 +1291,49 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -dependencies = [ - "autocfg 0.1.8", - "rand_core 0.3.1", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -dependencies = [ - "rand_core 0.4.2", + "rand_core", ] -[[package]] -name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.16", -] - -[[package]] -name = "rand_hc" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rand_isaac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rand_jitter" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" -dependencies = [ - "libc", - "rand_core 0.4.2", - "winapi", -] - -[[package]] -name = "rand_os" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" -dependencies = [ - "cloudabi", - "fuchsia-cprng", - "libc", - "rand_core 0.4.2", - "rdrand", - "winapi", -] - -[[package]] -name = "rand_pcg" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" -dependencies = [ - "autocfg 0.1.8", - "rand_core 0.4.2", -] - -[[package]] -name = "rand_xorshift" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" dependencies = [ - "bitflags 1.3.2", + "either", + "rayon-core", ] [[package]] -name = "redox_syscall" -version = "0.5.12" +name = "rayon-core" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ - "bitflags 2.9.0", + "crossbeam-deque", + "crossbeam-utils", ] [[package]] -name = "redox_users" -version = "0.4.6" +name = "redox_syscall" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "getrandom 0.2.16", - "libredox", - "thiserror", + "bitflags 1.3.2", ] [[package]] name = "regex" -version = "1.11.1" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" dependencies = [ "aho-corasick", "memchr", @@ -2366,9 +1343,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" dependencies = [ "aho-corasick", "memchr", @@ -2377,15 +1354,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "regtest" version = "0.1.0" dependencies = [ - "clap 4.5.37", + "clap 4.5.48", "csv", "divvunspell", ] @@ -2396,118 +1373,24 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" -[[package]] -name = "reqwest" -version = "0.11.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" -dependencies = [ - "base64", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-tls", - "ipnet", - "js-sys", - "log", - "mime", - "native-tls", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "system-configuration", - "tokio", - "tokio-native-tls", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "winreg", -] - -[[package]] -name = "rust-bert" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f74d53d058c1478224f08146fb1af6e4466660464a323fb957c2105728c62bf" -dependencies = [ - "cached-path", - "dirs", - "half", - "lazy_static", - "ordered-float", - "rust_tokenizers", - "serde", - "serde_json", - "tch", - "thiserror", - "uuid", -] - -[[package]] -name = "rust_tokenizers" -version = "7.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "196e3b77b07fd5bfcbc8187ecaef5d5931820d9abd6c3fe0a9dc6d3ddb035d72" -dependencies = [ - "csv", - "hashbrown 0.12.3", - "itertools", - "lazy_static", - "protobuf", - "rayon", - "regex", - "serde", - "serde_json", - "thiserror", - "unicode-normalization", - "unicode-normalization-alignments", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" - [[package]] name = "rustix" -version = "1.0.7" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", -] - -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64", + "windows-sys 0.61.2", ] [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" @@ -2524,58 +1407,27 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "schannel" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "security-framework" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" -dependencies = [ - "bitflags 2.9.0", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] [[package]] name = "serde-xml-rs" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65162e9059be2f6a3421ebbb4fef3e74b7d9e7c60c50a0e292c6239f19f1edfa" +checksum = "fb3aa78ecda1ebc9ec9847d5d3aba7d618823446a049ba2491940506da6e2782" dependencies = [ "log", "serde", @@ -2584,60 +1436,36 @@ dependencies = [ ] [[package]] -name = "serde_derive" -version = "1.0.219" +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", + "serde_derive", ] [[package]] -name = "serde_json" -version = "1.0.140" +name = "serde_derive" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] -name = "serde_urlencoded" -version = "0.7.1" +name = "serde_json" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "form_urlencoded", "itoa", + "memchr", "ryu", "serde", -] - -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "serde_core", ] [[package]] @@ -2647,31 +1475,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "siphasher" -version = "0.2.3" +name = "simd-adler32" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" [[package]] -name = "slab" -version = "0.4.9" +name = "siphasher" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg 1.4.0", -] +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "smol_str" -version = "0.1.24" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9" +checksum = "dd538fb6910ac1099850255cf94a94df6551fbdd602454387d0adb2d1ca6dead" dependencies = [ "serde", ] @@ -2682,40 +1507,12 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" -[[package]] -name = "socket2" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - [[package]] name = "strsim" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" -[[package]] -name = "strsim" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - [[package]] name = "strsim" version = "0.11.1" @@ -2746,12 +1543,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - [[package]] name = "syn" version = "1.0.109" @@ -2765,9 +1556,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.101" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -2775,244 +1566,67 @@ dependencies = [ ] [[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "tar" -version = "0.4.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" -dependencies = [ - "filetime", - "libc", - "xattr", -] - -[[package]] -name = "tch" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b73f876b186599e22b01fa6ebfeea2dee2f11e8083463ab3572933d8201436b" -dependencies = [ - "half", - "lazy_static", - "libc", - "ndarray", - "rand 0.8.5", - "thiserror", - "torch-sys", - "zip 0.5.13", -] - -[[package]] -name = "tempfile" -version = "3.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" -dependencies = [ - "fastrand", - "getrandom 0.3.2", - "once_cell", - "rustix", - "windows-sys 0.59.0", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width 0.1.14", -] - -[[package]] -name = "thfst-tools" -version = "1.0.0-beta.3" -dependencies = [ - "box-format", - "divvunspell", - "serde_json", - "structopt", - "tempfile", -] - -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" - -[[package]] -name = "tinystr" -version = "0.7.6" +name = "tempfile" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ - "displaydoc", - "zerovec", + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.61.2", ] [[package]] -name = "tinyvec" -version = "1.9.0" +name = "termcolor" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ - "tinyvec_macros", + "winapi-util", ] [[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.44.2" +name = "textwrap" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "pin-project-lite", - "socket2", - "windows-sys 0.52.0", + "unicode-width 0.1.14", ] [[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +name = "thfst-tools" +version = "1.0.0-beta.3" dependencies = [ - "native-tls", - "tokio", + "box-format", + "divvunspell", + "serde_json", + "structopt", + "tempfile", ] [[package]] -name = "tokio-util" -version = "0.7.15" +name = "thiserror" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", + "thiserror-impl", ] [[package]] -name = "torch-sys" -version = "0.6.1" +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cc0f21b1aad5d71d529e9fe4dbbbdbf53918d7b4bde946f523839aa32cffae" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "anyhow", - "cc", - "curl", - "libc", - "zip 0.5.13", + "proc-macro2", + "quote", + "syn 2.0.106", ] -[[package]] -name = "tower-service" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" - [[package]] name = "tracing" version = "0.1.41" @@ -3026,36 +1640,24 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.28" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", ] -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - [[package]] name = "unic-char-property" version = "0.9.0" @@ -3270,27 +1872,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-normalization-alignments" -version = "0.1.12" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" -dependencies = [ - "smallvec", -] +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" [[package]] name = "unicode-segmentation" @@ -3306,26 +1890,9 @@ checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-width" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" - -[[package]] -name = "url" -version = "2.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "utf16_iter" -version = "1.0.5" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "utf8-decode" @@ -3333,33 +1900,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498" -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.16", -] - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "vec_map" version = "0.8.2" @@ -3382,80 +1928,62 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.14.7+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] [[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", + "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" -dependencies = [ - "cfg-if", - "js-sys", - "once_cell", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3463,36 +1991,26 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" dependencies = [ "unicode-ident", ] -[[package]] -name = "web-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.9" @@ -3511,11 +2029,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3535,9 +2053,9 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.0" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", @@ -3548,90 +2066,75 @@ dependencies = [ [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.106", ] [[package]] name = "windows-link" -version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.2" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ "windows-link", ] [[package]] name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets 0.52.6", ] [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.53.5", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-link", ] [[package]] @@ -3643,7 +2146,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -3651,10 +2154,21 @@ dependencies = [ ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" +name = "windows-targets" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] [[package]] name = "windows_aarch64_gnullvm" @@ -3663,10 +2177,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" +name = "windows_aarch64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -3675,10 +2189,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "windows_i686_gnu" -version = "0.48.5" +name = "windows_aarch64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -3686,6 +2200,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -3693,10 +2213,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "windows_i686_msvc" -version = "0.48.5" +name = "windows_i686_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -3705,10 +2225,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" +name = "windows_i686_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -3717,10 +2237,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" +name = "windows_x86_64_gnu" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -3729,10 +2249,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +name = "windows_x86_64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -3741,138 +2261,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags 2.9.0", -] - -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - -[[package]] -name = "writeable" -version = "0.5.5" +name = "windows_x86_64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] -name = "xattr" -version = "1.5.0" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" -dependencies = [ - "libc", - "rustix", -] +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "xml-rs" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda" - -[[package]] -name = "yoke" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", - "synstructure", -] - -[[package]] -name = "zerovec" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.3" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] +checksum = "6fd8403733700263c6eb89f192880191f1b83e332f7a20371ddcf421c4a337c7" [[package]] name = "zip" @@ -3881,49 +2285,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815" dependencies = [ "byteorder", - "bzip2", "crc32fast", "flate2", "thiserror", - "time 0.1.45", -] - -[[package]] -name = "zip" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" -dependencies = [ - "aes", - "byteorder", - "bzip2", - "constant_time_eq", - "crc32fast", - "crossbeam-utils", - "flate2", - "hmac", - "pbkdf2", - "sha1", - "time 0.3.41", - "zstd 0.11.2+zstd.1.5.2", -] - -[[package]] -name = "zip-extensions" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecf62554c4ff96bce01a7ef123d160c3ffe9180638820f8b4d545c65b221b8c" -dependencies = [ - "zip 0.6.6", -] - -[[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" -dependencies = [ - "zstd-safe 5.0.2+zstd.1.5.2", ] [[package]] @@ -3932,17 +2296,7 @@ version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ - "zstd-safe 7.2.4", -] - -[[package]] -name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] @@ -3956,9 +2310,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index c434ef9..3203476 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,64 @@ +[package] +name = "divvunspell" +description = "Spell checking library for ZHFST/BHFST spellers, with case handling and tokenization support." +version = "1.0.0-beta.5" +authors = ["Brendan Molloy "] +edition = "2021" +license = "MIT OR Apache-2.0" +repository = "https://github.com/divvun/divvunspell" + +[lib] +name = "divvunspell" +crate-type = ["rlib", "staticlib", "cdylib"] + +[dependencies] +libc = "0.2" +memmap2 = "0.9.4" +byteorder = "1.3.4" +serde = { version = "1.0.116", features = ["derive"] } +serde_json = "1.0.57" +serde-xml-rs = { version = "0.6.0", default-features = false } +zip = { version = "0.5", default-features = false } +unic-segment = "0.9.0" +unic-char-range = "0.9.0" +unic-char-property = "0.9.0" +unic-ucd-category = "0.9.0" +unic-emoji-char = "0.9.0" +parking_lot = "0.11.2" +hashbrown = { version = "0.11", features = ["serde"] } +lifeguard = "0.6.1" +smol_str = { version = "0.2.1", features = ["serde"] } +box-format = { version = "0.3.2", features = ["reader"], default-features = false } +itertools = "0.12.1" +strsim = "0.11.0" +log = "0.4.11" +cffi = { git = "https://github.com/cffi-rs/cffi", optional = true } +unic-ucd-common = "0.9.0" +flatbuffers = { version = "0.6.1", optional = true } +env_logger = { version = "0.11.2", optional = true } +thiserror = "1.0.20" +tempfile = "3.3.0" +fs_extra = "1.2.0" +eieio = "1.0.0" +pathos = "0.3.0" +language-tags = "0.3.2" +globwalk = "0.9.1" + +[features] +compression = ["zip/deflate"] +logging = ["env_logger"] +cargo-clippy = [] + +# Internal features: unstable, not for external use! +internal_convert = [] +internal_ffi = ["flatbuffers", "logging", "cffi"] + [workspace] resolver = "2" members = [ - "divvunspell", - "accuracy", - "divvunspell-bin", - "thfst-tools", - "regtest", + ".", + "cli", + "crates/*" ] [profile.dev] diff --git a/LICENSE-MIT b/LICENSE-MIT index c5469f7..07ad97c 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ -Copyright (c) 2017-2021 Brendan Molloy -Copyright (c) 2018-2021 UiT The Arctic University of Norway -Copyright (c) 2018-2021 Sámediggi +Copyright (c) 2017-2025 Brendan Molloy +Copyright (c) 2018-2025 UiT The Arctic University of Norway +Copyright (c) 2018-2025 Sámediggi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index afec627..046b4be 100644 --- a/README.md +++ b/README.md @@ -17,17 +17,6 @@ cargo install thfst-tools cargo install --path . ``` -### Building with `gpt2` support on macOS aarch64 - -(Skip this if you are not experimenting with gpt2 support. So skip. Now.) - -Clone this repo then: - -```bash -brew install libtorch -LIBTORCH=/opt/homebrew/opt/libtorch cargo build --features gpt2 --bin divvunspell -``` - ### No Rust? ```sh @@ -49,7 +38,6 @@ Optional arguments: Available subcommands: suggest get suggestions for provided input tokenize print input in word-separated tokenized form - predict predict next words using GPT2 model $ divvunspell suggest -h Usage: divvunspell suggest [OPTIONS] diff --git a/divvunspell-bin/Cargo.toml b/cli/Cargo.toml similarity index 71% rename from divvunspell-bin/Cargo.toml rename to cli/Cargo.toml index 3606464..1d3c437 100644 --- a/divvunspell-bin/Cargo.toml +++ b/cli/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "divvunspell-bin" +name = "divvunspell-cli" description = "Spellchecker for ZHFST/BHFST spellers, with case handling and tokenization support." version = "1.0.0" authors = ["Brendan Molloy "] @@ -14,13 +14,9 @@ path = "src/main.rs" [dependencies] serde = { version = "1.0.116", features = ["derive"] } serde_json = "1.0.57" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = ".." } box-format = { version = "0.3.2", features = ["reader"], default-features = false } -pretty_env_logger = "0.4.0" +pretty_env_logger = "0.5.0" gumdrop = "0.8.0" anyhow = "1.0.32" structopt = "0.3.17" - -[features] -default = [] -gpt2 = ["divvunspell/gpt2"] diff --git a/accuracy/LICENSE b/cli/LICENSE similarity index 100% rename from accuracy/LICENSE rename to cli/LICENSE diff --git a/divvunspell-bin/src/main.rs b/cli/src/main.rs similarity index 65% rename from divvunspell-bin/src/main.rs rename to cli/src/main.rs index 77295cd..7ca1c25 100644 --- a/divvunspell-bin/src/main.rs +++ b/cli/src/main.rs @@ -1,17 +1,17 @@ use std::io::{self, Read}; +use std::process; use std::{ path::{Path, PathBuf}, sync::Arc, }; +use divvunspell::speller::HfstSpeller; +use divvunspell::transducer::hfst::HfstTransducer; +use divvunspell::transducer::Transducer; +use divvunspell::vfs::Fs; use gumdrop::Options; use serde::Serialize; -#[cfg(feature = "gpt2")] -use divvunspell::archive::{ - boxf::BoxGpt2PredictorArchive, error::PredictorArchiveError, PredictorArchive, -}; - use divvunspell::{ archive::{ boxf::ThfstBoxSpellerArchive, error::SpellerArchiveError, BoxSpellerArchive, @@ -24,11 +24,14 @@ use divvunspell::{ trait OutputWriter { fn write_correction(&mut self, word: &str, is_correct: bool); fn write_suggestions(&mut self, word: &str, suggestions: &[Suggestion]); - fn write_predictions(&mut self, predictions: &[String]); + fn write_input_analyses(&mut self, word: &str, analyses: &[Suggestion]); + fn write_output_analyses(&mut self, word: &str, analyses: &[Suggestion]); fn finish(&mut self); } -struct StdoutWriter; +struct StdoutWriter { + has_continuation_marker: Option, +} impl OutputWriter for StdoutWriter { fn write_correction(&mut self, word: &str, is_correct: bool) { @@ -40,15 +43,36 @@ impl OutputWriter for StdoutWriter { } fn write_suggestions(&mut self, _word: &str, suggestions: &[Suggestion]) { + if let Some(s) = &self.has_continuation_marker { + for sugg in suggestions { + print!("{}", sugg.value); + if sugg.completed == Some(true) { + print!("{s}"); + } + println!("\t\t{}", sugg.weight); + } + } else { + for sugg in suggestions { + println!("{}\t\t{}", sugg.value, sugg.weight); + } + } + println!(); + } + + fn write_input_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { + println!("Input analyses: "); for sugg in suggestions { println!("{}\t\t{}", sugg.value, sugg.weight); } println!(); } - fn write_predictions(&mut self, predictions: &[String]) { - println!("Predictions: "); - println!("{}", predictions.join(" ")); + fn write_output_analyses(&mut self, _word: &str, suggestions: &[Suggestion]) { + println!("Output analyses: "); + for sugg in suggestions { + println!("{}\t\t{}", sugg.value, sugg.weight); + } + println!(); } fn finish(&mut self) {} @@ -62,18 +86,25 @@ struct SuggestionRequest { } #[derive(Serialize)] +struct AnalysisRequest { + word: String, + suggestions: Vec, +} + +#[derive(Default, Serialize)] #[serde(rename_all = "camelCase")] struct JsonWriter { + #[serde(skip_serializing_if = "Vec::is_empty")] suggest: Vec, - predict: Option>, + #[serde(skip_serializing_if = "Vec::is_empty")] + input_analysis: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + output_analysis: Vec, } impl JsonWriter { pub fn new() -> JsonWriter { - JsonWriter { - suggest: vec![], - predict: None, - } + Self::default() } } @@ -91,8 +122,18 @@ impl OutputWriter for JsonWriter { self.suggest[i].suggestions = suggestions.to_vec(); } - fn write_predictions(&mut self, predictions: &[String]) { - self.predict = Some(predictions.to_vec()); + fn write_input_analyses(&mut self, word: &str, suggestions: &[Suggestion]) { + self.input_analysis.push(AnalysisRequest { + word: word.to_string(), + suggestions: suggestions.to_vec(), + }) + } + + fn write_output_analyses(&mut self, word: &str, suggestions: &[Suggestion]) { + self.output_analysis.push(AnalysisRequest { + word: word.to_string(), + suggestions: suggestions.to_vec(), + }) } fn finish(&mut self) { @@ -104,6 +145,7 @@ fn run( speller: Arc, words: Vec, writer: &mut dyn OutputWriter, + is_analyzing: bool, is_suggesting: bool, is_always_suggesting: bool, suggest_cfg: &SpellerConfig, @@ -116,6 +158,23 @@ fn run( let suggestions = speller.clone().suggest_with_config(&word, &suggest_cfg); writer.write_suggestions(&word, &suggestions); } + + if is_analyzing { + let input_analyses = speller + .clone() + .analyze_input_with_config(&word, &suggest_cfg); + writer.write_input_analyses(&word, &input_analyses); + + let output_analyses = speller + .clone() + .analyze_output_with_config(&word, &suggest_cfg); + writer.write_output_analyses(&word, &output_analyses); + + let final_suggs = speller + .clone() + .analyze_suggest_with_config(&word, &suggest_cfg); + writer.write_suggestions(&word, &final_suggs); + } } } #[derive(Debug, Options)] @@ -134,9 +193,6 @@ enum Command { #[options(help = "print input in word-separated tokenized form")] Tokenize(TokenizeArgs), - - #[options(help = "predict next words using GPT2 model")] - Predict(PredictArgs), } #[derive(Debug, Options)] @@ -144,18 +200,30 @@ struct SuggestArgs { #[options(help = "print help message")] help: bool, - #[options(help = "BHFST or ZHFST archive to be used", required)] - archive: PathBuf, + #[options(short = "a", help = "BHFST or ZHFST archive to be used")] + archive_path: Option, + + #[options(long = "mutator", help = "mutator to use (if archive not provided)")] + mutator_path: Option, + + #[options(long = "lexicon", help = "lexicon to use (if archive not provided)")] + lexicon_path: Option, #[options(short = "S", help = "always show suggestions even if word is correct")] always_suggest: bool, + #[options(short = "A", help = "analyze words and suggestions")] + analyze: bool, + #[options(help = "maximum weight limit for suggestions")] weight: Option, #[options(help = "maximum number of results")] nbest: Option, + #[options(help = "character for incomplete suggestions")] + continuation_marker: Option, + #[options( no_short, long = "no-reweighting", @@ -192,31 +260,6 @@ struct TokenizeArgs { inputs: Vec, } -#[derive(Debug, Options)] -struct PredictArgs { - #[options(help = "print help message")] - help: bool, - - #[options(help = "BHFST archive to be used", required)] - archive: PathBuf, - - #[options( - short = "n", - long = "name", - help = "Predictor name to use (default: gpt2_predictor)" - )] - predictor_name: Option, - - #[options(help = "whether suggestions should not be validated against a speller")] - disable_spelling_validation: bool, - - #[options(no_short, long = "json", help = "output in JSON format")] - use_json: bool, - - #[options(free, help = "text to be tokenized")] - inputs: Vec, -} - fn tokenize(args: TokenizeArgs) -> anyhow::Result<()> { let inputs: String = if args.inputs.is_empty() { eprintln!("Reading from stdin..."); @@ -288,21 +331,42 @@ fn load_archive(path: &Path) -> Result, SpellerArchiveEr } fn suggest(args: SuggestArgs) -> anyhow::Result<()> { + // 1. default config let mut suggest_cfg = SpellerConfig::default(); + let speller = if let Some(archive_path) = args.archive_path { + let archive = load_archive(&archive_path)?; + // 2. config from metadata + if let Some(metadata) = archive.metadata() { + if let Some(continuation) = metadata.acceptor().continuation() { + suggest_cfg.continuation_marker = Some(continuation.to_string()); + } + } + let speller = archive.speller(); + speller + } else if let (Some(lexicon_path), Some(mutator_path)) = (args.lexicon_path, args.mutator_path) + { + let acceptor = HfstTransducer::from_path(&Fs, lexicon_path)?; + let errmodel = HfstTransducer::from_path(&Fs, mutator_path)?; + HfstSpeller::new(errmodel, acceptor) as _ + } else { + eprintln!("Either a BHFST or ZHFST archive must be provided, or a mutator and lexicon."); + process::exit(1); + }; + // 3. config from explicit config file if let Some(config_path) = args.config { let config_file = std::fs::File::open(config_path)?; let config: SpellerConfig = serde_json::from_reader(config_file)?; suggest_cfg = config; } - + // 4. config from other command line stuff if args.disable_reweight { suggest_cfg.reweight = None; } if args.disable_recase { suggest_cfg.recase = false; } - + suggest_cfg.continuation_marker = args.continuation_marker.clone(); if let Some(v) = args.nbest { if v == 0 { suggest_cfg.n_best = None; @@ -322,7 +386,9 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { let mut writer: Box = if args.use_json { Box::new(JsonWriter::new()) } else { - Box::new(StdoutWriter) + Box::new(StdoutWriter { + has_continuation_marker: args.continuation_marker, + }) }; let words = if args.inputs.is_empty() { @@ -340,12 +406,11 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { args.inputs.into_iter().collect() }; - let archive = load_archive(&args.archive)?; - let speller = archive.speller(); run( speller, words, &mut *writer, + args.analyze, true, args.always_suggest, &suggest_cfg, @@ -356,76 +421,6 @@ fn suggest(args: SuggestArgs) -> anyhow::Result<()> { Ok(()) } -#[cfg(feature = "gpt2")] -fn load_predictor_archive( - path: &Path, - name: Option<&str>, -) -> Result, PredictorArchiveError> { - let archive = BoxGpt2PredictorArchive::open(path, name)?; - let archive = Box::new(archive); - Ok(archive) -} - -#[cfg(feature = "gpt2")] -fn predict(args: PredictArgs) -> anyhow::Result<()> { - let raw_input = if args.inputs.is_empty() { - eprintln!("Reading from stdin..."); - let mut buffer = String::new(); - io::stdin() - .read_to_string(&mut buffer) - .expect("reading stdin"); - buffer - } else { - args.inputs.join(" ") - }; - - let predictor_name = args.predictor_name.as_deref(); - let archive = load_predictor_archive(&args.archive, predictor_name)?; - let predictor = archive.predictor(); - - let mut writer: Box = if args.use_json { - Box::new(JsonWriter::new()) - } else { - Box::new(StdoutWriter) - }; - - let suggest_cfg = SpellerConfig::default(); - - let predictions = predictor.predict(&raw_input); - writer.write_predictions(&predictions); - - let has_speller = archive.metadata().map(|x| x.speller).unwrap_or(false); - if !args.disable_spelling_validation { - if !has_speller { - eprintln!("Error: requested spell checking but no speller present in archive!"); - } else { - let speller_archive = load_archive(&args.archive)?; - let speller = speller_archive.speller(); - - for word in predictions { - let cleaned_str = word.as_str().word_indices(); - for w in cleaned_str { - let is_correct = speller.clone().is_correct_with_config(&w.1, &suggest_cfg); - writer.write_correction(w.1, is_correct); - } - } - } - }; - - Ok(()) -} - -#[cfg(not(feature = "gpt2"))] -fn predict(_args: PredictArgs) -> anyhow::Result<()> { - eprintln!("ERROR: DivvunSpell was built without GPT2 support."); - eprintln!("If you built this using cargo, re-run the build with the following:"); - eprintln!(""); - eprintln!(" cargo build --features gpt2"); - eprintln!(""); - - std::process::exit(1); -} - fn main() -> anyhow::Result<()> { pretty_env_logger::init(); @@ -435,6 +430,5 @@ fn main() -> anyhow::Result<()> { None => Ok(()), Some(Command::Suggest(args)) => suggest(args), Some(Command::Tokenize(args)) => tokenize(args), - Some(Command::Predict(args)) => predict(args), } } diff --git a/accuracy/Cargo.toml b/crates/accuracy/Cargo.toml similarity index 80% rename from accuracy/Cargo.toml rename to crates/accuracy/Cargo.toml index e6d3de0..a7643e4 100644 --- a/accuracy/Cargo.toml +++ b/crates/accuracy/Cargo.toml @@ -9,13 +9,13 @@ publish = false [dependencies] serde = { version = "1.0.116", features = ["derive"] } serde_json = "1.0.57" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = "../.." } csv = { version = "1.1" } rayon = { version = "1.4.0" } indicatif = { version = "0.15", features = ["with_rayon"] } # box-format = { git = "https://github.com/bbqsrc/box", branch = "master" } # tempdir = "0.3.7" -pretty_env_logger = "0.4.0" +pretty_env_logger = "0.5.0" # ctor = "*" # gumdrop = "0.8.0" # thiserror = "1.0.20" diff --git a/divvunspell-bin/LICENSE b/crates/accuracy/LICENSE similarity index 100% rename from divvunspell-bin/LICENSE rename to crates/accuracy/LICENSE diff --git a/accuracy/src/main.rs b/crates/accuracy/src/main.rs similarity index 99% rename from accuracy/src/main.rs rename to crates/accuracy/src/main.rs index 03b2b4d..9de7cb4 100644 --- a/accuracy/src/main.rs +++ b/crates/accuracy/src/main.rs @@ -24,6 +24,7 @@ $ cargo run -- --threshold 0.9 typos.txt se.zhfst */ use chrono::prelude::*; +use divvunspell::types::Weight; use std::error::Error; use std::{ io::Write, @@ -42,10 +43,11 @@ use structopt::clap::{App, AppSettings, Arg}; static CFG: SpellerConfig = SpellerConfig { n_best: Some(10), - max_weight: Some(10000.0), + max_weight: Some(Weight(10000.0)), beam: None, reweight: Some(ReweightingConfig::default_const()), node_pool_size: 128, + continuation_marker: None, recase: true, }; diff --git a/regtest/Cargo.toml b/crates/regtest/Cargo.toml similarity index 55% rename from regtest/Cargo.toml rename to crates/regtest/Cargo.toml index c64339d..41d8a68 100644 --- a/regtest/Cargo.toml +++ b/crates/regtest/Cargo.toml @@ -6,4 +6,4 @@ edition = "2021" [dependencies] clap = { version = "4.5.32", features = ["derive"] } csv = "1.3.1" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = "../.." } diff --git a/regtest/src/main.rs b/crates/regtest/src/main.rs similarity index 100% rename from regtest/src/main.rs rename to crates/regtest/src/main.rs diff --git a/thfst-tools/Cargo.toml b/crates/thfst-tools/Cargo.toml similarity index 76% rename from thfst-tools/Cargo.toml rename to crates/thfst-tools/Cargo.toml index ee29450..b5c1475 100644 --- a/thfst-tools/Cargo.toml +++ b/crates/thfst-tools/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/divvun/divvunspell" [dependencies] serde_json = "1.0.57" -divvunspell = { version = "1.0.0-beta.5", features = ["internal_convert", "compression"], path = "../divvunspell" } +divvunspell = { features = ["internal_convert", "compression"], path = "../.." } box-format = "0.3.2" structopt = "0.3.17" tempfile = "3" diff --git a/thfst-tools/LICENSE b/crates/thfst-tools/LICENSE similarity index 100% rename from thfst-tools/LICENSE rename to crates/thfst-tools/LICENSE diff --git a/thfst-tools/src/main.rs b/crates/thfst-tools/src/main.rs similarity index 96% rename from thfst-tools/src/main.rs rename to crates/thfst-tools/src/main.rs index f5a8e96..683d38d 100644 --- a/thfst-tools/src/main.rs +++ b/crates/thfst-tools/src/main.rs @@ -142,8 +142,10 @@ fn convert_zhfst_to_bhfst(zhfst_path: &Path) -> Result<(), std::io::Error> { Some(metadata) => { println!("Converting \"index.xml\" to \"meta.json\"..."); let mut m = metadata.to_owned(); - m.acceptor.id = metadata.acceptor.id.replace(".hfst", ".thfst"); - m.errmodel.id = metadata.errmodel.id.replace(".hfst", ".thfst"); + m.acceptor_mut() + .set_id(metadata.acceptor().id().replace(".hfst", ".thfst")); + m.errmodel_mut() + .set_id(metadata.errmodel().id().replace(".hfst", ".thfst")); Some(serde_json::to_string_pretty(&m)?) } None => None, diff --git a/divvunspell/Cargo.toml b/divvunspell/Cargo.toml deleted file mode 100644 index a50d6c4..0000000 --- a/divvunspell/Cargo.toml +++ /dev/null @@ -1,58 +0,0 @@ -[package] -name = "divvunspell" -description = "Spell checking library for ZHFST/BHFST spellers, with case handling and tokenization support." -version = "1.0.0-beta.5" -authors = ["Brendan Molloy "] -edition = "2021" -license = "MIT OR Apache-2.0" -repository = "https://github.com/divvun/divvunspell" - -[lib] -name = "divvunspell" -crate-type = ["rlib", "staticlib", "cdylib"] - -[dependencies] -libc = "0.2" -memmap2 = "0.5.0" -byteorder = "1.3.4" -serde = { version = "1.0.116", features = ["derive"] } -serde_json = "1.0.57" -serde-xml-rs = { version = "0.5.0", default-features = false } -zip = { version = "0.5", default-features = false } -unic-segment = "0.9.0" -unic-char-range = "0.9.0" -unic-char-property = "0.9.0" -unic-ucd-category = "0.9.0" -unic-emoji-char = "0.9.0" -parking_lot = "0.11.2" -hashbrown = { version = "0.11", features = ["serde"] } -lifeguard = "0.6.1" -smol_str = { version = "0.1.16", features = ["serde"] } -box-format = { version = "0.3.2", features = ["reader"], default-features = false } -itertools = "0.10" -strsim = "0.10.0" -log = "0.4.11" -cffi = "0.1.6" -unic-ucd-common = "0.9.0" -flatbuffers = { version = "0.6.1", optional = true } -env_logger = { version = "0.9", optional = true } -thiserror = "1.0.20" -tch = { version = "0.6.1", optional = true } -rust-bert = { version = "0.17.0", optional = true } -rust_tokenizers = { version = "7", optional = true } -tempfile = "3.3.0" -fs_extra = "1.2.0" -eieio = "1.0.0" -pathos = "0.3.0" -language-tags = "0.3.2" -globwalk = "0.8.1" - -[features] -compression = ["zip/deflate"] -logging = ["env_logger"] -gpt2 = ["tch", "rust-bert", "rust_tokenizers"] -cargo-clippy = [] - -# Internal features: unstable, not for external use! -internal_convert = [] -internal_ffi = ["flatbuffers", "logging"] diff --git a/divvunspell/LICENSE-APACHE b/divvunspell/LICENSE-APACHE deleted file mode 100644 index 8f71f43..0000000 --- a/divvunspell/LICENSE-APACHE +++ /dev/null @@ -1,202 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/divvunspell/LICENSE-MIT b/divvunspell/LICENSE-MIT deleted file mode 100644 index c5469f7..0000000 --- a/divvunspell/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2017-2021 Brendan Molloy -Copyright (c) 2018-2021 UiT The Arctic University of Norway -Copyright (c) 2018-2021 Sámediggi - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/divvunspell/src/archive/error.rs b/divvunspell/src/archive/error.rs deleted file mode 100644 index d037ebb..0000000 --- a/divvunspell/src/archive/error.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Archive-related errors. -use std::{ffi::OsString, io::Error}; - -#[cfg(feature = "gpt2")] -use rust_bert::RustBertError; - -use crate::transducer::TransducerError; - -#[derive(Debug, thiserror::Error)] -pub enum SpellerArchiveError { - #[error("File error")] - File(#[source] Error), - - #[error("IO error")] - Io(String, #[source] eieio::Error), - - #[error("Transducer error")] - Transducer(#[source] TransducerError), - - #[error("Missing metadata")] - NoMetadata, - - #[error("Unsupported compression")] - UnsupportedCompressed, - - #[error("Unknown error code {0}")] - Unknown(u8), - - #[error("Unsupported file extension: {0:?}")] - UnsupportedExt(OsString), -} - -#[derive(Debug, thiserror::Error)] -pub enum PredictorArchiveError { - #[error("File error")] - File(#[source] Error), - - #[error("IO error")] - Io(String, #[source] Error), - - #[cfg(feature = "gpt2")] - #[error("Error loading bert model")] - Bert(#[from] RustBertError), - - #[error("Error deserialising JSON")] - Json(#[from] serde_json::Error), - - #[error("Missing metadata")] - NoMetadata, - - #[error("Unsupported compression")] - UnsupportedCompressed, - - #[error("Unknown error code {0}")] - Unknown(u8), - - #[error("Unsupported file extension: {0:?}")] - UnsupportedExt(OsString), -} diff --git a/divvunspell/src/archive/meta.rs b/divvunspell/src/archive/meta.rs deleted file mode 100644 index e6588d9..0000000 --- a/divvunspell/src/archive/meta.rs +++ /dev/null @@ -1,120 +0,0 @@ -//! Archive metadata handling -use serde::{Deserialize, Serialize}; -use serde_xml_rs::{from_reader, Error, ParserConfig}; - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct SpellerMetadata { - pub info: SpellerMetadataInfo, - pub acceptor: SpellerMetadataAcceptor, - pub errmodel: SpellerMetadataErrmodel, -} - -#[derive(Serialize, Deserialize, Debug, Default, Clone)] -pub struct PredictorMetadata { - #[serde(default)] - pub speller: bool, -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct SpellerTitle { - pub lang: Option, - #[serde(rename = "$value")] - pub value: String, -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct SpellerMetadataInfo { - pub locale: String, - pub title: Vec, - pub description: String, - pub producer: String, -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct SpellerMetadataAcceptor { - #[serde(rename = "type", default)] - pub type_: String, - pub id: String, - pub title: Vec, - pub description: String, -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct SpellerMetadataErrmodel { - pub id: String, - pub title: Vec, - pub description: String, -} - -impl std::str::FromStr for SpellerMetadata { - type Err = Error; - - fn from_str(string: &str) -> Result { - SpellerMetadata::from_bytes(string.as_bytes()) - } -} - -impl SpellerMetadata { - pub fn from_bytes(bytes: &[u8]) -> Result { - let mut reader = ParserConfig::new() - .trim_whitespace(true) - .ignore_comments(true) - .coalesce_characters(true) - .create_reader(bytes) - .into_inner(); - - from_reader(&mut reader) - } -} - -impl PredictorMetadata { - pub fn from_bytes(bytes: &[u8]) -> Result { - let mut reader = ParserConfig::new() - .trim_whitespace(true) - .ignore_comments(true) - .coalesce_characters(true) - .create_reader(bytes) - .into_inner(); - - from_reader(&mut reader) - } -} - -#[test] -fn test_xml_parse() { - use std::str::FromStr; - - let xml_data = r##" - - - - se - Giellatekno/Divvun/UiT fst-based speller for Northern Sami - This is an fst-based speller for Northern Sami. It is based - on the normative subset of the morphological analyser for Northern Sami. - The source code can be found at: - https://victorio.uit.no/langtech/trunk/langs/sme/ - License: GPL3+. - GT_VERSION - DATE - Giellatekno/Divvun/UiT contributors - - - - Giellatekno/Divvun/UiT dictionary Northern Sami - Giellatekno/Divvun/UiT dictionary for - Northern Sami compiled for HFST. - - - Levenshtein edit distance transducer - Correction model for keyboard misstrokes, at most 2 per - word. - - errormodel.default.hfst - - - "##; - - let s = SpellerMetadata::from_str(&xml_data).unwrap(); - println!("{:#?}", s); -} diff --git a/divvunspell/src/predictor/gpt2.rs b/divvunspell/src/predictor/gpt2.rs deleted file mode 100644 index c76a731..0000000 --- a/divvunspell/src/predictor/gpt2.rs +++ /dev/null @@ -1,58 +0,0 @@ -use std::path::Path; -use std::sync::Arc; - -use parking_lot::Mutex; -use rust_bert::pipelines::common::ModelType; -use rust_bert::pipelines::text_generation::{TextGenerationConfig, TextGenerationModel}; -use rust_bert::resources::{LocalResource, Resource}; -use rust_bert::RustBertError; - -use super::Predictor; - -pub struct Gpt2Predictor { - model: Mutex, -} - -impl Gpt2Predictor { - pub fn new(model_path: &Path) -> Result { - let config_resource = Resource::Local(LocalResource { - local_path: model_path.join("config.json"), - }); - let vocab_resource = Resource::Local(LocalResource { - local_path: model_path.join("vocab.json"), - }); - let merges_resource = Resource::Local(LocalResource { - local_path: model_path.join("merges.txt"), - }); - let weights_resource = Resource::Local(LocalResource { - local_path: model_path.join("rust_model.ot"), - }); - - let generate_config = TextGenerationConfig { - model_resource: weights_resource, - vocab_resource: vocab_resource, - merges_resource: merges_resource, - config_resource: config_resource, - model_type: ModelType::GPT2, - max_length: 24, - do_sample: true, - num_beams: 1, - temperature: 1.1, - num_return_sequences: 1, - ..Default::default() - }; - let model = Mutex::new(TextGenerationModel::new(generate_config)?); - Ok(Self { model }) - } - - fn generate(&self, raw_input: &str) -> Vec { - let guard = self.model.lock(); - guard.generate(&[raw_input], None) - } -} - -impl Predictor for Gpt2Predictor { - fn predict(self: Arc, raw_input: &str) -> Vec { - self.generate(raw_input) - } -} diff --git a/divvunspell/src/predictor/mod.rs b/divvunspell/src/predictor/mod.rs deleted file mode 100644 index 672ccfa..0000000 --- a/divvunspell/src/predictor/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Autocorrect type spell-checking that predicts next word. -#[cfg(feature = "gpt2")] -pub mod gpt2; - -use std::sync::Arc; - -pub trait Predictor { - fn predict(self: Arc, raw_input: &str) -> Vec; -} diff --git a/divvunspell/src/types.rs b/divvunspell/src/types.rs deleted file mode 100644 index 437aeb0..0000000 --- a/divvunspell/src/types.rs +++ /dev/null @@ -1,54 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum FlagDiacriticOperator { - PositiveSet, - NegativeSet, - Require, - Disallow, - Clear, - Unification, -} - -impl std::str::FromStr for FlagDiacriticOperator { - type Err = (); - - fn from_str(s: &str) -> Result { - match s { - "P" => Ok(FlagDiacriticOperator::PositiveSet), - "N" => Ok(FlagDiacriticOperator::NegativeSet), - "R" => Ok(FlagDiacriticOperator::Require), - "D" => Ok(FlagDiacriticOperator::Disallow), - "C" => Ok(FlagDiacriticOperator::Clear), - "U" => Ok(FlagDiacriticOperator::Unification), - _ => Err(()), - } - } -} - -#[derive(Debug)] -pub enum HeaderFlag { - Weighted, - Deterministic, - InputDeterministic, - Minimized, - Cyclic, - HasEpsilonEpsilonTransitions, - HasInputEpsilonTransitions, - HasInputEpsilonCycles, - HasUnweightedInputEpsilonCycles, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct FlagDiacriticOperation { - pub operation: FlagDiacriticOperator, - pub feature: SymbolNumber, - pub value: ValueNumber, -} - -pub type SymbolNumber = u16; -pub type ValueNumber = i16; -pub type TransitionTableIndex = u32; -pub type Weight = f32; -pub type FlagDiacriticState = Vec; -pub type OperationsMap = hashbrown::HashMap; diff --git a/docs/src/divvunspell/archive/meta.rs.html b/docs/src/divvunspell/archive/meta.rs.html index e7ce629..b635b89 100644 --- a/docs/src/divvunspell/archive/meta.rs.html +++ b/docs/src/divvunspell/archive/meta.rs.html @@ -211,7 +211,7 @@ <locale>se</locale> <title>Giellatekno/Divvun/UiT fst-based speller for Northern Sami</title> <description>This is an fst-based speller for Northern Sami. It is based - on the normative subset of the morphological analyser for Northern Sami. + on the normative subset of the morphological analyzer for Northern Sami. The source code can be found at: https://victorio.uit.no/langtech/trunk/langs/sme/ License: GPL3+.</description> diff --git a/divvunspell/examples/find-path.rs b/examples/find-path.rs similarity index 100% rename from divvunspell/examples/find-path.rs rename to examples/find-path.rs diff --git a/divvunspell/src/archive/boxf.rs b/src/archive/boxf.rs similarity index 60% rename from divvunspell/src/archive/boxf.rs rename to src/archive/boxf.rs index f6b6c05..6520a76 100644 --- a/divvunspell/src/archive/boxf.rs +++ b/src/archive/boxf.rs @@ -3,12 +3,6 @@ use std::sync::Arc; use box_format::BoxFileReader; -#[cfg(feature = "gpt2")] -use tempfile::TempDir; - -#[cfg(feature = "gpt2")] -use super::{error::PredictorArchiveError, meta::PredictorMetadata, PredictorArchive}; - use super::error::SpellerArchiveError; use super::{meta::SpellerMetadata, SpellerArchive}; use crate::speller::{HfstSpeller, Speller}; @@ -101,63 +95,3 @@ where self.metadata.as_ref() } } - -#[cfg(feature = "gpt2")] -pub struct BoxGpt2PredictorArchive { - #[allow(unused)] - model_path: std::path::PathBuf, - model: Arc, - _temp_dir: TempDir, // necessary to keep the temp dir alive until dropped - metadata: Option, -} - -#[cfg(feature = "gpt2")] -impl PredictorArchive for BoxGpt2PredictorArchive { - fn open( - path: &std::path::Path, - predictor_name: Option<&str>, - ) -> Result - where - Self: Sized, - { - let archive = BoxFileReader::open(path).map_err(|e| { - PredictorArchiveError::File(std::io::Error::new(std::io::ErrorKind::Other, e)) - })?; - let fs = BoxFilesystem::new(&archive); - - let predictor_name = predictor_name.unwrap_or("gpt2_predictor"); - let predictor_path = std::path::Path::new(predictor_name); - - // TODO: make this name customizable via metadata? - let file = fs - .open_file(predictor_path.join("meta.json")) - .map_err(|e| PredictorArchiveError::Io("Could not load meta.json".into(), e))?; - - let metadata = serde_json::from_reader(file)?; - - let temp_dir = fs.copy_to_temp_dir(&predictor_path).map_err(|e| { - PredictorArchiveError::Io( - format!("Could not copy '{}' to temp directory", predictor_name), - e, - ) - })?; - let model_path = temp_dir.path().join(&predictor_path); - - let model = Arc::new(crate::predictor::gpt2::Gpt2Predictor::new(&model_path)?); - - Ok(BoxGpt2PredictorArchive { - model_path, - model, - _temp_dir: temp_dir, - metadata, - }) - } - - fn predictor(&self) -> Arc { - self.model.clone() - } - - fn metadata(&self) -> Option<&PredictorMetadata> { - self.metadata.as_ref() - } -} diff --git a/src/archive/error.rs b/src/archive/error.rs new file mode 100644 index 0000000..6f3b35c --- /dev/null +++ b/src/archive/error.rs @@ -0,0 +1,37 @@ +//! Archive-related errors. +use std::{ffi::OsString, io::Error}; + +use crate::transducer::TransducerError; + +/// Errors that can occur when opening or using a speller archive. +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +pub enum SpellerArchiveError { + /// Error opening or reading the archive file + #[error("File error")] + File(#[source] Error), + + /// I/O error while reading archive contents + #[error("IO error")] + Io(String, #[source] eieio::Error), + + /// Error loading or parsing a transducer from the archive + #[error("Transducer error")] + Transducer(#[source] TransducerError), + + /// Archive is missing required metadata + #[error("Missing metadata")] + NoMetadata, + + /// Archive uses unsupported compression + #[error("Unsupported compression")] + UnsupportedCompressed, + + /// Unknown error code encountered + #[error("Unknown error code {0}")] + Unknown(u8), + + /// File has an unsupported extension (expected .zhfst or .bhfst) + #[error("Unsupported file extension: {0:?}")] + UnsupportedExt(OsString), +} diff --git a/src/archive/meta.rs b/src/archive/meta.rs new file mode 100644 index 0000000..0937962 --- /dev/null +++ b/src/archive/meta.rs @@ -0,0 +1,252 @@ +//! Data structures of speller metadata. +//! +//! These are usually read from the speller archives, in xml or json files or +//! such. XML format is described here and json format there. +use serde::{Deserialize, Serialize}; +use serde_xml_rs::{from_reader, Error, ParserConfig}; + +/// Speller metadata +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct SpellerMetadata { + /// speller info + info: SpellerMetadataInfo, + /// acceptor metadata + acceptor: SpellerMetadataAcceptor, + /// error model metadata + errmodel: SpellerMetadataErrmodel, +} + +impl SpellerMetadata { + /// Get the speller information + pub fn info(&self) -> &SpellerMetadataInfo { + &self.info + } + + /// Get the acceptor metadata + pub fn acceptor(&self) -> &SpellerMetadataAcceptor { + &self.acceptor + } + + /// Get the error model metadata + pub fn errmodel(&self) -> &SpellerMetadataErrmodel { + &self.errmodel + } + + /// Get mutable reference to acceptor metadata + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn acceptor_mut(&mut self) -> &mut SpellerMetadataAcceptor { + &mut self.acceptor + } + + /// Get mutable reference to error model metadata + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn errmodel_mut(&mut self) -> &mut SpellerMetadataErrmodel { + &mut self.errmodel + } +} + +/// localised speller title +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct SpellerTitle { + /// ISO 639 code of the title's content language + pub lang: Option, + /// translated title + #[serde(rename = "$value")] + pub value: String, +} + +/// Speller metadata +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct SpellerMetadataInfo { + /// ISO-639 code of speller language + locale: String, + /// localised, human readable titles of speller + title: Vec, + /// human readable description of speller + description: String, + /// creator and copyright owner of the speller + producer: String, +} + +impl SpellerMetadataInfo { + /// Get the ISO-639 locale code + pub fn locale(&self) -> &str { + &self.locale + } + + /// Get the localized titles + pub fn title(&self) -> &[SpellerTitle] { + &self.title + } + + /// Get the description + pub fn description(&self) -> &str { + &self.description + } + + /// Get the producer/creator + pub fn producer(&self) -> &str { + &self.producer + } +} + +/// Acceptor metadata +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct SpellerMetadataAcceptor { + /// acceptor type: + /// - `blah` if normal dictionary automaton + /// - `foo` if analyzer + #[serde(rename = "type", default)] + type_: String, + /// locally unique id for this acceptor + id: String, + /// localised human readable titles of speller + title: Vec, + /// human readable description of the acceptor + description: String, + /// marker for incomplete strings + continuation: Option, +} + +impl SpellerMetadataAcceptor { + /// Get the acceptor type + pub fn type_(&self) -> &str { + &self.type_ + } + + /// Get the acceptor ID + pub fn id(&self) -> &str { + &self.id + } + + /// Get the localized titles + pub fn title(&self) -> &[SpellerTitle] { + &self.title + } + + /// Get the description + pub fn description(&self) -> &str { + &self.description + } + + /// Get the continuation marker for incomplete strings + pub fn continuation(&self) -> Option<&str> { + self.continuation.as_deref() + } + + /// Set the acceptor ID + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn set_id(&mut self, id: String) { + self.id = id; + } +} + +/// Error model metadata +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct SpellerMetadataErrmodel { + /// locally unique id for the error model + id: String, + /// localised human readable titles for the error model + title: Vec, + /// human readable description of the error model + description: String, +} + +impl SpellerMetadataErrmodel { + /// Get the error model ID + pub fn id(&self) -> &str { + &self.id + } + + /// Get the localized titles + pub fn title(&self) -> &[SpellerTitle] { + &self.title + } + + /// Get the description + pub fn description(&self) -> &str { + &self.description + } + + /// Set the error model ID + /// + /// # Warning + /// This method is only for internal tooling use and should not be used in normal applications. + /// It may be removed in a future version. + #[doc(hidden)] + pub fn set_id(&mut self, id: String) { + self.id = id; + } +} + +impl std::str::FromStr for SpellerMetadata { + type Err = Error; + + fn from_str(string: &str) -> Result { + SpellerMetadata::from_bytes(string.as_bytes()) + } +} + +impl SpellerMetadata { + pub fn from_bytes(bytes: &[u8]) -> Result { + let mut reader = ParserConfig::new() + .trim_whitespace(true) + .ignore_comments(true) + .coalesce_characters(true) + .create_reader(bytes) + .into_inner(); + + from_reader(&mut reader) + } +} + +#[test] +fn test_xml_parse() { + use std::str::FromStr; + + let xml_data = r##" + + + + se + Giellatekno/Divvun/UiT fst-based speller for Northern Sami + This is an fst-based speller for Northern Sami. It is based + on the normative subset of the morphological analyzer for Northern Sami. + The source code can be found at: + https://victorio.uit.no/langtech/trunk/langs/sme/ + License: GPL3+. + GT_VERSION + DATE + Giellatekno/Divvun/UiT contributors + + + + Giellatekno/Divvun/UiT dictionary Northern Sami + Giellatekno/Divvun/UiT dictionary for + Northern Sami compiled for HFST. + + + Levenshtein edit distance transducer + Correction model for keyboard misstrokes, at most 2 per + word. + + errormodel.default.hfst + + + "##; + + let s = SpellerMetadata::from_str(&xml_data).unwrap(); + println!("{:#?}", s); +} diff --git a/divvunspell/src/archive/mod.rs b/src/archive/mod.rs similarity index 76% rename from divvunspell/src/archive/mod.rs rename to src/archive/mod.rs index 99f1f5f..fc9746e 100644 --- a/divvunspell/src/archive/mod.rs +++ b/src/archive/mod.rs @@ -7,16 +7,12 @@ pub mod error; pub mod meta; pub mod zip; -use error::PredictorArchiveError; - pub use self::{boxf::BoxSpellerArchive, zip::ZipSpellerArchive}; use self::{ - boxf::ThfstChunkedBoxSpellerArchive, - error::SpellerArchiveError, - meta::{PredictorMetadata, SpellerMetadata}, + boxf::ThfstChunkedBoxSpellerArchive, error::SpellerArchiveError, meta::SpellerMetadata, }; -use crate::{predictor::Predictor, speller::Speller}; +use crate::speller::Speller; pub(crate) struct TempMmap { mmap: Arc, @@ -46,23 +42,14 @@ pub trait SpellerArchive { where Self: Sized; - /// retrieve spell-checker. + /// Retrieve spell-checker. + /// + /// The returned speller can perform both spell checking and morphological analysis + /// depending on the `OutputMode` passed to `suggest()`. fn speller(&self) -> Arc; - /// retrieve metadata. - fn metadata(&self) -> Option<&SpellerMetadata>; -} -/// Predictor archive is a file read intoo a predictor with metadata. -pub trait PredictorArchive { - /// Read and parse a predictor archive. - fn open(path: &Path, predictor_name: Option<&str>) -> Result - where - Self: Sized; - - /// Retrieve predictor. - fn predictor(&self) -> Arc; - /// retrieve metadata. - fn metadata(&self) -> Option<&PredictorMetadata>; + /// Retrieve metadata. + fn metadata(&self) -> Option<&SpellerMetadata>; } /// Reads a speller archive. @@ -89,7 +76,7 @@ pub(crate) mod ffi { use cffi::{FromForeign, ToForeign}; use std::error::Error; - #[cffi::marshal(return_marshaler = "cffi::ArcMarshaler::")] + #[cffi::marshal(return_marshaler = cffi::ArcMarshaler::)] pub extern "C" fn divvun_speller_archive_open( #[marshal(cffi::PathBufMarshaler)] path: std::path::PathBuf, ) -> Result, Box> { @@ -112,7 +99,7 @@ pub(crate) mod ffi { >, ) -> Result> { match handle.metadata() { - Some(v) => Ok(v.info.locale.to_string()), + Some(v) => Ok(v.info().locale().to_string()), None => Err(Box::new(SpellerArchiveError::NoMetadata) as _), } } diff --git a/divvunspell/src/archive/zip.rs b/src/archive/zip.rs similarity index 97% rename from divvunspell/src/archive/zip.rs rename to src/archive/zip.rs index 5c763cd..710a729 100644 --- a/divvunspell/src/archive/zip.rs +++ b/src/archive/zip.rs @@ -82,8 +82,8 @@ impl SpellerArchive for ZipSpellerArchive { .map_err(|e| SpellerArchiveError::Io("index.xml".into(), e.into()))?; let metadata = SpellerMetadata::from_bytes(&*metadata_mmap.map()).expect("meta"); - let acceptor_id = &metadata.acceptor.id; - let errmodel_id = &metadata.errmodel.id; + let acceptor_id = metadata.acceptor().id(); + let errmodel_id = metadata.errmodel().id(); let acceptor_mmap = mmap_by_name(&mut file, &mut archive, &acceptor_id) .map_err(|e| SpellerArchiveError::Io(acceptor_id.into(), e.into()))?; diff --git a/divvunspell/src/constants.rs b/src/constants.rs similarity index 72% rename from divvunspell/src/constants.rs rename to src/constants.rs index 1d75d2a..365c23f 100644 --- a/divvunspell/src/constants.rs +++ b/src/constants.rs @@ -1,6 +1,8 @@ -pub const INDEX_TABLE_SIZE: usize = 6; -pub const TRANS_TABLE_SIZE: usize = 12; -pub const TARGET_TABLE: u32 = 2_147_483_648; +use crate::types::TransitionTableIndex; + +pub(crate) const INDEX_TABLE_SIZE: usize = 6; +pub(crate) const TRANS_TABLE_SIZE: usize = 12; +pub(crate) const TARGET_TABLE: TransitionTableIndex = TransitionTableIndex(2_147_483_648); #[cfg(test)] mod tests { diff --git a/divvunspell/src/ffi/fbs/mod.rs b/src/ffi/fbs/mod.rs similarity index 100% rename from divvunspell/src/ffi/fbs/mod.rs rename to src/ffi/fbs/mod.rs diff --git a/divvunspell/src/ffi/fbs/tokenizer.rs b/src/ffi/fbs/tokenizer.rs similarity index 100% rename from divvunspell/src/ffi/fbs/tokenizer.rs rename to src/ffi/fbs/tokenizer.rs diff --git a/divvunspell/src/ffi/mod.rs b/src/ffi/mod.rs similarity index 100% rename from divvunspell/src/ffi/mod.rs rename to src/ffi/mod.rs diff --git a/divvunspell/src/lib.rs b/src/lib.rs similarity index 65% rename from divvunspell/src/lib.rs rename to src/lib.rs index 7fddde2..5626f11 100644 --- a/divvunspell/src/lib.rs +++ b/src/lib.rs @@ -33,11 +33,20 @@ pub mod archive; pub mod ffi; pub mod paths; -pub mod predictor; pub mod speller; pub mod tokenizer; pub mod transducer; + +/// Virtual filesystem abstraction (internal use only) +/// +/// **Warning:** This module is only for internal tooling use and should not be used in normal applications. +/// It may be removed or significantly changed in a future version without a major version bump. +/// Use the higher-level [`archive`] module APIs instead. +#[doc(hidden)] pub mod vfs; pub(crate) mod constants; -pub(crate) mod types; +/// Core types for transducers and spell-checking. +/// +/// This module contains type aliases and enums used throughout the transducer API. +pub mod types; diff --git a/divvunspell/src/paths.rs b/src/paths.rs similarity index 87% rename from divvunspell/src/paths.rs rename to src/paths.rs index ed84a3b..662cdde 100644 --- a/divvunspell/src/paths.rs +++ b/src/paths.rs @@ -3,11 +3,15 @@ use std::path::PathBuf; #[cfg(target_os = "windows")] use std::path::PathBuf; +#[cfg(target_os = "linux")] +use std::path::PathBuf; #[cfg(target_os = "macos")] use language_tags::LanguageTag; #[cfg(target_os = "windows")] use language_tags::LanguageTag; +#[cfg(target_os = "linux")] +use language_tags::LanguageTag; #[cfg(target_os = "macos")] pub fn find_speller_path(tag: LanguageTag) -> Option { @@ -48,3 +52,8 @@ pub fn find_speller_path(tag: LanguageTag) -> Option { .next() .map(|x| x.path().to_path_buf()) } + +#[cfg(target_os = "linux")] +pub fn find_speller_path(tag: LanguageTag) -> Option { + None +} diff --git a/divvunspell/src/speller/mod.rs b/src/speller/mod.rs similarity index 67% rename from divvunspell/src/speller/mod.rs rename to src/speller/mod.rs index 55d7a20..3d4384a 100644 --- a/divvunspell/src/speller/mod.rs +++ b/src/speller/mod.rs @@ -18,6 +18,26 @@ use crate::types::{SymbolNumber, Weight}; pub mod suggestion; mod worker; +/// Controls whether morphological tags are preserved in FST output. +/// +/// When traversing an FST, epsilon transitions can either preserve their symbols +/// (keeping morphological tags like "+V", "+Noun", etc.) or convert them to true +/// epsilons (stripping the tags from the output). +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) enum OutputMode { + /// Strip morphological tags from output. + /// + /// Used for spelling correction where you want clean word forms without tags. + /// Example: "run" instead of "run+V+PresPartc" + WithoutTags, + + /// Keep morphological tags in output. + /// + /// Used for morphological analysis where you want to see the linguistic structure. + /// Example: "run+V+PresPartc" instead of "run" + WithTags, +} + /// configurable extra penalties for edit distance #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] @@ -77,6 +97,8 @@ pub struct SpellerConfig { /// some parallel stuff? #[serde(default = "default_node_pool_size")] pub node_pool_size: usize, + /// used when suggesting unfinished word parts + pub continuation_marker: Option, /// whether we try to recase mispelt word before other suggestions #[serde(default = "default_recase")] pub recase: bool, @@ -97,6 +119,7 @@ impl SpellerConfig { beam: default_beam(), reweight: default_reweight(), node_pool_size: default_node_pool_size(), + continuation_marker: None, recase: default_recase(), } } @@ -107,7 +130,7 @@ const fn default_n_best() -> Option { } const fn default_max_weight() -> Option { - Some(10000.0) + Some(Weight(10000.0)) } const fn default_beam() -> Option { @@ -125,18 +148,75 @@ const fn default_node_pool_size() -> usize { const fn default_recase() -> bool { true } - -/// can determine if string is a correct word or suggest corrections. -/// Also with SpellerConfig. +/// FST-based spell checker and morphological analyzer. +/// +/// This trait provides methods for spell checking and morphological analysis +/// using finite-state transducers. The same FST traversal logic is used for both +/// operations - the difference is controlled by the `OutputMode`: +/// +/// - `OutputMode::WithoutTags` strips morphological tags (for spelling correction) +/// - `OutputMode::WithTags` preserves morphological tags (for morphological analysis) pub trait Speller { - /// check if the word is correctly spelled + /// Check if the word is correctly spelled + #[must_use] fn is_correct(self: Arc, word: &str) -> bool; - /// check if word is correctly spelled with config recasing etc. + + /// Check if word is correctly spelled with config (handles recasing, etc.) + #[must_use] fn is_correct_with_config(self: Arc, word: &str, config: &SpellerConfig) -> bool; - /// suggest corrections to word + + /// Generate suggestions or analyses for a word. + #[must_use] fn suggest(self: Arc, word: &str) -> Vec; - /// suggest corrections with recasing and reweighting from config + + /// Generate suggestions with config options (recasing, reweighting, etc.) + #[must_use] fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec; + + /// Analyze the input word form. + /// + /// Performs lexicon-only traversal (no error model) to get morphological analyses + /// of exactly what was typed. Does not generate spelling corrections. + #[must_use] + fn analyze_input(self: Arc, word: &str) -> Vec; + + /// Analyze input word form with config options. + #[must_use] + fn analyze_input_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec; + + /// Analyze the suggested word forms. + /// + /// Generates spelling corrections using the error model, then returns them with + /// morphological tags preserved (equivalent to `suggest(word, OutputMode::WithTags)`). + #[must_use] + fn analyze_output(self: Arc, word: &str) -> Vec; + + /// Analyze suggested word forms with config options. + #[must_use] + fn analyze_output_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec; + + /// Create suggestion list and use their analyses for filtering. + /// + /// Gets spelling corrections, analyzes each one, and filters based on + /// morphological analysis results. + #[must_use] + fn analyze_suggest(self: Arc, word: &str) -> Vec; + + /// Create suggestion list and use analyses for filtering with config. + #[must_use] + fn analyze_suggest_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec; } impl Speller for HfstSpeller @@ -172,7 +252,12 @@ where config ); for word in std::iter::once(word.into()).chain(words.into_iter()) { - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); + let worker = SpellerWorker::new( + self.clone(), + self.to_input_vec(&word), + config.clone(), + OutputMode::WithoutTags, + ); if worker.is_correct() { return true; @@ -193,19 +278,75 @@ where } fn suggest_with_config(self: Arc, word: &str, config: &SpellerConfig) -> Vec { - use crate::tokenizer::case_handling::*; + self._suggest_with_config(word, config, OutputMode::WithoutTags) + } - if word.len() == 0 { + fn analyze_input_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec { + if word.is_empty() { return vec![]; } - if let Some(reweight) = config.reweight.as_ref() { - let case_handler = word_variants(word); + let worker = SpellerWorker::new( + self.clone(), + self.to_input_vec(word), + config.clone(), + OutputMode::WithTags, + ); - self.suggest_case(case_handler, config, reweight) - } else { - self.suggest_single(word, config) - } + log::trace!("Beginning analyze_input with config"); + worker.analyze() + } + + #[inline] + fn analyze_input(self: Arc, word: &str) -> Vec { + self.analyze_input_with_config(word, &SpellerConfig::default()) + } + + fn analyze_output_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec { + self._suggest_with_config(word, config, OutputMode::WithTags) + } + + #[inline] + fn analyze_output(self: Arc, word: &str) -> Vec { + self.analyze_output_with_config(word, &SpellerConfig::default()) + } + + fn analyze_suggest_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + ) -> Vec { + let mut suggs = self.clone().suggest_with_config(word, config); + suggs.retain(|sugg| { + log::trace!("suggestion {}", sugg.value); + let analyses = self + .clone() + .analyze_input_with_config(sugg.value.as_str(), config); + let mut all_filtered = true; + for analysis in analyses { + log::trace!("-> {}", analysis.value); + if !analysis.value.contains("+Spell/NoSugg") { + all_filtered = false; + } else { + log::trace!("filtering=?"); + } + } + !all_filtered + }); + suggs + } + + #[inline] + fn analyze_suggest(self: Arc, word: &str) -> Vec { + self.analyze_suggest_with_config(word, &SpellerConfig::default()) } } @@ -241,6 +382,27 @@ where }) } + fn _suggest_with_config( + self: Arc, + word: &str, + config: &SpellerConfig, + mode: OutputMode, + ) -> Vec { + use crate::tokenizer::case_handling::*; + + if word.len() == 0 { + return vec![]; + } + + if let Some(reweight) = config.reweight.as_ref() { + let case_handler = word_variants(word); + + self.suggest_case(case_handler, config, reweight, mode) + } else { + self.suggest_single(word, config, mode) + } + } + /// get the error model automaton pub fn mutator(&self) -> &T { &self.mutator @@ -266,15 +428,22 @@ where key_table .iter() .position(|x| x == &s) - .map(|x| x as u16) - .unwrap_or_else(|| alphabet.unknown().unwrap_or(0u16)) + .map(|x| SymbolNumber(x as u16)) + .unwrap_or_else(|| alphabet.unknown().unwrap_or(SymbolNumber::ZERO)) }) .collect() } - fn suggest_single(self: Arc, word: &str, config: &SpellerConfig) -> Vec { - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone()); + fn suggest_single( + self: Arc, + word: &str, + config: &SpellerConfig, + mode: OutputMode, + ) -> Vec { + let worker = + SpellerWorker::new(self.clone(), self.to_input_vec(word), config.clone(), mode); + log::trace!("suggesting single {}", word); worker.suggest() } @@ -283,19 +452,27 @@ where case: CaseHandler, config: &SpellerConfig, reweight: &ReweightingConfig, + output_mode: OutputMode, ) -> Vec { use crate::tokenizer::case_handling::*; + log::trace!("suggesting cases..."); let CaseHandler { original_input, mutation, mode, words, } = case; - let mut best: HashMap = HashMap::new(); + let mut best: HashMap = HashMap::new(); for word in std::iter::once(&original_input).chain(words.iter()) { - let worker = SpellerWorker::new(self.clone(), self.to_input_vec(&word), config.clone()); + log::trace!("suggesting for word {}", word); + let worker = SpellerWorker::new( + self.clone(), + self.to_input_vec(&word), + config.clone(), + output_mode, + ); let mut suggestions = worker.suggest(); match mutation { @@ -314,7 +491,9 @@ where match mode { CaseMode::MergeAll => { + log::trace!("Case merge all"); for sugg in suggestions.into_iter() { + log::trace!("for {}", sugg.value); let penalty_start = if !sugg.value().starts_with(word.chars().next().unwrap()) { reweight.start_penalty - reweight.mid_penalty @@ -332,11 +511,12 @@ where strsim::damerau_levenshtein(&words[0].as_str(), &word.as_str()) + strsim::damerau_levenshtein(&word.as_str(), sugg.value()); let penalty_middle = reweight.mid_penalty * distance as f32; - let additional_weight = if sugg.value.chars().all(|c| is_emoji(c)) { - 0.0 - } else { - penalty_start + penalty_end + penalty_middle - }; + let additional_weight = + Weight(if sugg.value.chars().all(|c| is_emoji(c)) { + 0.0 + } else { + penalty_start + penalty_end + penalty_middle + }); log::trace!( "Penalty: +{} = {} + {} * {} + {}", additional_weight, @@ -374,14 +554,26 @@ where if best.is_empty() { return vec![]; } - - let mut out = best - .into_iter() - .map(|(k, v)| Suggestion { - value: k, - weight: v, - }) - .collect::>(); + let mut out: Vec; + if let Some(s) = &config.continuation_marker { + out = best + .into_iter() + .map(|(k, v)| Suggestion { + value: k.clone(), + weight: v, + completed: Some(!k.ends_with(s)), + }) + .collect::>(); + } else { + out = best + .into_iter() + .map(|(k, v)| Suggestion { + value: k, + weight: v, + completed: None, + }) + .collect::>(); + } out.sort(); if let Some(n_best) = config.n_best { out.truncate(n_best); @@ -498,6 +690,7 @@ pub(crate) mod ffi { }, reweight, node_pool_size: config.node_pool_size, + continuation_marker: None, recase: true, }; diff --git a/divvunspell/src/speller/suggestion.rs b/src/speller/suggestion.rs similarity index 58% rename from divvunspell/src/speller/suggestion.rs rename to src/speller/suggestion.rs index a331f68..f9fff9d 100644 --- a/divvunspell/src/speller/suggestion.rs +++ b/src/speller/suggestion.rs @@ -6,23 +6,41 @@ use std::cmp::Ordering; use std::cmp::Ordering::Equal; #[derive(Clone, Debug, Serialize, Deserialize)] +/// Suggestion for a spelling correction pub struct Suggestion { + /// the suggested word-form pub value: SmolStr, + /// total penalty weight of the word-form pub weight: Weight, + /// whether the word is completed or partial + #[serde(skip_serializing_if = "Option::is_none")] + pub completed: Option, } impl Suggestion { - pub fn new(value: SmolStr, weight: Weight) -> Suggestion { - Suggestion { value, weight } + /// creates a spelling correction suggestion + pub fn new(value: SmolStr, weight: Weight, completed: Option) -> Suggestion { + Suggestion { + value, + weight, + completed, + } } + /// gets the suggested word-form pub fn value(&self) -> &str { &self.value } + /// gets the penalty weight of the suggestion pub fn weight(&self) -> Weight { self.weight } + + /// returns whether this suggestion is a full word or partial + pub fn completed(&self) -> Option { + self.completed + } } impl PartialOrd for Suggestion { diff --git a/divvunspell/src/speller/worker.rs b/src/speller/worker.rs similarity index 72% rename from divvunspell/src/speller/worker.rs rename to src/speller/worker.rs index 9072f09..375a26e 100644 --- a/divvunspell/src/speller/worker.rs +++ b/src/speller/worker.rs @@ -1,19 +1,18 @@ use hashbrown::HashMap; use smol_str::SmolStr; -use std::f32; use std::sync::Arc; use lifeguard::{Pool, Recycled}; -use super::{HfstSpeller, SpellerConfig}; +use super::{HfstSpeller, OutputMode, SpellerConfig}; use crate::speller::suggestion::Suggestion; use crate::transducer::tree_node::TreeNode; use crate::transducer::Transducer; -use crate::types::{SymbolNumber, Weight}; +use crate::types::{SymbolNumber, TransitionTableIndex, ValueNumber, Weight}; #[inline(always)] -fn speller_start_node(pool: &Pool, size: usize) -> Vec> { - let start_node = TreeNode::empty(pool, vec![0; size]); +fn speller_start_node(pool: &Pool, size: usize) -> Vec> { + let start_node = TreeNode::empty(pool, vec![ValueNumber::ZERO; size]); let mut nodes = Vec::with_capacity(256); nodes.push(start_node); nodes @@ -23,6 +22,7 @@ pub struct SpellerWorker, U: Transducer speller: Arc>, input: Vec, config: SpellerConfig, + output_mode: OutputMode, } #[allow(clippy::too_many_arguments)] @@ -37,11 +37,13 @@ where speller: Arc>, input: Vec, config: SpellerConfig, + output_mode: OutputMode, ) -> SpellerWorker { SpellerWorker { speller, input, config, + output_mode, } } @@ -56,21 +58,27 @@ where let lexicon = self.speller.lexicon(); let operations = lexicon.alphabet().operations(); - if !lexicon.has_epsilons_or_flags(next_node.lexicon_state + 1) { + if !lexicon.has_epsilons_or_flags(next_node.lexicon_state.incr()) { return; } - let mut next = lexicon.next(next_node.lexicon_state, 0).unwrap(); + let mut next = lexicon + .next(next_node.lexicon_state, SymbolNumber::ZERO) + .unwrap(); while let Some(transition) = lexicon.take_epsilons_and_flags(next) { if let Some(sym) = lexicon.transition_input_symbol(next) { let transition_weight = transition.weight().unwrap(); - if sym == 0 { + if sym == SymbolNumber::ZERO { if self .is_under_weight_limit(max_weight, next_node.weight() + transition_weight) { - let new_node = next_node.update_lexicon(pool, transition); + let new_node = match self.output_mode { + OutputMode::WithoutTags => next_node + .update_lexicon(pool, transition.clone_with_epsilon_symbol()), + OutputMode::WithTags => next_node.update_lexicon(pool, transition), + }; output_nodes.push(new_node); } } else { @@ -78,7 +86,7 @@ where if let Some(op) = operation { if !self.is_under_weight_limit(max_weight, transition_weight) { - next += 1; + next = next.incr(); continue; } @@ -90,7 +98,7 @@ where } } - next += 1; + next = next.incr(); } } @@ -106,14 +114,16 @@ where let lexicon = self.speller.lexicon(); let alphabet_translator = self.speller.alphabet_translator(); - if !mutator.has_transitions(next_node.mutator_state + 1, Some(0)) { + if !mutator.has_transitions(next_node.mutator_state.incr(), Some(SymbolNumber::ZERO)) { return; } - let mut next_m = mutator.next(next_node.mutator_state, 0).unwrap(); + let mut next_m = mutator + .next(next_node.mutator_state, SymbolNumber::ZERO) + .unwrap(); while let Some(transition) = mutator.take_epsilons(next_m) { - if let Some(0) = transition.symbol() { + if let Some(SymbolNumber::ZERO) = transition.symbol() { if self.is_under_weight_limit( max_weight, next_node.weight() + transition.weight().unwrap(), @@ -122,20 +132,20 @@ where output_nodes.push(new_node); } - next_m += 1; + next_m = next_m.incr(); continue; } if let Some(sym) = transition.symbol() { - let trans_sym = alphabet_translator[sym as usize]; + let trans_sym = alphabet_translator[sym.0 as usize]; - if !lexicon.has_transitions(next_node.lexicon_state + 1, Some(trans_sym)) { + if !lexicon.has_transitions(next_node.lexicon_state.incr(), Some(trans_sym)) { // we have no regular transitions for this if trans_sym >= lexicon.alphabet().initial_symbol_count() { // this input was not originally in the alphabet, so unknown or identity // may apply if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().unknown(), ) { self.queue_lexicon_arcs( @@ -151,7 +161,7 @@ where } if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().identity(), ) { self.queue_lexicon_arcs( @@ -167,7 +177,7 @@ where } } - next_m += 1; + next_m = next_m.incr(); continue; } @@ -183,7 +193,7 @@ where ); } - next_m += 1; + next_m = next_m.incr(); } } @@ -194,7 +204,7 @@ where max_weight: Weight, next_node: &TreeNode, input_sym: SymbolNumber, - mutator_state: u32, + mutator_state: TransitionTableIndex, mutator_weight: Weight, input_increment: i16, output_nodes: &mut Vec>, @@ -210,7 +220,7 @@ where // Symbol replacement here is unfortunate but necessary. if let Some(id) = identity { if sym == id { - sym = self.input[next_node.input_state as usize]; + sym = self.input[next_node.input_state.0 as usize]; } } @@ -220,20 +230,29 @@ where ); if is_under_weight_limit { - let new_node = next_node.update( - pool, - sym, - Some(next_node.input_state + input_increment as u32), - mutator_state, - noneps_trans.target().unwrap(), - noneps_trans.weight().unwrap() + mutator_weight, - ); - + let new_node = match self.output_mode { + OutputMode::WithoutTags => next_node.update( + pool, + input_sym, + Some(next_node.input_state.incr(input_increment as u32)), + mutator_state, + noneps_trans.target().unwrap(), + noneps_trans.weight().unwrap() + mutator_weight, + ), + OutputMode::WithTags => next_node.update( + pool, + sym, + Some(next_node.input_state.incr(input_increment as u32)), + mutator_state, + noneps_trans.target().unwrap(), + noneps_trans.weight().unwrap() + mutator_weight, + ), + }; output_nodes.push(new_node); } } - next += 1; + next = next.incr(); } } @@ -255,13 +274,13 @@ where while let Some(transition) = mutator.take_non_epsilons(next_m, input_sym) { let symbol = transition.symbol(); - if let Some(0) = symbol { + if let Some(SymbolNumber::ZERO) = symbol { let transition_weight = transition.weight().unwrap(); if self.is_under_weight_limit(max_weight, next_node.weight() + transition_weight) { let new_node = next_node.update( pool, - 0, - Some(next_node.input_state + 1), + SymbolNumber::ZERO, + Some(next_node.input_state.incr(1)), transition.target().unwrap(), next_node.lexicon_state, transition_weight, @@ -270,17 +289,17 @@ where output_nodes.push(new_node); } - next_m += 1; + next_m = next_m.incr(); continue; } if let Some(sym) = symbol { - let trans_sym = alphabet_translator[sym as usize]; + let trans_sym = alphabet_translator[sym.0 as usize]; - if !lexicon.has_transitions(next_node.lexicon_state + 1, Some(trans_sym)) { + if !lexicon.has_transitions(next_node.lexicon_state.incr(), Some(trans_sym)) { if trans_sym >= lexicon.alphabet().initial_symbol_count() { if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().unknown(), ) { self.queue_lexicon_arcs( @@ -295,7 +314,7 @@ where ); } if lexicon.has_transitions( - next_node.lexicon_state + 1, + next_node.lexicon_state.incr(), lexicon.alphabet().identity(), ) { self.queue_lexicon_arcs( @@ -310,7 +329,7 @@ where ); } } - next_m += 1; + next_m = next_m.incr(); continue; } @@ -325,7 +344,7 @@ where output_nodes, ); - next_m += 1; + next_m = next_m.incr(); } } } @@ -339,7 +358,7 @@ where output_nodes: &mut Vec>, ) { let mutator = self.speller.mutator(); - let input_state = next_node.input_state as usize; + let input_state = next_node.input_state.0 as usize; if input_state >= self.input.len() { return; @@ -347,12 +366,13 @@ where let input_sym = self.input[input_state]; - if !mutator.has_transitions(next_node.mutator_state + 1, Some(input_sym)) { + if !mutator.has_transitions(next_node.mutator_state.incr(), Some(input_sym)) { // we have no regular transitions for this if input_sym >= mutator.alphabet().initial_symbol_count() { - if mutator - .has_transitions(next_node.mutator_state + 1, mutator.alphabet().identity()) - { + if mutator.has_transitions( + next_node.mutator_state.incr(), + mutator.alphabet().identity(), + ) { self.queue_mutator_arcs( pool, max_weight, @@ -364,7 +384,7 @@ where // Check for unknown transition if mutator - .has_transitions(next_node.mutator_state + 1, mutator.alphabet().unknown()) + .has_transitions(next_node.mutator_state.incr(), mutator.alphabet().unknown()) { self.queue_mutator_arcs( pool, @@ -391,22 +411,22 @@ where let mutator = self.speller.mutator(); let lexicon = self.speller.lexicon(); let alphabet_translator = self.speller.alphabet_translator(); - let input_state = next_node.input_state as usize; + let input_state = next_node.input_state.0 as usize; if input_state >= self.input.len() { return; } - let input_sym = alphabet_translator[self.input[input_state as usize] as usize]; - let next_lexicon_state = next_node.lexicon_state + 1; - log::trace!( - "lexicon consuming {}: {}", - input_sym, - self.speller - .lexicon - .alphabet() - .string_from_symbols(&[input_sym]) - ); + let input_sym = alphabet_translator[self.input[input_state as usize].0 as usize]; + let next_lexicon_state = next_node.lexicon_state.incr(); + // log::trace!( + // "lexicon consuming {}: {}", + // input_sym, + // self.speller + // .lexicon + // .alphabet() + // .string_from_symbols(&[input_sym]) + // ); if !lexicon.has_transitions(next_lexicon_state, Some(input_sym)) { // we have no regular transitions for this @@ -419,7 +439,7 @@ where &next_node, identity.unwrap(), next_node.mutator_state, - 0.0, + Weight::ZERO, 1, output_nodes, ); @@ -433,7 +453,7 @@ where &next_node, unknown.unwrap(), next_node.mutator_state, - 0.0, + Weight::ZERO, 1, output_nodes, ); @@ -449,7 +469,7 @@ where &next_node, input_sym, next_node.mutator_state, - 0.0, + Weight::ZERO, 1, output_nodes, ); @@ -460,7 +480,7 @@ where use std::cmp::Ordering::{Equal, Less}; let c = &self.config; - let mut max_weight = c.max_weight.unwrap_or(f32::MAX); + let mut max_weight = c.max_weight.unwrap_or(Weight::MAX); if let Some(beam) = c.beam { let candidate_weight = best_weight + beam; @@ -487,28 +507,63 @@ where #[inline(always)] fn state_size(&self) -> usize { - self.speller.lexicon().alphabet().state_size() as usize + self.speller.lexicon().alphabet().state_size().0 as usize } pub(crate) fn is_correct(&self) -> bool { + log::trace!("is_correct"); // let max_weight = speller_max_weight(&self.config); let pool = Pool::with_size_and_max(0, 0); let mut nodes = speller_start_node(&pool, self.state_size() as usize); log::trace!("beginning is_correct {:?}?", self.input); while let Some(next_node) = nodes.pop() { - if next_node.input_state as usize == self.input.len() + if next_node.input_state.0 as usize == self.input.len() && self.speller.lexicon().is_final(next_node.lexicon_state) { return true; } - self.lexicon_epsilons(&pool, f32::INFINITY, &next_node, &mut nodes); - self.lexicon_consume(&pool, f32::INFINITY, &next_node, &mut nodes); + self.lexicon_epsilons(&pool, Weight::INFINITE, &next_node, &mut nodes); + self.lexicon_consume(&pool, Weight::INFINITE, &next_node, &mut nodes); } false } + pub(crate) fn analyze(&self) -> Vec { + log::trace!("Beginning analyze"); + let pool = Pool::with_size_and_max(0, 0); + let mut nodes = speller_start_node(&pool, self.state_size() as usize); + log::trace!("beginning analyze {:?}", self.input); + let mut lookups = HashMap::new(); + let mut analyses: Vec = vec![]; + while let Some(next_node) = nodes.pop() { + if next_node.input_state.0 as usize == self.input.len() + && self.speller.lexicon().is_final(next_node.lexicon_state) + { + let string = self + .speller + .lexicon() + .alphabet() + .string_from_symbols(&next_node.string); + let weight = next_node.weight() + + self + .speller + .lexicon() + .final_weight(next_node.lexicon_state) + .unwrap(); + let entry = lookups.entry(string).or_insert(weight); + if *entry > weight { + *entry = weight; + } + } + self.lexicon_epsilons(&pool, Weight::INFINITE, &next_node, &mut nodes); + self.lexicon_consume(&pool, Weight::INFINITE, &next_node, &mut nodes); + analyses = self.generate_sorted_suggestions(&lookups); + } + analyses + } + pub(crate) fn suggest(&self) -> Vec { log::trace!("Beginning suggest"); @@ -516,7 +571,7 @@ where let mut nodes = speller_start_node(&pool, self.state_size() as usize); let mut corrections = HashMap::new(); let mut suggestions: Vec = vec![]; - let mut best_weight = self.config.max_weight.unwrap_or(f32::MAX); + let mut best_weight = self.config.max_weight.unwrap_or(Weight::MAX); let key_table = self.speller.mutator().alphabet().key_table(); let mut iteration_count = 0usize; @@ -530,7 +585,7 @@ where let name: SmolStr = self .input .iter() - .map(|s| &*key_table[*s as usize]) + .map(|s| &*key_table[s.0 as usize]) .collect(); log::warn!("{}: iteration count at {}", name, iteration_count); log::warn!("Node count: {}", nodes.len()); @@ -545,7 +600,7 @@ where self.lexicon_epsilons(&pool, max_weight, &next_node, &mut nodes); self.mutator_epsilons(&pool, max_weight, &next_node, &mut nodes); - if next_node.input_state as usize != self.input.len() { + if next_node.input_state.0 as usize != self.input.len() { self.consume_input(&pool, max_weight, &next_node, &mut nodes); continue; } @@ -577,7 +632,7 @@ where .lexicon() .alphabet() .string_from_symbols(&next_node.string); - + // log::trace!("suggesting? {}::{}", string, weight); if weight < best_weight { best_weight = weight; } @@ -592,7 +647,6 @@ where suggestions = self.generate_sorted_suggestions(&corrections); } - suggestions } @@ -600,17 +654,24 @@ where &self, corrections: &HashMap, ) -> Vec { - let mut c: Vec = corrections - .into_iter() - .map(|x| Suggestion::new(x.0.clone(), *x.1)) - .collect(); - + //log::trace!("Generating sorted suggestions"); + let mut c: Vec; + if let Some(s) = &self.config.continuation_marker { + c = corrections + .into_iter() + .map(|x| Suggestion::new(x.0.clone(), *x.1, Some(x.0.ends_with(s)))) + .collect(); + } else { + c = corrections + .into_iter() + .map(|x| Suggestion::new(x.0.clone(), *x.1, None)) + .collect(); + } c.sort(); if let Some(n) = self.config.n_best { c.truncate(n); } - c } } diff --git a/divvunspell/src/tokenizer/case_handling.rs b/src/tokenizer/case_handling.rs similarity index 100% rename from divvunspell/src/tokenizer/case_handling.rs rename to src/tokenizer/case_handling.rs diff --git a/divvunspell/src/tokenizer/mod.rs b/src/tokenizer/mod.rs similarity index 97% rename from divvunspell/src/tokenizer/mod.rs rename to src/tokenizer/mod.rs index 6b59e9c..a0e871a 100644 --- a/divvunspell/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -27,8 +27,8 @@ impl<'a> Iterator for WordIndices<'a> { pub trait Tokenize { fn word_bound_indices(&self) -> WordBoundIndices<'_>; fn word_indices(&self) -> WordIndices<'_>; - fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices; - fn words_with_alphabet(&self, alphabet: Vec) -> Words; + fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices<'_>; + fn words_with_alphabet(&self, alphabet: Vec) -> Words<'_>; } impl Tokenize for str { @@ -42,11 +42,11 @@ impl Tokenize for str { } } - fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices { + fn word_bound_indices_with_alphabet(&self, alphabet: Vec) -> WordBoundIndices<'_> { WordBoundIndices::new_with_alphabet(self, alphabet) } - fn words_with_alphabet(&self, alphabet: Vec) -> Words { + fn words_with_alphabet(&self, alphabet: Vec) -> Words<'_> { Words::new_with_alphabet(self, |s| s.chars().any(|ch| ch.is_alphanumeric()), alphabet) } } diff --git a/divvunspell/src/tokenizer/tables/word_break.rsv b/src/tokenizer/tables/word_break.rsv similarity index 100% rename from divvunspell/src/tokenizer/tables/word_break.rsv rename to src/tokenizer/tables/word_break.rsv diff --git a/divvunspell/src/tokenizer/word.rs b/src/tokenizer/word.rs similarity index 100% rename from divvunspell/src/tokenizer/word.rs rename to src/tokenizer/word.rs diff --git a/divvunspell/src/tokenizer/word_break.rs b/src/tokenizer/word_break.rs similarity index 100% rename from divvunspell/src/tokenizer/word_break.rs rename to src/tokenizer/word_break.rs diff --git a/divvunspell/src/transducer/alphabet.rs b/src/transducer/alphabet.rs similarity index 84% rename from divvunspell/src/transducer/alphabet.rs rename to src/transducer/alphabet.rs index 6090425..192309b 100644 --- a/divvunspell/src/transducer/alphabet.rs +++ b/src/transducer/alphabet.rs @@ -20,7 +20,9 @@ pub struct TransducerAlphabet { impl TransducerAlphabet { #[inline(always)] pub fn string_from_symbols(&self, syms: &[SymbolNumber]) -> SmolStr { - syms.iter().map(|s| &*self.key_table[*s as usize]).collect() + syms.iter() + .map(|s| &*self.key_table[s.0 as usize]) + .collect() } #[inline(always)] @@ -50,8 +52,10 @@ impl TransducerAlphabet { #[inline(always)] pub fn add_symbol(&mut self, string: &str) { - self.string_to_symbol - .insert(string.into(), self.key_table.len() as u16); + self.string_to_symbol.insert( + string.into(), + SymbolNumber(self.key_table.len().try_into().expect("too many symbols")), + ); self.key_table.push(string.into()); } @@ -91,14 +95,14 @@ impl TransducerAlphabet { let from_keys = from.key_table(); let mut translator = Vec::with_capacity(64); - translator.push(0); + translator.push(SymbolNumber::ZERO); for from_sym in from_keys.iter().skip(1) { log::trace!("key {}", from_sym); - if let Some(&sym) = self.string_to_symbol.get(from_sym) { - translator.push(sym); + if let Some(sym) = self.string_to_symbol.get(from_sym) { + translator.push(*sym); } else { - let lexicon_key = self.key_table.len() as SymbolNumber; + let lexicon_key = SymbolNumber(self.key_table.len() as u16); translator.push(lexicon_key); self.add_symbol(from_sym); } diff --git a/divvunspell/src/transducer/convert.rs b/src/transducer/convert.rs similarity index 67% rename from divvunspell/src/transducer/convert.rs rename to src/transducer/convert.rs index cf18c41..5e26ecd 100644 --- a/divvunspell/src/transducer/convert.rs +++ b/src/transducer/convert.rs @@ -7,6 +7,7 @@ use byteorder::{LittleEndian, WriteBytesExt}; use super::hfst; use super::thfst; use crate::transducer::Transducer; +use crate::types::{SymbolNumber, TransitionTableIndex}; pub trait ConvertFile { fn convert_file(transducer: &T, path: &Path) -> Result<(), std::io::Error>; @@ -48,20 +49,17 @@ impl ConvertFrom for thfst::MemmapIndexTable Result<(), std::io::Error> { - use std::{u16, u32}; - - // eprintln!( - // "size: {}, len: {}, offset: {}", - // table.size, table.len, table.offset - // ); - - for index in 0..table.size { - let input_symbol = table.input_symbol(index).unwrap_or(u16::MAX); - let targetish = table.target(index).unwrap_or(u32::MAX); - - writer.write_u16::(input_symbol).unwrap(); + for index in 0..table.size.0 { + let input_symbol = table + .input_symbol(TransitionTableIndex(index)) + .unwrap_or(SymbolNumber::MAX); + let targetish = table + .target(TransitionTableIndex(index)) + .unwrap_or(TransitionTableIndex::MAX); + + writer.write_u16::(input_symbol.0).unwrap(); writer.write_u16::(0).unwrap(); - writer.write_u32::(targetish).unwrap(); + writer.write_u32::(targetish.0).unwrap(); } Ok(()) @@ -73,24 +71,18 @@ impl ConvertFrom for thfst::MemmapTransitionTable Result<(), std::io::Error> { - use std::{u16, u32}; - - // eprintln!( - // "size: {}, len: {}, offset: {}", - // table.size, table.len, table.offset - // ); - - for index in 0..table.size { - let input_symbol = table.input_symbol(index).unwrap_or(u16::MAX); - let output_symbol = table.output_symbol(index).unwrap_or(u16::MAX); - let target = table.target(index).unwrap_or(u32::MAX); + for index in 0..table.size.0 { + let index = TransitionTableIndex(index); + let input_symbol = table.input_symbol(index).unwrap_or(SymbolNumber::MAX); + let output_symbol = table.output_symbol(index).unwrap_or(SymbolNumber::MAX); + let target = table.target(index).unwrap_or(TransitionTableIndex::MAX); let weight = table.weight(index).unwrap(); - writer.write_u16::(input_symbol).unwrap(); - writer.write_u16::(output_symbol).unwrap(); - writer.write_u32::(target).unwrap(); + writer.write_u16::(input_symbol.0).unwrap(); + writer.write_u16::(output_symbol.0).unwrap(); + writer.write_u32::(target.0).unwrap(); writer - .write_u32::(unsafe { std::mem::transmute::(weight) }) + .write_u32::(weight.0.to_bits()) .unwrap(); } diff --git a/divvunspell/src/transducer/hfst/alphabet.rs b/src/transducer/hfst/alphabet.rs similarity index 88% rename from divvunspell/src/transducer/hfst/alphabet.rs rename to src/transducer/hfst/alphabet.rs index 306394b..e555186 100644 --- a/divvunspell/src/transducer/hfst/alphabet.rs +++ b/src/transducer/hfst/alphabet.rs @@ -25,14 +25,14 @@ impl std::default::Default for TransducerAlphabetParser { fn default() -> Self { TransducerAlphabetParser { key_table: Vec::with_capacity(64), - flag_state_size: 0, + flag_state_size: SymbolNumber::ZERO, length: 0, string_to_symbol: HashMap::new(), operations: HashMap::new(), feature_bucket: HashMap::new(), value_bucket: HashMap::new(), - val_n: 0i16, - feat_n: 0u16, + val_n: ValueNumber::ZERO, + feat_n: SymbolNumber::ZERO, identity_symbol: None, unknown_symbol: None, } @@ -64,12 +64,12 @@ impl TransducerAlphabetParser { if !self.feature_bucket.contains_key(&feature) { self.feature_bucket.insert(feature.clone(), self.feat_n); - self.feat_n += 1; + self.feat_n = self.feat_n.incr(); } if !self.value_bucket.contains_key(&value) { self.value_bucket.insert(value.clone(), self.val_n); - self.val_n += 1; + self.val_n = self.val_n.incr(); } let op = FlagDiacriticOperation { @@ -85,7 +85,8 @@ impl TransducerAlphabetParser { fn parse_inner(&mut self, buf: &[u8], symbols: SymbolNumber) { let mut offset = 0usize; - for i in 0..symbols { + for i in 0..symbols.0 { + let i = SymbolNumber(i); let mut end = 0usize; while buf[offset + end] != 0 { @@ -100,7 +101,7 @@ impl TransducerAlphabetParser { } else if key == "@_EPSILON_SYMBOL_@" { self.value_bucket.insert("".into(), self.val_n); self.key_table.push("".into()); - self.val_n += 1; + self.val_n = self.val_n.incr(); } else if key == "@_IDENTITY_SYMBOL_@" { self.identity_symbol = Some(i); self.key_table.push(key); @@ -120,7 +121,12 @@ impl TransducerAlphabetParser { offset += end + 1; } - self.flag_state_size = self.feature_bucket.len() as SymbolNumber; + self.flag_state_size = SymbolNumber( + self.feature_bucket + .len() + .try_into() + .expect("Too many features in the alphabet, cannot fit into SymbolNumber"), + ); // Count remaining null padding bytes while buf[offset] == b'\0' { diff --git a/divvunspell/src/transducer/hfst/header.rs b/src/transducer/hfst/header.rs similarity index 71% rename from divvunspell/src/transducer/hfst/header.rs rename to src/transducer/hfst/header.rs index 085025e..a735c8b 100644 --- a/divvunspell/src/transducer/hfst/header.rs +++ b/src/transducer/hfst/header.rs @@ -7,8 +7,8 @@ use crate::types::{HeaderFlag, SymbolNumber, TransitionTableIndex}; pub struct TransducerHeader { symbols: SymbolNumber, input_symbols: SymbolNumber, - trans_index_table: usize, - trans_target_table: usize, + trans_index_table: TransitionTableIndex, + trans_target_table: TransitionTableIndex, states: TransitionTableIndex, transitions: TransitionTableIndex, @@ -31,12 +31,12 @@ impl TransducerHeader { let pos = rdr.position() + u64::from(header_len); rdr.set_position(pos); - let input_symbols = rdr.read_u16::().unwrap(); - let symbols = rdr.read_u16::().unwrap(); - let trans_index_table = rdr.read_u32::().unwrap() as usize; - let trans_target_table = rdr.read_u32::().unwrap() as usize; - let states = rdr.read_u32::().unwrap(); - let transitions = rdr.read_u32::().unwrap(); + let input_symbols = SymbolNumber(rdr.read_u16::().unwrap()); + let symbols = SymbolNumber(rdr.read_u16::().unwrap()); + let trans_index_table = TransitionTableIndex(rdr.read_u32::().unwrap()); + let trans_target_table = TransitionTableIndex(rdr.read_u32::().unwrap()); + let states = TransitionTableIndex(rdr.read_u32::().unwrap()); + let transitions = TransitionTableIndex(rdr.read_u32::().unwrap()); let mut props = [false; 9]; @@ -65,11 +65,11 @@ impl TransducerHeader { self.input_symbols } - pub fn index_table_size(&self) -> usize { + pub fn index_table_size(&self) -> TransitionTableIndex { self.trans_index_table } - pub fn target_table_size(&self) -> usize { + pub fn target_table_size(&self) -> TransitionTableIndex { self.trans_target_table } diff --git a/divvunspell/src/transducer/hfst/index_table.rs b/src/transducer/hfst/index_table.rs similarity index 66% rename from divvunspell/src/transducer/hfst/index_table.rs rename to src/transducer/hfst/index_table.rs index 2d50db2..013f46f 100644 --- a/divvunspell/src/transducer/hfst/index_table.rs +++ b/src/transducer/hfst/index_table.rs @@ -6,7 +6,6 @@ use std::fmt; use std::io::Cursor; use std::mem; use std::ptr; -use std::{u16, u32}; use crate::constants::INDEX_TABLE_SIZE; use crate::types::{SymbolNumber, TransitionTableIndex, Weight}; @@ -59,18 +58,17 @@ impl MappedIndexTable { return None; } - let index = self.offset + INDEX_TABLE_SIZE * i as usize; + let index = self.offset + INDEX_TABLE_SIZE * i.0 as usize; - let input_symbol: SymbolNumber = - if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { - let mut cursor = self.make_cursor(); - cursor.set_position(index as u64); - cursor.read_u16::().unwrap() - } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } - }; + let input_symbol = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { + let mut cursor = self.make_cursor(); + cursor.set_position(index as u64); + SymbolNumber(cursor.read_u16::().unwrap()) + } else { + SymbolNumber(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) + }; - if input_symbol == u16::MAX { + if input_symbol == SymbolNumber::MAX { None } else { Some(input_symbol) @@ -83,17 +81,18 @@ impl MappedIndexTable { return None; } - let index = self.offset + INDEX_TABLE_SIZE * i as usize; - let target: TransitionTableIndex = - if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { - let mut cursor = self.make_cursor(); - cursor.set_position((index + mem::size_of::()) as u64); - cursor.read_u32::().unwrap() - } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index + 2) as *const _) } - }; - - if target == u32::MAX { + let index = self.offset + INDEX_TABLE_SIZE * i.0 as usize; + let target = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { + let mut cursor = self.make_cursor(); + cursor.set_position((index + mem::size_of::()) as u64); + TransitionTableIndex(cursor.read_u32::().unwrap()) + } else { + TransitionTableIndex(unsafe { + ptr::read(self.mmap.as_ptr().add(index + 2) as *const _) + }) + }; + + if target == TransitionTableIndex::MAX { None } else { Some(target) @@ -108,11 +107,11 @@ impl MappedIndexTable { return None; } - let index = self.offset + INDEX_TABLE_SIZE * i as usize; + let index = self.offset + INDEX_TABLE_SIZE * i.0 as usize; let weight: Weight = { let mut cursor = self.make_cursor(); cursor.set_position((index + mem::size_of::()) as u64); - cursor.read_f32::().unwrap() + Weight(cursor.read_f32::().unwrap()) }; Some(weight) diff --git a/divvunspell/src/transducer/hfst/mod.rs b/src/transducer/hfst/mod.rs similarity index 86% rename from divvunspell/src/transducer/hfst/mod.rs rename to src/transducer/hfst/mod.rs index 2dba15a..ee33cd2 100644 --- a/divvunspell/src/transducer/hfst/mod.rs +++ b/src/transducer/hfst/mod.rs @@ -55,19 +55,17 @@ impl HfstTransducer { let index_table_offset = alphabet_offset + alphabet.len(); - let index_table_end = index_table_offset + INDEX_TABLE_SIZE * header.index_table_size(); + let index_table_end = + index_table_offset + INDEX_TABLE_SIZE * header.index_table_size().0 as usize; let index_table = MappedIndexTable::new( buf.clone(), index_table_offset, index_table_end, - header.index_table_size() as u32, + header.index_table_size(), ); - let trans_table = MappedTransitionTable::new( - buf.clone(), - index_table_end, - header.target_table_size() as u32, - ); + let trans_table = + MappedTransitionTable::new(buf.clone(), index_table_end, header.target_table_size()); HfstTransducer { buf, @@ -139,7 +137,10 @@ impl Transducer for HfstTransducer { None => false, } } else { - match self.index_table.input_symbol(i + u32::from(sym)) { + match self + .index_table + .input_symbol(i + TransitionTableIndex(sym.0 as u32)) + { Some(res) => sym == res, None => false, } @@ -150,10 +151,10 @@ impl Transducer for HfstTransducer { fn has_epsilons_or_flags(&self, i: TransitionTableIndex) -> bool { if i >= TARGET_TABLE { match self.transition_table.input_symbol(i - TARGET_TABLE) { - Some(sym) => sym == 0 || self.alphabet.is_flag(sym), + Some(sym) => sym == SymbolNumber::ZERO || self.alphabet.is_flag(sym), None => false, } - } else if let Some(0) = self.index_table.input_symbol(i) { + } else if let Some(SymbolNumber::ZERO) = self.index_table.input_symbol(i) { true } else { false @@ -162,7 +163,7 @@ impl Transducer for HfstTransducer { #[inline(always)] fn take_epsilons(&self, i: TransitionTableIndex) -> Option { - if let Some(0) = self.transition_table.input_symbol(i) { + if let Some(SymbolNumber::ZERO) = self.transition_table.input_symbol(i) { Some(self.transition_table.symbol_transition(i)) } else { None @@ -172,7 +173,7 @@ impl Transducer for HfstTransducer { #[inline(always)] fn take_epsilons_and_flags(&self, i: TransitionTableIndex) -> Option { if let Some(sym) = self.transition_table.input_symbol(i) { - if sym != 0 && !self.alphabet.is_flag(sym) { + if sym != SymbolNumber::ZERO && !self.alphabet.is_flag(sym) { None } else { Some(self.transition_table.symbol_transition(i)) @@ -202,8 +203,11 @@ impl Transducer for HfstTransducer { #[inline(always)] fn next(&self, i: TransitionTableIndex, symbol: SymbolNumber) -> Option { if i >= TARGET_TABLE { - Some(i - TARGET_TABLE + 1) - } else if let Some(v) = self.index_table.target(i + 1 + u32::from(symbol)) { + Some(i - TARGET_TABLE + TransitionTableIndex::ONE) + } else if let Some(v) = self + .index_table + .target(i + TransitionTableIndex(symbol.0 as u32 + 1)) + { Some(v - TARGET_TABLE) } else { None diff --git a/divvunspell/src/transducer/hfst/transition_table.rs b/src/transducer/hfst/transition_table.rs similarity index 70% rename from divvunspell/src/transducer/hfst/transition_table.rs rename to src/transducer/hfst/transition_table.rs index bd9431e..a1cc909 100644 --- a/divvunspell/src/transducer/hfst/transition_table.rs +++ b/src/transducer/hfst/transition_table.rs @@ -5,9 +5,9 @@ use byteorder::{LittleEndian, ReadBytesExt}; use memmap2::Mmap; use std::fmt; use std::io::Cursor; +use std::mem; use std::ptr; use std::sync::Arc; -use std::{mem, u16, u32}; use crate::constants::TRANS_TABLE_SIZE; use crate::transducer::symbol_transition::SymbolTransition; @@ -28,7 +28,11 @@ impl fmt::Debug for MappedTransitionTable { impl MappedTransitionTable { #[inline(always)] - pub fn new(mmap: Arc, offset: usize, size: u32) -> MappedTransitionTable { + pub fn new( + mmap: Arc, + offset: usize, + size: TransitionTableIndex, + ) -> MappedTransitionTable { MappedTransitionTable { size, mmap, offset } } @@ -40,14 +44,14 @@ impl MappedTransitionTable { #[inline(always)] fn read_symbol_from_cursor(&self, index: usize) -> Option { let index = self.offset + index; - let x: SymbolNumber = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { + let x = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { let mut cursor = self.make_cursor(); cursor.set_position(index as u64); - cursor.read_u16::().unwrap() + SymbolNumber(cursor.read_u16::().unwrap()) } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } + SymbolNumber(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) }; - if x == u16::MAX { + if x == SymbolNumber::MAX { None } else { Some(x) @@ -60,7 +64,7 @@ impl MappedTransitionTable { return None; } - let index = TRANS_TABLE_SIZE as usize * i as usize; + let index = TRANS_TABLE_SIZE as usize * i.0 as usize; self.read_symbol_from_cursor(index) } @@ -70,7 +74,7 @@ impl MappedTransitionTable { return None; } - let index = ((TRANS_TABLE_SIZE * i as usize) + mem::size_of::()) as usize; + let index = ((TRANS_TABLE_SIZE * i.0 as usize) + mem::size_of::()) as usize; self.read_symbol_from_cursor(index) } @@ -80,18 +84,18 @@ impl MappedTransitionTable { return None; } - let index = - self.offset + ((TRANS_TABLE_SIZE * i as usize) + (2 * mem::size_of::())); + let index = self.offset + + ((TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::())); let x: TransitionTableIndex = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { let mut cursor = self.make_cursor(); cursor.set_position(index as u64); - cursor.read_u32::().unwrap() + TransitionTableIndex(cursor.read_u32::().unwrap()) } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } + TransitionTableIndex(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) }; - if x == u32::MAX { + if x == TransitionTableIndex::MAX { None } else { Some(x) @@ -105,23 +109,25 @@ impl MappedTransitionTable { } let index = self.offset - + ((TRANS_TABLE_SIZE * i as usize) + + ((TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()) + mem::size_of::()); let x: Weight = if cfg!(all(target_arch = "arm", target_pointer_width = "32")) { let mut cursor = self.make_cursor(); cursor.set_position(index as u64); - cursor.read_f32::().unwrap() + Weight(cursor.read_f32::().unwrap()) } else { - unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) } + Weight(unsafe { ptr::read(self.mmap.as_ptr().add(index) as *const _) }) }; Some(x) } #[inline(always)] pub fn is_final(&self, i: TransitionTableIndex) -> bool { - self.input_symbol(i) == None && self.output_symbol(i) == None && self.target(i) == Some(1) + self.input_symbol(i) == None + && self.output_symbol(i) == None + && self.target(i) == Some(TransitionTableIndex::ONE) } #[inline(always)] diff --git a/divvunspell/src/transducer/mod.rs b/src/transducer/mod.rs similarity index 90% rename from divvunspell/src/transducer/mod.rs rename to src/transducer/mod.rs index b8fc20d..2480daf 100644 --- a/divvunspell/src/transducer/mod.rs +++ b/src/transducer/mod.rs @@ -12,9 +12,8 @@ mod alphabet; mod symbol_transition; pub(crate) mod tree_node; -pub(crate) use self::alphabet::TransducerAlphabet; - -use self::symbol_transition::SymbolTransition; +pub use self::alphabet::TransducerAlphabet; +pub use self::symbol_transition::SymbolTransition; use crate::types::{SymbolNumber, TransitionTableIndex, Weight}; use crate::vfs::{self, Filesystem}; @@ -46,6 +45,11 @@ impl TransducerError { } /// A file-based finite-state transducer. +/// +/// This trait defines the interface for finite-state transducers that can be loaded +/// from files and used for spell-checking and morphological analysis. +/// +/// Implementors can provide custom transducer formats beyond the built-in HFST and THFST formats. pub trait Transducer: Sized { /// file extension. const FILE_EXT: &'static str; @@ -104,7 +108,9 @@ pub trait TransitionTable: Sized { /// check if the state is a final state. #[inline(always)] fn is_final(&self, i: TransitionTableIndex) -> bool { - self.input_symbol(i) == None && self.output_symbol(i) == None && self.target(i) == Some(1) + self.input_symbol(i) == None + && self.output_symbol(i) == None + && self.target(i) == Some(TransitionTableIndex(1)) } /// ??? diff --git a/divvunspell/src/transducer/symbol_transition.rs b/src/transducer/symbol_transition.rs similarity index 68% rename from divvunspell/src/transducer/symbol_transition.rs rename to src/transducer/symbol_transition.rs index cd2119c..5cacd40 100644 --- a/divvunspell/src/transducer/symbol_transition.rs +++ b/src/transducer/symbol_transition.rs @@ -1,10 +1,16 @@ use crate::types::{SymbolNumber, TransitionTableIndex, Weight}; +/// Represents a transition in a finite-state transducer. +/// +/// A transition connects states in the FST and carries a symbol and weight. #[derive(Debug, Clone)] pub struct SymbolTransition { - target: Option, - symbol: Option, - weight: Option, + /// Target state index, or None if this is a final state + pub target: Option, + /// Input/output symbol number + pub symbol: Option, + /// Transition weight + pub weight: Option, } impl SymbolTransition { @@ -39,7 +45,7 @@ impl SymbolTransition { pub fn clone_with_epsilon_symbol(&self) -> SymbolTransition { SymbolTransition { target: self.target, - symbol: Some(0), + symbol: Some(SymbolNumber(0)), weight: self.weight, } } diff --git a/divvunspell/src/transducer/thfst/chunked.rs b/src/transducer/thfst/chunked.rs similarity index 91% rename from divvunspell/src/transducer/thfst/chunked.rs rename to src/transducer/thfst/chunked.rs index 48a0283..efd5967 100644 --- a/divvunspell/src/transducer/thfst/chunked.rs +++ b/src/transducer/thfst/chunked.rs @@ -19,9 +19,9 @@ where { // meta: MetaRecord, index_tables: Vec>, - indexes_per_chunk: u32, + indexes_per_chunk: TransitionTableIndex, transition_tables: Vec>, - transitions_per_chunk: u32, + transitions_per_chunk: TransitionTableIndex, alphabet: TransducerAlphabet, _file: std::marker::PhantomData, } @@ -32,7 +32,7 @@ macro_rules! transition_rel_index { ($self:expr, $x:expr) => {{ let index_page = $x / $self.transitions_per_chunk; let relative_index = $x - ($self.transitions_per_chunk * index_page); - (index_page as usize, relative_index) + (index_page.0 as usize, relative_index) }}; } @@ -40,7 +40,7 @@ macro_rules! index_rel_index { ($self:expr, $x:expr) => {{ let index_page = $x / $self.indexes_per_chunk; let relative_index = $x - ($self.indexes_per_chunk * index_page); - (index_page as usize, relative_index) + (index_page.0 as usize, relative_index) }}; } @@ -201,7 +201,7 @@ impl Transducer for ThfstChunkedTransducer { } } else { log::trace!("has_transitions: i:{} s:{:?}", i, s); - let (page, index) = index_rel_index!(self, i + u32::from(sym)); + let (page, index) = index_rel_index!(self, i + TransitionTableIndex(sym.0 as u32)); log::trace!("has_transitions: page:{} index:{:?}", page, index); if page >= self.index_tables.len() { return false; @@ -218,12 +218,12 @@ impl Transducer for ThfstChunkedTransducer { if i >= TARGET_TABLE { let (page, index) = transition_rel_index!(self, i - TARGET_TABLE); match self.transition_tables[page].input_symbol(index) { - Some(sym) => sym == 0 || self.alphabet.is_flag(sym), + Some(sym) => sym == SymbolNumber::ZERO || self.alphabet.is_flag(sym), None => false, } } else { let (page, index) = index_rel_index!(self, i); - if let Some(0) = self.index_tables[page].input_symbol(index) { + if let Some(SymbolNumber::ZERO) = self.index_tables[page].input_symbol(index) { true } else { false @@ -235,7 +235,7 @@ impl Transducer for ThfstChunkedTransducer { fn take_epsilons(&self, i: TransitionTableIndex) -> Option { let (page, index) = transition_rel_index!(self, i); - if let Some(0) = self.transition_tables[page].input_symbol(index) { + if let Some(SymbolNumber::ZERO) = self.transition_tables[page].input_symbol(index) { Some(self.transition_tables[page].symbol_transition(index)) } else { None @@ -247,7 +247,7 @@ impl Transducer for ThfstChunkedTransducer { let (page, index) = transition_rel_index!(self, i); if let Some(sym) = self.transition_tables[page].input_symbol(index) { - if sym != 0 && !self.alphabet.is_flag(sym) { + if sym != SymbolNumber::ZERO && !self.alphabet.is_flag(sym) { None } else { Some(self.transition_tables[page].symbol_transition(index)) @@ -278,9 +278,10 @@ impl Transducer for ThfstChunkedTransducer { #[inline(always)] fn next(&self, i: TransitionTableIndex, symbol: SymbolNumber) -> Option { if i >= TARGET_TABLE { - Some(i - TARGET_TABLE + 1) + Some(i - TARGET_TABLE + TransitionTableIndex(1)) } else { - let (page, index) = index_rel_index!(self, i + 1 + u32::from(symbol)); + let (page, index) = + index_rel_index!(self, i + TransitionTableIndex(symbol.0 as u32 + 1)); if let Some(v) = self.index_tables[page].target(index) { Some(v - TARGET_TABLE) diff --git a/divvunspell/src/transducer/thfst/index_table.rs b/src/transducer/thfst/index_table.rs similarity index 80% rename from divvunspell/src/transducer/thfst/index_table.rs rename to src/transducer/thfst/index_table.rs index add31c4..be90dd9 100644 --- a/divvunspell/src/transducer/thfst/index_table.rs +++ b/src/transducer/thfst/index_table.rs @@ -9,7 +9,7 @@ use crate::vfs::{self, Filesystem}; #[derive(Debug)] pub struct MemmapIndexTable { buf: Mmap, - pub(crate) size: u32, + pub(crate) size: TransitionTableIndex, _file: std::marker::PhantomData, } @@ -32,7 +32,7 @@ impl MemmapIndexTable { file.partial_memory_map(chunk * len, len as usize) .map_err(TransducerError::Memmap)? }; - let size = (buf.len() / INDEX_TABLE_SIZE) as u32; + let size = TransitionTableIndex((buf.len() / INDEX_TABLE_SIZE) as u32); Ok(MemmapIndexTable { buf, size, @@ -49,7 +49,7 @@ impl crate::transducer::IndexTable for MemmapIndexTable { { let file = fs.open_file(path).map_err(TransducerError::Io)?; let buf = unsafe { file.memory_map().map_err(TransducerError::Memmap)? }; - let size = (buf.len() / INDEX_TABLE_SIZE) as u32; + let size = TransitionTableIndex((buf.len() / INDEX_TABLE_SIZE) as u32); Ok(MemmapIndexTable { buf, size, @@ -62,12 +62,12 @@ impl crate::transducer::IndexTable for MemmapIndexTable { return None; } - let index = INDEX_TABLE_SIZE * i as usize; + let index = INDEX_TABLE_SIZE * i.0 as usize; let input_symbol: SymbolNumber = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if input_symbol == std::u16::MAX { + if input_symbol == SymbolNumber::MAX { None } else { Some(input_symbol) @@ -79,11 +79,11 @@ impl crate::transducer::IndexTable for MemmapIndexTable { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; let target: TransitionTableIndex = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if target == std::u32::MAX { + if target == TransitionTableIndex::MAX { None } else { Some(target) @@ -95,7 +95,7 @@ impl crate::transducer::IndexTable for MemmapIndexTable { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; let weight: Weight = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; Some(weight) @@ -113,7 +113,7 @@ mod unix { pub struct FileIndexTable { file: F, - size: u32, + size: TransitionTableIndex, } impl FileIndexTable { @@ -144,7 +144,7 @@ mod unix { { let file = fs.open_file(path).map_err(TransducerError::Io)?; Ok(FileIndexTable { - size: file.len().map_err(TransducerError::Io)? as u32, + size: TransitionTableIndex(file.len().map_err(TransducerError::Io)? as u32), file, }) } @@ -154,11 +154,11 @@ mod unix { return None; } - let index = INDEX_TABLE_SIZE * i as usize; + let index = INDEX_TABLE_SIZE * i.0 as usize; - let input_symbol: SymbolNumber = self.read_u16_at(index as u64); + let input_symbol = SymbolNumber(self.read_u16_at(index as u64)); - if input_symbol == std::u16::MAX { + if input_symbol == SymbolNumber::MAX { None } else { Some(input_symbol) @@ -170,10 +170,10 @@ mod unix { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; - let target: TransitionTableIndex = self.read_u32_at(index as u64); + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; + let target = TransitionTableIndex(self.read_u32_at(index as u64)); - if target == std::u32::MAX { + if target == TransitionTableIndex::MAX { None } else { Some(target) @@ -185,9 +185,9 @@ mod unix { return None; } - let index = (INDEX_TABLE_SIZE * i as usize) + 4; + let index = (INDEX_TABLE_SIZE * i.0 as usize) + 4; let x = self.read_u32_at(index as u64); - let weight: Weight = f32::from_bits(x); + let weight = Weight(f32::from_bits(x)); Some(weight) } diff --git a/divvunspell/src/transducer/thfst/mod.rs b/src/transducer/thfst/mod.rs similarity index 90% rename from divvunspell/src/transducer/thfst/mod.rs rename to src/transducer/thfst/mod.rs index 44dd0c4..d1073fc 100644 --- a/divvunspell/src/transducer/thfst/mod.rs +++ b/src/transducer/thfst/mod.rs @@ -150,7 +150,10 @@ where None => false, } } else { - match self.index_table.input_symbol(i + u32::from(sym)) { + match self + .index_table + .input_symbol(i + TransitionTableIndex(sym.0 as u32)) + { Some(res) => sym == res, None => false, } @@ -161,10 +164,10 @@ where fn has_epsilons_or_flags(&self, i: TransitionTableIndex) -> bool { if i >= TARGET_TABLE { match self.transition_table.input_symbol(i - TARGET_TABLE) { - Some(sym) => sym == 0 || self.alphabet.is_flag(sym), + Some(sym) => sym == SymbolNumber::ZERO || self.alphabet.is_flag(sym), None => false, } - } else if let Some(0) = self.index_table.input_symbol(i) { + } else if let Some(SymbolNumber::ZERO) = self.index_table.input_symbol(i) { true } else { false @@ -173,7 +176,7 @@ where #[inline(always)] fn take_epsilons(&self, i: TransitionTableIndex) -> Option { - if let Some(0) = self.transition_table.input_symbol(i) { + if let Some(SymbolNumber::ZERO) = self.transition_table.input_symbol(i) { Some(self.transition_table.symbol_transition(i)) } else { None @@ -183,7 +186,7 @@ where #[inline(always)] fn take_epsilons_and_flags(&self, i: TransitionTableIndex) -> Option { if let Some(sym) = self.transition_table.input_symbol(i) { - if sym != 0 && !self.alphabet.is_flag(sym) { + if sym != SymbolNumber::ZERO && !self.alphabet.is_flag(sym) { None } else { Some(self.transition_table.symbol_transition(i)) @@ -213,8 +216,11 @@ where #[inline(always)] fn next(&self, i: TransitionTableIndex, symbol: SymbolNumber) -> Option { if i >= TARGET_TABLE { - Some(i - TARGET_TABLE + 1) - } else if let Some(v) = self.index_table.target(i + 1 + u32::from(symbol)) { + Some(i - TARGET_TABLE + TransitionTableIndex(1)) + } else if let Some(v) = self + .index_table + .target(i + TransitionTableIndex(symbol.0 as u32 + 1)) + { Some(v - TARGET_TABLE) } else { None diff --git a/divvunspell/src/transducer/thfst/transition_table.rs b/src/transducer/thfst/transition_table.rs similarity index 79% rename from divvunspell/src/transducer/thfst/transition_table.rs rename to src/transducer/thfst/transition_table.rs index 4903fa1..16ed5f4 100644 --- a/divvunspell/src/transducer/thfst/transition_table.rs +++ b/src/transducer/thfst/transition_table.rs @@ -9,7 +9,7 @@ use memmap2::Mmap; #[derive(Debug)] pub struct MemmapTransitionTable { buf: Mmap, - pub(crate) size: u32, + pub(crate) size: TransitionTableIndex, _file: std::marker::PhantomData, } @@ -32,7 +32,7 @@ impl MemmapTransitionTable { file.partial_memory_map(chunk * len, len as usize) .map_err(TransducerError::Memmap)? }; - let size = (buf.len() / TRANS_TABLE_SIZE) as u32; + let size = TransitionTableIndex((buf.len() / TRANS_TABLE_SIZE) as u32); Ok(MemmapTransitionTable { buf, size, @@ -43,7 +43,7 @@ impl MemmapTransitionTable { #[inline] fn read_symbol_from_cursor(&self, index: usize) -> Option { let x = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if x == std::u16::MAX { + if x == SymbolNumber::MAX { None } else { Some(x) @@ -62,7 +62,7 @@ impl TransitionTable for MemmapTransitionTable { let size = (buf.len() / TRANS_TABLE_SIZE) as u32; Ok(MemmapTransitionTable { buf, - size, + size: TransitionTableIndex(size), _file: std::marker::PhantomData::, }) } @@ -72,7 +72,7 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = TRANS_TABLE_SIZE as usize * i as usize; + let index = TRANS_TABLE_SIZE as usize * i.0 as usize; self.read_symbol_from_cursor(index) } @@ -81,7 +81,7 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = ((TRANS_TABLE_SIZE * i as usize) + mem::size_of::()) as usize; + let index = ((TRANS_TABLE_SIZE * i.0 as usize) + mem::size_of::()) as usize; self.read_symbol_from_cursor(index) } @@ -90,11 +90,11 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + (2 * mem::size_of::()); + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()); let x: TransitionTableIndex = unsafe { ptr::read(self.buf.as_ptr().add(index) as *const _) }; - if x == std::u32::MAX { + if x == TransitionTableIndex::MAX { None } else { Some(x) @@ -106,7 +106,7 @@ impl TransitionTable for MemmapTransitionTable { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()) + mem::size_of::(); @@ -127,7 +127,7 @@ mod unix { pub struct FileTransitionTable { file: F, - size: u32, + size: TransitionTableIndex, } impl FileTransitionTable { @@ -158,7 +158,7 @@ mod unix { { let file = fs.open_file(path).map_err(TransducerError::Io)?; Ok(FileTransitionTable { - size: file.len().map_err(TransducerError::Io)? as u32, + size: TransitionTableIndex(file.len().map_err(TransducerError::Io)? as u32), file, }) } @@ -169,9 +169,9 @@ mod unix { return None; } - let index = TRANS_TABLE_SIZE as usize * i as usize; - let x = self.read_u16_at(index as u64); - if x == std::u16::MAX { + let index = TRANS_TABLE_SIZE as usize * i.0 as usize; + let x = SymbolNumber(self.read_u16_at(index as u64)); + if x == SymbolNumber::MAX { None } else { Some(x) @@ -184,9 +184,10 @@ mod unix { return None; } - let index = ((TRANS_TABLE_SIZE * i as usize) + mem::size_of::()) as usize; - let x = self.read_u16_at(index as u64); - if x == std::u16::MAX { + let index = + ((TRANS_TABLE_SIZE * i.0 as usize) + mem::size_of::()) as usize; + let x = SymbolNumber(self.read_u16_at(index as u64)); + if x == SymbolNumber::MAX { None } else { Some(x) @@ -199,10 +200,10 @@ mod unix { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + (2 * mem::size_of::()); + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()); - let x = self.read_u32_at(index as u64); - if x == std::u32::MAX { + let x = TransitionTableIndex(self.read_u32_at(index as u64)); + if x == TransitionTableIndex::MAX { None } else { Some(x) @@ -215,11 +216,11 @@ mod unix { return None; } - let index = (TRANS_TABLE_SIZE * i as usize) + let index = (TRANS_TABLE_SIZE * i.0 as usize) + (2 * mem::size_of::()) + mem::size_of::(); let x = self.read_u32_at(index as u64); - let x = f32::from_bits(x); + let x = Weight(f32::from_bits(x)); Some(x) } } diff --git a/divvunspell/src/transducer/tree_node.rs b/src/transducer/tree_node.rs similarity index 81% rename from divvunspell/src/transducer/tree_node.rs rename to src/transducer/tree_node.rs index c7b75ef..c935006 100644 --- a/divvunspell/src/transducer/tree_node.rs +++ b/src/transducer/tree_node.rs @@ -4,18 +4,18 @@ use std::hash::{Hash, Hasher}; use super::symbol_transition::SymbolTransition; use crate::types::{ - FlagDiacriticOperation, FlagDiacriticOperator, FlagDiacriticState, SymbolNumber, - TransitionTableIndex, Weight, + FlagDiacriticOperation, FlagDiacriticOperator, FlagDiacriticState, InputIndex, SymbolNumber, + TransitionTableIndex, ValueNumber, Weight, }; #[derive(Debug, Clone)] -pub struct TreeNode { - pub lexicon_state: TransitionTableIndex, - pub mutator_state: TransitionTableIndex, - pub input_state: u32, - pub weight: f32, - pub flag_state: FlagDiacriticState, - pub string: Vec, +pub(crate) struct TreeNode { + pub(crate) lexicon_state: TransitionTableIndex, + pub(crate) mutator_state: TransitionTableIndex, + pub(crate) input_state: InputIndex, + pub(crate) weight: Weight, + pub(crate) flag_state: FlagDiacriticState, + pub(crate) string: Vec, } impl std::cmp::PartialEq for TreeNode { @@ -52,9 +52,9 @@ impl std::cmp::Eq for TreeNode {} impl Hash for TreeNode { fn hash(&self, state: &mut H) { - state.write_u32(self.input_state); - state.write_u32(self.mutator_state); - state.write_u32(self.lexicon_state); + self.input_state.hash(state); + self.mutator_state.hash(state); + self.lexicon_state.hash(state); } } @@ -62,11 +62,11 @@ impl lifeguard::Recycleable for TreeNode { fn new() -> Self { TreeNode { string: Vec::with_capacity(1), - input_state: 0, - mutator_state: 0, - lexicon_state: 0, + input_state: InputIndex(0), + mutator_state: TransitionTableIndex(0), + lexicon_state: TransitionTableIndex(0), flag_state: vec![], - weight: 0.0, + weight: Weight(0.0), } } @@ -105,11 +105,11 @@ impl TreeNode { ) -> Recycled<'a, TreeNode> { pool.attach(TreeNode { string: vec![], - input_state: 0, - mutator_state: 0, - lexicon_state: 0, + input_state: InputIndex(0), + mutator_state: TransitionTableIndex(0), + lexicon_state: TransitionTableIndex(0), flag_state: start_state, - weight: 0.0, + weight: Weight(0.0), }) } @@ -132,7 +132,7 @@ impl TreeNode { } if let Some(value) = transition.symbol() { - if value != 0 { + if value.0 != 0 { node.string.push(value); } } @@ -182,7 +182,7 @@ impl TreeNode { &self, pool: &'a Pool, output_symbol: SymbolNumber, - next_input: Option, + next_input: Option, next_mutator: TransitionTableIndex, next_lexicon: TransitionTableIndex, weight: Weight, @@ -194,7 +194,7 @@ impl TreeNode { node.string.extend(&self.string); } - if output_symbol != 0 { + if output_symbol.0 != 0 { node.string.push(output_symbol); } @@ -223,11 +223,11 @@ impl TreeNode { &self, pool: &'a Pool, feature: SymbolNumber, - value: i16, + value: ValueNumber, transition: &SymbolTransition, ) -> Recycled<'a, TreeNode> { let mut node = self.apply_transition(pool, transition); - node.flag_state[feature as usize] = value; + node.flag_state[feature.0 as usize] = value; node } @@ -270,13 +270,13 @@ impl TreeNode { Some(self.update_flag(pool, op.feature, op.value, transition)) } FlagDiacriticOperator::NegativeSet => { - Some(self.update_flag(pool, op.feature, -op.value, transition)) + Some(self.update_flag(pool, op.feature, op.value.invert(), transition)) } FlagDiacriticOperator::Require => { - let res = if op.value == 0 { - self.flag_state[op.feature as usize] != 0 + let res = if op.value.0 == 0 { + self.flag_state[op.feature.0 as usize] != ValueNumber(0) } else { - self.flag_state[op.feature as usize] == op.value + self.flag_state[op.feature.0 as usize] == op.value }; if res { @@ -286,10 +286,10 @@ impl TreeNode { } } FlagDiacriticOperator::Disallow => { - let res = if op.value == 0 { - self.flag_state[op.feature as usize] == 0 + let res = if op.value.0 == 0 { + self.flag_state[op.feature.0 as usize] == ValueNumber(0) } else { - self.flag_state[op.feature as usize] != op.value + self.flag_state[op.feature.0 as usize] != op.value }; if res { @@ -298,13 +298,15 @@ impl TreeNode { None } } - FlagDiacriticOperator::Clear => Some(self.update_flag(pool, op.feature, 0, transition)), + FlagDiacriticOperator::Clear => { + Some(self.update_flag(pool, op.feature, ValueNumber(0), transition)) + } FlagDiacriticOperator::Unification => { // if the feature is unset OR the feature is to this value already OR // the feature is negatively set to something else than this value - let f = self.flag_state[op.feature as usize]; + let f = self.flag_state[op.feature.0 as usize]; - if f == 0 || f == op.value || (f < 0 && -f != op.value) { + if f.0 == 0 || f == op.value || (f.0 < 0 && f.invert() != op.value) { Some(self.update_flag(pool, op.feature, op.value, transition)) } else { None diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..a51a890 --- /dev/null +++ b/src/types.rs @@ -0,0 +1,204 @@ +use std::{ + fmt::Display, + ops::{Add, Div, Mul, Sub}, +}; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum FlagDiacriticOperator { + PositiveSet, + NegativeSet, + Require, + Disallow, + Clear, + Unification, +} + +impl std::str::FromStr for FlagDiacriticOperator { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "P" => Ok(FlagDiacriticOperator::PositiveSet), + "N" => Ok(FlagDiacriticOperator::NegativeSet), + "R" => Ok(FlagDiacriticOperator::Require), + "D" => Ok(FlagDiacriticOperator::Disallow), + "C" => Ok(FlagDiacriticOperator::Clear), + "U" => Ok(FlagDiacriticOperator::Unification), + _ => Err(()), + } + } +} + +#[derive(Debug)] +pub enum HeaderFlag { + Weighted, + Deterministic, + InputDeterministic, + Minimized, + Cyclic, + HasEpsilonEpsilonTransitions, + HasInputEpsilonTransitions, + HasInputEpsilonCycles, + HasUnweightedInputEpsilonCycles, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct FlagDiacriticOperation { + pub operation: FlagDiacriticOperator, + pub feature: SymbolNumber, + pub value: ValueNumber, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +#[serde(transparent)] +pub struct SymbolNumber(pub u16); + +impl SymbolNumber { + pub(crate) const ZERO: Self = SymbolNumber(0); + pub(crate) const MAX: Self = SymbolNumber(u16::MAX); + + #[inline(always)] + pub(crate) fn incr(&self) -> Self { + Self(self.0 + 1) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +#[serde(transparent)] +pub struct ValueNumber(pub i16); + +impl ValueNumber { + pub const ZERO: Self = ValueNumber(0); + + #[inline(always)] + pub(crate) fn invert(&self) -> Self { + ValueNumber(-self.0) + } + + #[inline(always)] + pub(crate) fn incr(&self) -> Self { + ValueNumber(self.0 + 1) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +#[serde(transparent)] +pub struct InputIndex(pub u32); + +impl InputIndex { + #[inline(always)] + pub(crate) fn incr(&self, val: u32) -> Self { + Self(self.0 + val) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(transparent)] +#[serde(transparent)] +pub struct TransitionTableIndex(pub u32); + +impl Display for TransitionTableIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Add for TransitionTableIndex { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 + rhs.0) + } +} + +impl Sub for TransitionTableIndex { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 - rhs.0) + } +} + +impl Mul for TransitionTableIndex { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 * rhs.0) + } +} + +impl Div for TransitionTableIndex { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + TransitionTableIndex(self.0 / rhs.0) + } +} + +impl TransitionTableIndex { + pub(crate) const MAX: Self = TransitionTableIndex(u32::MAX); + pub(crate) const ZERO: Self = TransitionTableIndex(0); + pub(crate) const ONE: Self = TransitionTableIndex(1); + + #[inline(always)] + pub(crate) fn incr(&self) -> Self { + Self(self.0 + 1) + } +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, PartialOrd)] +#[repr(transparent)] +#[serde(transparent)] +pub struct Weight(pub f32); + +impl Weight { + pub const ZERO: Self = Weight(0.0); + pub const MAX: Self = Weight(f32::MAX); + pub const INFINITE: Self = Weight(f32::INFINITY); +} + +impl Display for Weight { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Add for Weight { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Weight(self.0 + rhs.0) + } +} + +impl Sub for Weight { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Weight(self.0 - rhs.0) + } +} + +impl Mul for Weight { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + Weight(self.0 * rhs.0) + } +} + +impl Div for Weight { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + Weight(self.0 / rhs.0) + } +} + +pub type FlagDiacriticState = Vec; +pub type OperationsMap = hashbrown::HashMap; diff --git a/divvunspell/src/vfs.rs b/src/vfs.rs similarity index 100% rename from divvunspell/src/vfs.rs rename to src/vfs.rs